Add native LLM core foundation (#24712)

This commit is contained in:
Kit Langton
2026-05-08 16:56:20 -04:00
committed by GitHub
parent dc7d665e94
commit 5bb7b23440
144 changed files with 17052 additions and 2 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@ node_modules
.worktrees
.sst
.env
.env.local
.idea
.vscode
.codex

5
.gitleaksignore Normal file
View File

@@ -0,0 +1,5 @@
# Fake secret-looking strings used by HTTP recorder redaction tests.
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:69
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:92
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:146
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:gcp-api-key:71

View File

@@ -111,6 +111,7 @@
"zod": "catalog:",
},
"devDependencies": {
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
"@webgpu/types": "0.1.54",
"typescript": "catalog:",
@@ -302,6 +303,7 @@
"devDependencies": {
"@cloudflare/workers-types": "catalog:",
"@tailwindcss/vite": "catalog:",
"@types/bun": "catalog:",
"@types/luxon": "catalog:",
"@typescript/native-preview": "catalog:",
"tailwindcss": "catalog:",
@@ -325,6 +327,37 @@
"typescript": "catalog:",
},
},
"packages/http-recorder": {
"name": "@opencode-ai/http-recorder",
"version": "0.0.0",
"dependencies": {
"@effect/platform-node": "catalog:",
"effect": "catalog:",
},
"devDependencies": {
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
},
},
"packages/llm": {
"name": "@opencode-ai/llm",
"version": "1.14.25",
"dependencies": {
"@smithy/eventstream-codec": "4.2.14",
"@smithy/util-utf8": "4.2.2",
"aws4fetch": "1.0.20",
"effect": "catalog:",
},
"devDependencies": {
"@clack/prompts": "1.0.0-alpha.1",
"@effect/platform-node": "catalog:",
"@opencode-ai/http-recorder": "workspace:*",
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
},
},
"packages/opencode": {
"name": "opencode",
"version": "1.14.41",
@@ -1552,6 +1585,10 @@
"@opencode-ai/function": ["@opencode-ai/function@workspace:packages/function"],
"@opencode-ai/http-recorder": ["@opencode-ai/http-recorder@workspace:packages/http-recorder"],
"@opencode-ai/llm": ["@opencode-ai/llm@workspace:packages/llm"],
"@opencode-ai/plugin": ["@opencode-ai/plugin@workspace:packages/plugin"],
"@opencode-ai/script": ["@opencode-ai/script@workspace:packages/script"],
@@ -5566,6 +5603,10 @@
"@opencode-ai/desktop/typescript": ["typescript@5.6.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw=="],
"@opencode-ai/llm/@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.14", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.14.1", "@smithy/util-hex-encoding": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-erZq0nOIpzfeZdCyzZjdJb4nVSKLUmSkaQUVkRGQTXs30gyUGeKnrYEg+Xe1W5gE3aReS7IgsvANwVPxSzY6Pw=="],
"@opencode-ai/llm/@smithy/util-utf8": ["@smithy/util-utf8@4.2.2", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw=="],
"@opencode-ai/ui/@solid-primitives/resize-observer": ["@solid-primitives/resize-observer@2.1.3", "", { "dependencies": { "@solid-primitives/event-listener": "^2.4.3", "@solid-primitives/rootless": "^1.5.2", "@solid-primitives/static-store": "^0.1.2", "@solid-primitives/utils": "^6.3.2" }, "peerDependencies": { "solid-js": "^1.6.12" } }, "sha512-zBLje5E06TgOg93S7rGPldmhDnouNGhvfZVKOp+oG2XU8snA+GoCSSCz1M+jpNAg5Ek2EakU5UVQqL152WmdXQ=="],
"@opencode-ai/web/@shikijs/transformers": ["@shikijs/transformers@3.20.0", "", { "dependencies": { "@shikijs/core": "3.20.0", "@shikijs/types": "3.20.0" } }, "sha512-PrHHMRr3Q5W1qB/42kJW6laqFyWdhrPF2hNR9qjOm1xcSiAO3hAHo7HaVyHE6pMyevmy3i51O8kuGGXC78uK3g=="],
@@ -6632,6 +6673,8 @@
"@opencode-ai/desktop/@actions/artifact/@actions/http-client": ["@actions/http-client@2.2.3", "", { "dependencies": { "tunnel": "^0.0.6", "undici": "^5.25.4" } }, "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA=="],
"@opencode-ai/llm/@smithy/eventstream-codec/@smithy/types": ["@smithy/types@4.14.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-59b5HtSVrVR/eYNei3BUj3DCPKD/G7EtDDe7OEJE7i7FtQFugYo6MxbotS8mVJkLNVf8gYaAlEBwwtJ9HzhWSg=="],
"@opencode-ai/web/@shikijs/transformers/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="],
"@opencode-ai/web/@shikijs/transformers/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="],

View File

@@ -35,6 +35,7 @@
"zod": "catalog:"
},
"devDependencies": {
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
"@webgpu/types": "0.1.54",
"typescript": "catalog:",

View File

@@ -12,7 +12,7 @@
"allowJs": true,
"strict": true,
"noEmit": true,
"types": ["vite/client", "@webgpu/types"],
"types": ["vite/client", "@webgpu/types", "bun"],
"isolatedModules": true,
"paths": {
"~/*": ["./src/*"]

View File

@@ -32,6 +32,7 @@
"@cloudflare/workers-types": "catalog:",
"@tailwindcss/vite": "catalog:",
"@typescript/native-preview": "catalog:",
"@types/bun": "catalog:",
"@types/luxon": "catalog:",
"tailwindcss": "catalog:",
"typescript": "catalog:",

View File

@@ -11,7 +11,7 @@
"allowJs": true,
"noEmit": true,
"strict": true,
"types": ["@cloudflare/workers-types", "vite/client"],
"types": ["@cloudflare/workers-types", "vite/client", "bun"],
"isolatedModules": true,
"paths": {
"~/*": ["./src/*"]

View File

@@ -0,0 +1,26 @@
{
"$schema": "https://json.schemastore.org/package.json",
"version": "0.0.0",
"name": "@opencode-ai/http-recorder",
"type": "module",
"license": "MIT",
"private": true,
"scripts": {
"test": "bun test --timeout 30000",
"test:ci": "mkdir -p .artifacts/unit && bun test --timeout 30000 --reporter=junit --reporter-outfile=.artifacts/unit/junit.xml",
"typecheck": "tsgo --noEmit"
},
"exports": {
".": "./src/index.ts",
"./*": "./src/*.ts"
},
"devDependencies": {
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:"
},
"dependencies": {
"@effect/platform-node": "catalog:",
"effect": "catalog:"
}
}

View File

@@ -0,0 +1,105 @@
import { Context, Effect, FileSystem, Layer, PlatformError, Ref } from "effect"
import * as path from "node:path"
import { cassetteSecretFindings, type SecretFinding } from "./redaction"
import type { Cassette, CassetteMetadata, Interaction } from "./schema"
import { cassetteFor, cassettePath, DEFAULT_RECORDINGS_DIR, formatCassette, parseCassette } from "./storage"
export interface Entry {
readonly name: string
readonly path: string
}
export interface Interface {
readonly path: (name: string) => string
readonly read: (name: string) => Effect.Effect<Cassette, PlatformError.PlatformError>
readonly write: (name: string, cassette: Cassette) => Effect.Effect<void, PlatformError.PlatformError>
readonly append: (
name: string,
interaction: Interaction,
metadata: CassetteMetadata | undefined,
) => Effect.Effect<
{
readonly cassette: Cassette
readonly findings: ReadonlyArray<SecretFinding>
},
PlatformError.PlatformError
>
readonly exists: (name: string) => Effect.Effect<boolean>
readonly list: () => Effect.Effect<ReadonlyArray<Entry>, PlatformError.PlatformError>
readonly scan: (cassette: Cassette) => ReadonlyArray<SecretFinding>
}
export class Service extends Context.Service<Service, Interface>()("@opencode-ai/http-recorder/Cassette") {}
export const layer = (options: { readonly directory?: string } = {}) =>
Layer.effect(
Service,
Effect.gen(function* () {
const fileSystem = yield* FileSystem.FileSystem
const directory = options.directory ?? DEFAULT_RECORDINGS_DIR
const recorded = yield* Ref.make(new Map<string, ReadonlyArray<Interaction>>())
const pathFor = (name: string) => cassettePath(name, directory)
const walk = (directory: string): Effect.Effect<ReadonlyArray<string>, PlatformError.PlatformError> =>
Effect.gen(function* () {
const entries = yield* fileSystem
.readDirectory(directory)
.pipe(Effect.catch(() => Effect.succeed([] as string[])))
const nested = yield* Effect.forEach(entries, (entry) => {
const full = path.join(directory, entry)
return fileSystem.stat(full).pipe(
Effect.flatMap((stat) => (stat.type === "Directory" ? walk(full) : Effect.succeed([full]))),
Effect.catch(() => Effect.succeed([] as string[])),
)
})
return nested.flat()
})
const read = Effect.fn("Cassette.read")(function* (name: string) {
return parseCassette(yield* fileSystem.readFileString(pathFor(name)))
})
const write = Effect.fn("Cassette.write")(function* (name: string, cassette: Cassette) {
yield* fileSystem.makeDirectory(path.dirname(pathFor(name)), { recursive: true })
yield* fileSystem.writeFileString(pathFor(name), formatCassette(cassette))
})
const append = Effect.fn("Cassette.append")(function* (
name: string,
interaction: Interaction,
metadata: CassetteMetadata | undefined,
) {
const interactions = yield* Ref.updateAndGet(recorded, (previous) =>
new Map(previous).set(name, [...(previous.get(name) ?? []), interaction]),
)
const cassette = cassetteFor(name, interactions.get(name) ?? [], metadata)
const findings = cassetteSecretFindings(cassette)
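// Persist only when the scan found no secret-looking values; callers surface findings as errors.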
if (findings.length === 0) yield* write(name, cassette)
return { cassette, findings }
})
const exists = Effect.fn("Cassette.exists")(function* (name: string) {
return yield* fileSystem.access(pathFor(name)).pipe(
Effect.as(true),
Effect.catch(() => Effect.succeed(false)),
)
})
const list = Effect.fn("Cassette.list")(function* () {
return (yield* walk(directory))
.filter((file) => file.endsWith(".json"))
.map((file) => ({
name: path.relative(directory, file).replace(/\\/g, "/").replace(/\.json$/, ""),
path: file,
}))
.toSorted((a, b) => a.name.localeCompare(b.name))
})
return Service.of({ path: pathFor, read, write, append, exists, list, scan: cassetteSecretFindings })
}),
)
export const defaultLayer = layer()
export * as Cassette from "./cassette"

View File

@@ -0,0 +1,95 @@
import { Option } from "effect"
import { Headers, HttpBody, HttpClientRequest, UrlParams } from "effect/unstable/http"
import { decodeJson } from "./matching"
import { REDACTED, redactUrl, secretFindings } from "./redaction"
import { httpInteractions, type Cassette, type RequestSnapshot } from "./schema"
const safeText = (value: unknown) => {
if (value === undefined) return "undefined"
if (secretFindings(value).length > 0) return JSON.stringify(REDACTED)
const text = JSON.stringify(value)
if (!text) return String(value)
return text.length > 300 ? `${text.slice(0, 300)}...` : text
}
const jsonBody = (body: string) => Option.getOrUndefined(decodeJson(body))
const valueDiffs = (expected: unknown, received: unknown, base = "$", limit = 8): ReadonlyArray<string> => {
if (Object.is(expected, received)) return []
if (
expected &&
received &&
typeof expected === "object" &&
typeof received === "object" &&
!Array.isArray(expected) &&
!Array.isArray(received)
) {
return [...new Set([...Object.keys(expected), ...Object.keys(received)])]
.toSorted()
.flatMap((key) =>
valueDiffs(
(expected as Record<string, unknown>)[key],
(received as Record<string, unknown>)[key],
`${base}.${key}`,
limit,
),
)
.slice(0, limit)
}
if (Array.isArray(expected) && Array.isArray(received)) {
return Array.from({ length: Math.max(expected.length, received.length) }, (_, index) => index)
.flatMap((index) => valueDiffs(expected[index], received[index], `${base}[${index}]`, limit))
.slice(0, limit)
}
return [`${base} expected ${safeText(expected)}, received ${safeText(received)}`]
}
const headerDiffs = (expected: Record<string, string>, received: Record<string, string>) =>
[...new Set([...Object.keys(expected), ...Object.keys(received)])].toSorted().flatMap((key) => {
if (expected[key] === received[key]) return []
if (expected[key] === undefined) return [` ${key} unexpected ${safeText(received[key])}`]
if (received[key] === undefined) return [` ${key} missing expected ${safeText(expected[key])}`]
return [` ${key} expected ${safeText(expected[key])}, received ${safeText(received[key])}`]
})
export const requestDiff = (expected: RequestSnapshot, received: RequestSnapshot) => {
const lines: string[] = []
if (expected.method !== received.method) {
lines.push("method:", ` expected ${expected.method}, received ${received.method}`)
}
if (expected.url !== received.url) {
lines.push("url:", ` expected ${expected.url}`, ` received ${received.url}`)
}
const headers = headerDiffs(expected.headers, received.headers)
if (headers.length > 0) lines.push("headers:", ...headers.slice(0, 8))
const expectedBody = jsonBody(expected.body)
const receivedBody = jsonBody(received.body)
const body =
expectedBody !== undefined && receivedBody !== undefined
? valueDiffs(expectedBody, receivedBody).map((line) => ` ${line}`)
: expected.body === received.body
? []
: [` expected ${safeText(expected.body)}, received ${safeText(received.body)}`]
if (body.length > 0) lines.push("body:", ...body)
return lines
}
export const mismatchDetail = (cassette: Cassette, incoming: RequestSnapshot) => {
const interactions = httpInteractions(cassette)
if (interactions.length === 0) return "cassette has no recorded HTTP interactions"
const ranked = interactions
.map((interaction, index) => ({ index, lines: requestDiff(interaction.request, incoming) }))
.toSorted((a, b) => a.lines.length - b.lines.length || a.index - b.index)
const best = ranked[0]
return ["no recorded interaction matched", `closest interaction: #${best.index + 1}`, ...best.lines].join("\n")
}
export const redactedErrorRequest = (request: HttpClientRequest.HttpClientRequest) =>
HttpClientRequest.makeWith(
request.method,
redactUrl(request.url),
UrlParams.empty,
Option.none(),
Headers.empty,
HttpBody.empty,
)

View File

@@ -0,0 +1,211 @@
import { NodeFileSystem } from "@effect/platform-node"
import { Effect, Layer, Option, Ref } from "effect"
import {
FetchHttpClient,
HttpClient,
HttpClientError,
HttpClientRequest,
HttpClientResponse,
} from "effect/unstable/http"
import { redactedErrorRequest, mismatchDetail, requestDiff } from "./diff"
import { defaultMatcher, decodeJson, type RequestMatcher } from "./matching"
import { redactHeaders, redactUrl, type SecretFinding } from "./redaction"
import {
httpInteractions,
type Cassette,
type CassetteMetadata,
type HttpInteraction,
type ResponseSnapshot,
} from "./schema"
import * as CassetteService from "./cassette"
export const DEFAULT_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
const DEFAULT_RESPONSE_HEADERS: ReadonlyArray<string> = ["content-type"]
export type RecordReplayMode = "record" | "replay" | "passthrough"
export interface RecordReplayOptions {
readonly mode?: RecordReplayMode
readonly directory?: string
readonly metadata?: CassetteMetadata
readonly redact?: {
readonly headers?: ReadonlyArray<string>
readonly query?: ReadonlyArray<string>
readonly url?: (url: string) => string
}
readonly requestHeaders?: ReadonlyArray<string>
readonly responseHeaders?: ReadonlyArray<string>
readonly redactBody?: (body: unknown) => unknown
readonly dispatch?: "match" | "sequential"
readonly match?: RequestMatcher
}
const responseHeaders = (
response: HttpClientResponse.HttpClientResponse,
allow: ReadonlyArray<string>,
redact: ReadonlyArray<string> | undefined,
) => {
const merged = redactHeaders(response.headers as Record<string, string>, allow, redact)
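// Default a missing content-type to SSE so streaming fixtures replay with the expected framing.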
if (!merged["content-type"]) merged["content-type"] = "text/event-stream"
return merged
}
const BINARY_CONTENT_TYPES: ReadonlyArray<string> = ["vnd.amazon.eventstream", "octet-stream"]
const isBinaryContentType = (contentType: string | undefined) => {
if (!contentType) return false
const lower = contentType.toLowerCase()
return BINARY_CONTENT_TYPES.some((token) => lower.includes(token))
}
const captureResponseBody = (response: HttpClientResponse.HttpClientResponse, contentType: string | undefined) =>
isBinaryContentType(contentType)
? response.arrayBuffer.pipe(
Effect.map((bytes) => ({ body: Buffer.from(bytes).toString("base64"), bodyEncoding: "base64" as const })),
)
: response.text.pipe(Effect.map((body) => ({ body })))
const decodeResponseBody = (snapshot: ResponseSnapshot) =>
snapshot.bodyEncoding === "base64" ? Buffer.from(snapshot.body, "base64") : snapshot.body
const fixtureMissing = (request: HttpClientRequest.HttpClientRequest, name: string) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Fixture "${name}" not found. Run with RECORD=true to create it.`,
}),
})
const fixtureMismatch = (request: HttpClientRequest.HttpClientRequest, name: string, detail: string) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Fixture "${name}" does not match the current request: ${detail}. Run with RECORD=true to update it.`,
}),
})
const unsafeCassette = (
request: HttpClientRequest.HttpClientRequest,
name: string,
findings: ReadonlyArray<SecretFinding>,
) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Refusing to write cassette "${name}" because it contains possible secrets: ${findings
.map((item) => `${item.path} (${item.reason})`)
.join(", ")}`,
}),
})
export const recordingLayer = (
name: string,
options: Omit<RecordReplayOptions, "directory"> = {},
): Layer.Layer<HttpClient.HttpClient, never, HttpClient.HttpClient | CassetteService.Service> =>
Layer.effect(
HttpClient.HttpClient,
Effect.gen(function* () {
const upstream = yield* HttpClient.HttpClient
const cassetteService = yield* CassetteService.Service
const requestHeadersAllow = options.requestHeaders ?? DEFAULT_REQUEST_HEADERS
const responseHeadersAllow = options.responseHeaders ?? DEFAULT_RESPONSE_HEADERS
const match = options.match ?? defaultMatcher
const mode = options.mode ?? "replay"
const sequential = options.dispatch === "sequential"
const replay = yield* Ref.make<Cassette | undefined>(undefined)
const cursor = yield* Ref.make(0)
const snapshotRequest = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
const raw = yield* Effect.promise(() => web.text())
const body = options.redactBody
? Option.match(decodeJson(raw), {
onNone: () => raw,
onSome: (parsed) => JSON.stringify(options.redactBody?.(parsed)),
})
: raw
return {
method: web.method,
url: redactUrl(web.url, options.redact?.query, options.redact?.url),
headers: redactHeaders(
Object.fromEntries(web.headers.entries()),
requestHeadersAllow,
options.redact?.headers,
),
body,
}
})
const selectInteraction = (cassette: Cassette, incoming: HttpInteraction["request"]) =>
Effect.gen(function* () {
const interactions = httpInteractions(cassette)
if (sequential) {
const index = yield* Ref.get(cursor)
const interaction = interactions[index]
if (!interaction)
return { interaction, detail: `interaction ${index + 1} of ${interactions.length} not recorded` }
if (!match(incoming, interaction.request)) {
return { interaction: undefined, detail: requestDiff(interaction.request, incoming).join("\n") }
}
yield* Ref.update(cursor, (n) => n + 1)
return { interaction, detail: "" }
}
const interaction = interactions.find((candidate) => match(incoming, candidate.request))
return { interaction, detail: interaction ? "" : mismatchDetail(cassette, incoming) }
})
const loadReplay = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const cached = yield* Ref.get(replay)
if (cached) return cached
const cassette = yield* cassetteService.read(name).pipe(Effect.mapError(() => fixtureMissing(request, name)))
yield* Ref.set(replay, cassette)
return cassette
})
return HttpClient.make((request) => {
if (mode === "passthrough") return upstream.execute(request)
if (mode === "record") {
return Effect.gen(function* () {
const currentRequest = yield* snapshotRequest(request)
const response = yield* upstream.execute(request)
const headers = responseHeaders(response, responseHeadersAllow, options.redact?.headers)
const captured = yield* captureResponseBody(response, headers["content-type"])
const interaction: HttpInteraction = {
transport: "http",
request: currentRequest,
response: { status: response.status, headers, ...captured },
}
const result = yield* cassetteService.append(name, interaction, options.metadata).pipe(Effect.orDie)
const findings = result.findings
if (findings.length > 0) return yield* unsafeCassette(request, name, findings)
return HttpClientResponse.fromWeb(
request,
new Response(decodeResponseBody(interaction.response), interaction.response),
)
})
}
return Effect.gen(function* () {
const cassette = yield* loadReplay(request)
const incoming = yield* snapshotRequest(request)
const { interaction, detail } = yield* selectInteraction(cassette, incoming)
if (!interaction) return yield* fixtureMismatch(request, name, detail)
return HttpClientResponse.fromWeb(
request,
new Response(decodeResponseBody(interaction.response), interaction.response),
)
})
})
}),
)
export const cassetteLayer = (name: string, options: RecordReplayOptions = {}): Layer.Layer<HttpClient.HttpClient> =>
recordingLayer(name, options).pipe(
Layer.provide(CassetteService.layer({ directory: options.directory })),
Layer.provide(FetchHttpClient.layer),
Layer.provide(NodeFileSystem.layer),
)

View File

@@ -0,0 +1,10 @@
export * from "./schema"
export * from "./redaction"
export * from "./matching"
export * from "./diff"
export * from "./storage"
export * from "./websocket"
export * from "./effect"
export * as Cassette from "./cassette"
export * as HttpRecorder from "."

View File

@@ -0,0 +1,36 @@
import { Option, Schema } from "effect"
import type { RequestSnapshot } from "./schema"
const JsonValue = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownOption(JsonValue)
const isRecord = (value: unknown): value is Record<string, unknown> =>
value !== null && typeof value === "object" && !Array.isArray(value)
export const canonicalizeJson = (value: unknown): unknown => {
if (Array.isArray(value)) return value.map(canonicalizeJson)
if (isRecord(value)) {
return Object.fromEntries(
Object.keys(value)
.toSorted()
.map((key) => [key, canonicalizeJson(value[key])]),
)
}
return value
}
export type RequestMatcher = (incoming: RequestSnapshot, recorded: RequestSnapshot) => boolean
export const canonicalSnapshot = (snapshot: RequestSnapshot): string =>
JSON.stringify({
method: snapshot.method,
url: snapshot.url,
headers: canonicalizeJson(snapshot.headers),
body: Option.match(decodeJson(snapshot.body), {
onNone: () => snapshot.body,
onSome: canonicalizeJson,
}),
})
export const defaultMatcher: RequestMatcher = (incoming, recorded) =>
canonicalSnapshot(incoming) === canonicalSnapshot(recorded)

View File

@@ -0,0 +1,112 @@
import type { Cassette } from "./schema"
export const REDACTED = "[REDACTED]"
const DEFAULT_REDACT_HEADERS = [
"authorization",
"cookie",
"proxy-authorization",
"set-cookie",
"x-api-key",
"x-amz-security-token",
"x-goog-api-key",
]
const DEFAULT_REDACT_QUERY = [
"access_token",
"api-key",
"api_key",
"apikey",
"code",
"key",
"signature",
"sig",
"token",
"x-amz-credential",
"x-amz-security-token",
"x-amz-signature",
]
const SECRET_PATTERNS: ReadonlyArray<{ readonly label: string; readonly pattern: RegExp }> = [
{ label: "bearer token", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{16,}\b/i },
{ label: "API key", pattern: /\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b/ },
{ label: "Anthropic API key", pattern: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/ },
{ label: "Google API key", pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/ },
{ label: "AWS access key", pattern: /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/ },
{ label: "GitHub token", pattern: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/ },
{ label: "private key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/ },
]
const ENV_SECRET_NAMES = /(?:API|AUTH|BEARER|CREDENTIAL|KEY|PASSWORD|SECRET|TOKEN)/i
const SAFE_ENV_VALUES = new Set(["fixture", "test", "test-key"])
const envSecrets = () =>
Object.entries(process.env).flatMap(([name, value]) => {
if (!value) return []
if (!ENV_SECRET_NAMES.test(name)) return []
if (value.length < 12) return []
if (SAFE_ENV_VALUES.has(value.toLowerCase())) return []
return [{ name, value }]
})
const pathFor = (base: string, key: string) => (base ? `${base}.${key}` : key)
const stringEntries = (value: unknown, base = ""): ReadonlyArray<{ readonly path: string; readonly value: string }> => {
if (typeof value === "string") return [{ path: base, value }]
if (Array.isArray(value)) return value.flatMap((item, index) => stringEntries(item, `${base}[${index}]`))
if (value && typeof value === "object") {
return Object.entries(value).flatMap(([key, child]) => stringEntries(child, pathFor(base, key)))
}
return []
}
const redactionSet = (values: ReadonlyArray<string> | undefined, defaults: ReadonlyArray<string>) =>
new Set([...defaults, ...(values ?? [])].map((value) => value.toLowerCase()))
export type UrlRedactor = (url: string) => string
export const redactUrl = (raw: string, query: ReadonlyArray<string> = DEFAULT_REDACT_QUERY, urlRedactor?: UrlRedactor) => {
if (!URL.canParse(raw)) return urlRedactor?.(raw) ?? raw
const url = new URL(raw)
if (url.username) url.username = REDACTED
if (url.password) url.password = REDACTED
const redacted = redactionSet(query, DEFAULT_REDACT_QUERY)
for (const key of [...url.searchParams.keys()]) {
if (redacted.has(key.toLowerCase())) url.searchParams.set(key, REDACTED)
}
return urlRedactor?.(url.toString()) ?? url.toString()
}
export const redactHeaders = (
headers: Record<string, string>,
allow: ReadonlyArray<string>,
redact: ReadonlyArray<string> = DEFAULT_REDACT_HEADERS,
) => {
const allowed = new Set(allow.map((name) => name.toLowerCase()))
const redacted = redactionSet(redact, DEFAULT_REDACT_HEADERS)
return Object.fromEntries(
Object.entries(headers)
.map(([name, value]) => [name.toLowerCase(), value] as const)
.filter(([name]) => allowed.has(name))
.map(([name, value]) => [name, redacted.has(name) ? REDACTED : value] as const)
.toSorted(([a], [b]) => a.localeCompare(b)),
)
}
export type SecretFinding = {
readonly path: string
readonly reason: string
}
export const secretFindings = (value: unknown): ReadonlyArray<SecretFinding> =>
stringEntries(value).flatMap((entry) => [
...SECRET_PATTERNS.filter((item) => item.pattern.test(entry.value)).map((item) => ({
path: entry.path,
reason: item.label,
})),
...envSecrets()
.filter((item) => entry.value.includes(item.value))
.map((item) => ({ path: entry.path, reason: `environment secret ${item.name}` })),
])
export const cassetteSecretFindings = (cassette: Cassette) => secretFindings(cassette)

View File

@@ -0,0 +1,67 @@
import { Schema } from "effect"
export const RequestSnapshotSchema = Schema.Struct({
method: Schema.String,
url: Schema.String,
headers: Schema.Record(Schema.String, Schema.String),
body: Schema.String,
})
export type RequestSnapshot = Schema.Schema.Type<typeof RequestSnapshotSchema>
export const ResponseSnapshotSchema = Schema.Struct({
status: Schema.Number,
headers: Schema.Record(Schema.String, Schema.String),
body: Schema.String,
bodyEncoding: Schema.optional(Schema.Literals(["text", "base64"])),
})
export type ResponseSnapshot = Schema.Schema.Type<typeof ResponseSnapshotSchema>
export const CassetteMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)
export type CassetteMetadata = Schema.Schema.Type<typeof CassetteMetadataSchema>
export const HttpInteractionSchema = Schema.Struct({
transport: Schema.tag("http"),
request: RequestSnapshotSchema,
response: ResponseSnapshotSchema,
})
export type HttpInteraction = Schema.Schema.Type<typeof HttpInteractionSchema>
export const WebSocketFrameSchema = Schema.Union([
Schema.Struct({ kind: Schema.tag("text"), body: Schema.String }),
Schema.Struct({ kind: Schema.tag("binary"), body: Schema.String, bodyEncoding: Schema.Literal("base64") }),
])
export type WebSocketFrame = Schema.Schema.Type<typeof WebSocketFrameSchema>
export const WebSocketInteractionSchema = Schema.Struct({
transport: Schema.tag("websocket"),
open: Schema.Struct({
url: Schema.String,
headers: Schema.Record(Schema.String, Schema.String),
}),
client: Schema.Array(WebSocketFrameSchema),
server: Schema.Array(WebSocketFrameSchema),
})
export type WebSocketInteraction = Schema.Schema.Type<typeof WebSocketInteractionSchema>
export const InteractionSchema = Schema.Union([HttpInteractionSchema, WebSocketInteractionSchema]).pipe(
Schema.toTaggedUnion("transport"),
)
export type Interaction = Schema.Schema.Type<typeof InteractionSchema>
export const isHttpInteraction = InteractionSchema.guards.http
export const isWebSocketInteraction = InteractionSchema.guards.websocket
export const httpInteractions = (cassette: Cassette) => cassette.interactions.filter(isHttpInteraction)
export const webSocketInteractions = (cassette: Cassette) => cassette.interactions.filter(isWebSocketInteraction)
export const CassetteSchema = Schema.Struct({
version: Schema.Literal(1),
metadata: Schema.optional(CassetteMetadataSchema),
interactions: Schema.Array(InteractionSchema),
})
export type Cassette = Schema.Schema.Type<typeof CassetteSchema>
export const decodeCassette = Schema.decodeUnknownSync(CassetteSchema)
export const encodeCassette = Schema.encodeSync(CassetteSchema)

View File

@@ -0,0 +1,34 @@
import { Option } from "effect"
import * as fs from "node:fs"
import * as path from "node:path"
import { encodeCassette, decodeCassette, type Cassette, type CassetteMetadata, type Interaction } from "./schema"
export const DEFAULT_RECORDINGS_DIR = path.resolve(process.cwd(), "test", "fixtures", "recordings")
export const cassettePath = (name: string, directory = DEFAULT_RECORDINGS_DIR) => path.join(directory, `${name}.json`)
export const metadataFor = (name: string, metadata: CassetteMetadata | undefined): CassetteMetadata => ({
name,
recordedAt: new Date().toISOString(),
...(metadata ?? {}),
})
export const cassetteFor = (
name: string,
interactions: ReadonlyArray<Interaction>,
metadata: CassetteMetadata | undefined,
): Cassette => ({
version: 1,
metadata: metadataFor(name, metadata),
interactions,
})
export const formatCassette = (cassette: Cassette) => `${JSON.stringify(encodeCassette(cassette), null, 2)}\n`
export const parseCassette = (raw: string) => decodeCassette(JSON.parse(raw))
export const hasCassetteSync = (name: string, options: { readonly directory?: string } = {}) => {
const file = cassettePath(name, options.directory)
if (!fs.existsSync(file)) return false
return Option.isSome(Option.liftThrowable(parseCassette)(fs.readFileSync(file, "utf8")))
}

View File

@@ -0,0 +1,204 @@
import { Effect, Option, Ref, Scope, Stream } from "effect"
import type { Headers } from "effect/unstable/http"
import * as CassetteService from "./cassette"
import { canonicalizeJson, decodeJson } from "./matching"
import { redactHeaders, redactUrl, type SecretFinding } from "./redaction"
import { webSocketInteractions, type CassetteMetadata, type WebSocketFrame, type WebSocketInteraction } from "./schema"
export const DEFAULT_WEBSOCKET_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
export interface WebSocketRequest {
readonly url: string
readonly headers: Headers.Headers
}
export interface WebSocketConnection<E> {
readonly sendText: (message: string) => Effect.Effect<void, E>
readonly messages: Stream.Stream<string | Uint8Array, E>
readonly close: Effect.Effect<void>
}
export interface WebSocketExecutor<E> {
readonly open: (request: WebSocketRequest) => Effect.Effect<WebSocketConnection<E>, E>
}
export interface WebSocketRecordReplayOptions<E> {
readonly name: string
readonly mode?: "record" | "replay" | "passthrough"
readonly metadata?: CassetteMetadata
readonly cassette: CassetteService.Interface
readonly live: WebSocketExecutor<E>
readonly redact?: {
readonly headers?: ReadonlyArray<string>
readonly query?: ReadonlyArray<string>
readonly url?: (url: string) => string
}
readonly requestHeaders?: ReadonlyArray<string>
readonly compareClientMessagesAsJson?: boolean
}
const headersRecord = (headers: Headers.Headers) =>
Object.fromEntries(
Object.entries(headers as Record<string, unknown>)
.filter((entry): entry is [string, string] => typeof entry[1] === "string")
.toSorted(([a], [b]) => a.localeCompare(b)),
)
const openSnapshot = (
request: WebSocketRequest,
options: Pick<WebSocketRecordReplayOptions<never>, "redact" | "requestHeaders"> = {},
) => ({
url: redactUrl(request.url, options.redact?.query, options.redact?.url),
headers: redactHeaders(
headersRecord(request.headers),
options.requestHeaders ?? DEFAULT_WEBSOCKET_REQUEST_HEADERS,
options.redact?.headers,
),
})
const textFrame = (body: string): WebSocketFrame => ({ kind: "text", body })
const frameText = (frame: WebSocketFrame) => {
if (frame.kind === "text") return frame.body
return new TextDecoder().decode(Buffer.from(frame.body, "base64"))
}
const frameMessage = (frame: WebSocketFrame) =>
frame.kind === "text" ? frame.body : new Uint8Array(Buffer.from(frame.body, "base64"))
const receivedFrame = (message: string | Uint8Array): WebSocketFrame =>
typeof message === "string"
? textFrame(message)
: { kind: "binary", body: Buffer.from(message).toString("base64"), bodyEncoding: "base64" }
const unsafeCassette = (name: string, findings: ReadonlyArray<SecretFinding>) =>
new Error(
`Refusing to write WebSocket cassette "${name}" because it contains possible secrets: ${findings
.map((item) => `${item.path} (${item.reason})`)
.join(", ")}`,
)
const mismatch = (message: string, actual: unknown, expected: unknown) =>
new Error(`${message}: expected ${JSON.stringify(expected)}, received ${JSON.stringify(actual)}`)
const assertEqual = (message: string, actual: unknown, expected: unknown) =>
Effect.sync(() => {
if (JSON.stringify(actual) === JSON.stringify(expected)) return
throw mismatch(message, actual, expected)
})
const jsonOrText = (value: string) => Option.match(decodeJson(value), { onNone: () => value, onSome: canonicalizeJson })
const compareClientMessage = (actual: string, expected: WebSocketFrame | undefined, index: number, asJson: boolean) => {
if (!expected)
return Effect.sync(() => {
throw new Error(`Unexpected WebSocket client frame ${index + 1}: ${actual}`)
})
const expectedText = frameText(expected)
if (!asJson) return assertEqual(`WebSocket client frame ${index + 1}`, actual, expectedText)
return assertEqual(`WebSocket client JSON frame ${index + 1}`, jsonOrText(actual), jsonOrText(expectedText))
}
export const makeWebSocketExecutor = <E>(
options: WebSocketRecordReplayOptions<E>,
): Effect.Effect<WebSocketExecutor<E>, never, Scope.Scope> =>
Effect.gen(function* () {
const mode = options.mode ?? "replay"
if (mode === "passthrough") return options.live
if (mode === "record") {
return {
open: (request) =>
Effect.gen(function* () {
const client: WebSocketFrame[] = []
const server: WebSocketFrame[] = []
const connection = yield* options.live.open(request)
const closed = yield* Ref.make(false)
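// Record mode appends the buffered interaction exactly once, on the first close.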
const closeOnce = Effect.gen(function* () {
if (yield* Ref.getAndSet(closed, true)) return
yield* connection.close
const result = yield* options.cassette
.append(
options.name,
{ transport: "websocket", open: openSnapshot(request, options), client, server },
options.metadata,
)
.pipe(Effect.orDie)
if (result.findings.length > 0) yield* Effect.die(unsafeCassette(options.name, result.findings))
})
return {
sendText: (message: string) =>
connection.sendText(message).pipe(Effect.tap(() => Effect.sync(() => client.push(textFrame(message))))),
messages: connection.messages.pipe(
Stream.map((message) => {
server.push(receivedFrame(message))
return message
}),
),
close: closeOnce,
}
}),
}
}
const replay = yield* Ref.make<{ readonly interactions: ReadonlyArray<WebSocketInteraction> } | undefined>(
undefined,
)
const cursor = yield* Ref.make(0)
yield* Effect.addFinalizer(() =>
Effect.gen(function* () {
const input = yield* Ref.get(replay)
if (!input) return
yield* assertEqual(
`Unused recorded WebSocket interactions in ${options.name}`,
yield* Ref.get(cursor),
input.interactions.length,
)
}),
)
const loadReplay = Effect.fn("WebSocketRecorder.loadReplay")(function* () {
const cached = yield* Ref.get(replay)
if (cached) return cached
const input = {
interactions: webSocketInteractions(yield* options.cassette.read(options.name).pipe(Effect.orDie)),
}
yield* Ref.set(replay, input)
return input
})
return {
open: (request) => {
return Effect.gen(function* () {
const input = yield* loadReplay()
const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1)
const interaction = input.interactions[index]
if (!interaction) return yield* Effect.die(new Error(`No recorded WebSocket interaction for ${request.url}`))
yield* assertEqual(`WebSocket open frame ${index + 1}`, openSnapshot(request, options), interaction.open)
const messageIndex = yield* Ref.make(0)
return {
sendText: (message: string) =>
Effect.gen(function* () {
const current = yield* Ref.getAndUpdate(messageIndex, (value) => value + 1)
yield* compareClientMessage(
message,
interaction.client[current],
current,
options.compareClientMessagesAsJson === true,
)
}),
messages: Stream.fromIterable(interaction.server).pipe(Stream.map(frameMessage)),
close: Effect.gen(function* () {
yield* assertEqual(
`WebSocket client frame count for interaction ${index + 1}`,
yield* Ref.get(messageIndex),
interaction.client.length,
)
}),
}
})
},
}
})

View File

@@ -0,0 +1,41 @@
{
"version": 1,
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/echo",
"headers": {
"content-type": "application/json"
},
"body": "{\"step\":1}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"reply\":\"first\"}"
}
},
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/echo",
"headers": {
"content-type": "application/json"
},
"body": "{\"step\":2}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"reply\":\"second\"}"
}
}
]
}

View File

@@ -0,0 +1,41 @@
{
"version": 1,
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/poll",
"headers": {
"content-type": "application/json"
},
"body": "{\"id\":\"job_1\"}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"status\":\"pending\"}"
}
},
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/poll",
"headers": {
"content-type": "application/json"
},
"body": "{\"id\":\"job_1\"}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"status\":\"complete\"}"
}
}
]
}

View File

@@ -0,0 +1,322 @@
import { NodeFileSystem } from "@effect/platform-node"
import { describe, expect, test } from "bun:test"
import { Cause, Effect, Exit, Scope, Stream } from "effect"
import { Headers, HttpBody, HttpClient, HttpClientRequest } from "effect/unstable/http"
import * as fs from "node:fs"
import * as os from "node:os"
import * as path from "node:path"
import { HttpRecorder } from "../src"
import { redactedErrorRequest } from "../src/diff"
const post = (url: string, body: object) =>
Effect.gen(function* () {
const http = yield* HttpClient.HttpClient
const request = HttpClientRequest.post(url, {
headers: { "content-type": "application/json" },
body: HttpBody.text(JSON.stringify(body), "application/json"),
})
const response = yield* http.execute(request)
return yield* response.text
})
const run = <A, E>(effect: Effect.Effect<A, E, HttpClient.HttpClient>) =>
Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer("record-replay/multi-step"))))
const runWith = <A, E>(
name: string,
options: HttpRecorder.RecordReplayOptions,
effect: Effect.Effect<A, E, HttpClient.HttpClient>,
) => Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer(name, options))))
const runRecorder = <A, E>(effect: Effect.Effect<A, E, HttpRecorder.Cassette.Service | Scope.Scope>) =>
Effect.runPromise(
Effect.scoped(
effect.pipe(
Effect.provide(
HttpRecorder.Cassette.layer({ directory: fs.mkdtempSync(path.join(os.tmpdir(), "http-recorder-")) }),
),
Effect.provide(NodeFileSystem.layer),
),
),
)
const failureText = (exit: Exit.Exit<unknown, unknown>) => {
if (Exit.isSuccess(exit)) return ""
return Cause.prettyErrors(exit.cause).join("\n")
}
describe("http-recorder", () => {
test("redacts sensitive URL query parameters", () => {
expect(
HttpRecorder.redactUrl(
"https://example.test/path?key=secret-google-key&api_key=secret-openai-key&safe=value&X-Amz-Signature=secret-signature",
),
).toBe(
"https://example.test/path?key=%5BREDACTED%5D&api_key=%5BREDACTED%5D&safe=value&X-Amz-Signature=%5BREDACTED%5D",
)
})
test("redacts URL credentials", () => {
expect(HttpRecorder.redactUrl("https://user:password@example.test/path?safe=value")).toBe(
"https://%5BREDACTED%5D:%5BREDACTED%5D@example.test/path?safe=value",
)
})
test("applies custom URL redaction after built-in redaction", () => {
expect(
HttpRecorder.redactUrl(
"https://example.test/accounts/real-account/path?key=secret-key",
undefined,
(url) => url.replace("/accounts/real-account/", "/accounts/{account}/"),
),
).toBe("https://example.test/accounts/{account}/path?key=%5BREDACTED%5D")
})
test("redacts sensitive headers when allow-listed", () => {
expect(
HttpRecorder.redactHeaders(
{
authorization: "Bearer secret-token",
"content-type": "application/json",
"x-custom-token": "custom-secret",
"x-api-key": "secret-key",
"x-goog-api-key": "secret-google-key",
},
["authorization", "content-type", "x-api-key", "x-goog-api-key", "x-custom-token"],
["x-custom-token"],
),
).toEqual({
authorization: "[REDACTED]",
"content-type": "application/json",
"x-api-key": "[REDACTED]",
"x-custom-token": "[REDACTED]",
"x-goog-api-key": "[REDACTED]",
})
})
test("redacts error requests without retaining headers, params, or body", () => {
const request = HttpClientRequest.post("https://example.test/path", {
headers: { authorization: "Bearer super-secret" },
body: HttpBody.text("super-secret-body", "text/plain"),
}).pipe(HttpClientRequest.setUrlParam("api_key", "super-secret-key"))
expect(redactedErrorRequest(request).toJSON()).toMatchObject({
url: "https://example.test/path",
urlParams: { params: [] },
headers: {},
body: { _tag: "Empty" },
})
})
test("detects secret-looking values without returning the secret", () => {
expect(
HttpRecorder.cassetteSecretFindings({
version: 1,
interactions: [
{
transport: "http",
request: {
method: "POST",
url: "https://example.test/path?key=sk-123456789012345678901234",
headers: {},
body: JSON.stringify({ nested: "AIzaSyDHibiBRvJZLsFnPYPoiTwxY4ztQ55yqCE" }),
},
response: {
status: 200,
headers: {},
body: "Bearer abcdefghijklmnopqrstuvwxyz",
},
},
],
}),
).toEqual([
{ path: "interactions[0].request.url", reason: "API key" },
{ path: "interactions[0].request.body", reason: "Google API key" },
{ path: "interactions[0].response.body", reason: "bearer token" },
])
})
test("detects secret-looking values inside metadata", () => {
expect(
HttpRecorder.cassetteSecretFindings({
version: 1,
metadata: { token: "sk-123456789012345678901234" },
interactions: [],
}),
).toEqual([{ path: "metadata.token", reason: "API key" }])
})
test("formats websocket cassettes with shared metadata", () => {
const cassette = HttpRecorder.cassetteFor(
"websocket/basic",
[
{
transport: "websocket",
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
},
],
{ provider: "openai" },
)
expect(cassette.metadata).toMatchObject({ name: "websocket/basic", provider: "openai" })
expect(HttpRecorder.parseCassette(HttpRecorder.formatCassette(cassette))).toEqual(cassette)
})
test("replays websocket interactions from the shared cassette service", async () => {
await runRecorder(
Effect.gen(function* () {
const cassette = yield* HttpRecorder.Cassette.Service
yield* cassette.write(
"websocket/replay",
HttpRecorder.cassetteFor(
"websocket/replay",
[
{
transport: "websocket",
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
},
],
undefined,
),
)
const executor = yield* HttpRecorder.makeWebSocketExecutor({
name: "websocket/replay",
cassette,
compareClientMessagesAsJson: true,
live: { open: () => Effect.die(new Error("unexpected live WebSocket open")) },
})
const connection = yield* executor.open({
url: "wss://example.test/realtime",
headers: Headers.fromInput({ "content-type": "application/json" }),
})
yield* connection.sendText(JSON.stringify({ type: "response.create" }))
const messages: Array<string | Uint8Array> = []
yield* connection.messages.pipe(Stream.runForEach((message) => Effect.sync(() => messages.push(message))))
yield* connection.close
expect(messages).toEqual([JSON.stringify({ type: "response.completed" })])
}),
)
})
test("records websocket interactions into the shared cassette service", async () => {
await runRecorder(
Effect.gen(function* () {
const cassette = yield* HttpRecorder.Cassette.Service
const executor = yield* HttpRecorder.makeWebSocketExecutor({
name: "websocket/record",
mode: "record",
metadata: { provider: "test" },
cassette,
live: {
open: () =>
Effect.succeed({
sendText: () => Effect.void,
messages: Stream.fromIterable([JSON.stringify({ type: "response.completed" })]),
close: Effect.void,
}),
},
})
const connection = yield* executor.open({
url: "wss://example.test/realtime",
headers: Headers.fromInput({ "content-type": "application/json" }),
})
yield* connection.sendText(JSON.stringify({ type: "response.create" }))
yield* connection.messages.pipe(Stream.runDrain)
yield* connection.close
expect(yield* cassette.read("websocket/record")).toMatchObject({
metadata: { name: "websocket/record", provider: "test" },
interactions: [
{
transport: "websocket",
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
},
],
})
}),
)
})
test("default matcher dispatches multi-interaction cassettes by request shape", async () => {
await run(
Effect.gen(function* () {
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
expect(yield* post("https://example.test/echo", { step: 1 })).toBe('{"reply":"first"}')
}),
)
})
test("sequential dispatch returns recorded responses in order for identical requests", async () => {
await runWith(
"record-replay/retry",
{ dispatch: "sequential" },
Effect.gen(function* () {
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"complete"}')
}),
)
})
test("default matcher returns the first match for identical requests", async () => {
await runWith(
"record-replay/retry",
{},
Effect.gen(function* () {
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
}),
)
})
test("sequential dispatch reports cursor exhaustion when more requests are made than recorded", async () => {
await runWith(
"record-replay/multi-step",
{ dispatch: "sequential" },
Effect.gen(function* () {
yield* post("https://example.test/echo", { step: 1 })
yield* post("https://example.test/echo", { step: 2 })
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
expect(Exit.isFailure(exit)).toBe(true)
}),
)
})
test("sequential dispatch still validates each recorded request", async () => {
await runWith(
"record-replay/multi-step",
{ dispatch: "sequential" },
Effect.gen(function* () {
yield* post("https://example.test/echo", { step: 1 })
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
expect(Exit.isFailure(exit)).toBe(true)
expect(failureText(exit)).toContain("$.step expected 2, received 3")
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
}),
)
})
test("mismatch diagnostics show closest redacted request differences", async () => {
await run(
Effect.gen(function* () {
const exit = yield* Effect.exit(
post("https://example.test/echo?api_key=secret-value", { step: 3, token: "sk-123456789012345678901234" }),
)
const message = failureText(exit)
expect(message).toContain("closest interaction: #1")
expect(message).toContain("url:")
expect(message).toContain("https://example.test/echo?api_key=%5BREDACTED%5D")
expect(message).toContain("body:")
expect(message).toContain("$.step expected 1, received 3")
expect(message).toContain('$.token expected undefined, received "[REDACTED]"')
expect(message).not.toContain("sk-123456789012345678901234")
}),
)
})
})

View File

@@ -0,0 +1,15 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["ESNext", "DOM", "DOM.Iterable"],
"noUncheckedIndexedAccess": false,
"plugins": [
{
"name": "@effect/language-service",
"transform": "@effect/language-service/transform",
"namespaceImportPackages": ["effect", "@effect/*"]
}
]
}
}

294
packages/llm/AGENTS.md Normal file
View File

@@ -0,0 +1,294 @@
# LLM Package Guide
## Effect
- Prefer `HttpClient.HttpClient` / `HttpClientResponse.HttpClientResponse` over web `fetch` / `Response` at package boundaries.
- Use `Stream.Stream` for streaming data flow. Avoid ad hoc async generators or manual web reader loops unless an Effect `Stream` API cannot model the behavior.
- Use Effect Schema codecs for JSON encode/decode (`Schema.fromJsonString(...)`) instead of direct `JSON.parse` / `JSON.stringify` in implementation code.
- In `Effect.gen`, yield yieldable errors directly (`return yield* new MyError(...)`) instead of `Effect.fail(new MyError(...))` (see the sketch after this list).
- Use `Effect.void` instead of `Effect.succeed(undefined)` when the successful value is intentionally void.
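A minimal sketch of the codec and yieldable-error conventions (the `DecodeError` class and `parse` helper are illustrative, and `Data.TaggedError` is assumed as the error base):
```ts
import { Data, Effect, Option, Schema } from "effect"

// Schema codec instead of bare JSON.parse in implementation code.
const JsonValue = Schema.fromJsonString(Schema.Unknown)
const decodeJson = Schema.decodeUnknownOption(JsonValue)

// Yieldable error: an instance can be yielded directly inside Effect.gen.
class DecodeError extends Data.TaggedError("DecodeError")<{ readonly raw: string }> {}

const parse = (raw: string) =>
  Effect.gen(function* () {
    const parsed = decodeJson(raw)
    // `return yield* new DecodeError(...)`, not Effect.fail(new DecodeError(...)).
    if (Option.isNone(parsed)) return yield* new DecodeError({ raw })
    return parsed.value
  })
```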
## Tests
- Use `testEffect(...)` from `test/lib/effect.ts` for tests requiring Effect layers (see the sketch after this list).
- Keep provider tests fixture-first. Live provider calls must stay behind `RECORD=true` and required API-key checks.
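A hedged sketch of that shape; `testEffect`'s exact signature lives in `test/lib/effect.ts`, and the env-var names here are illustrative:
```ts
import { Effect } from "effect"
// Assumed: a (name, effect) wrapper over bun:test that provides the Effect layers.
import { testEffect } from "./lib/effect"

// Fixture-first: this path replays a recorded cassette and needs no credentials.
testEffect("replies from the recorded fixture", Effect.void) // exercise the client here

// Live calls stay behind RECORD=true plus the required API key.
if (process.env.RECORD === "true" && process.env.OPENAI_API_KEY) {
  testEffect("re-records the fixture against the live provider", Effect.void)
}
```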
## Architecture
This package is an Effect Schema-first LLM core. The Schema classes in `src/schema/` are the canonical runtime data model. Convenience functions in `src/llm.ts` are thin constructors that return those same Schema class instances; they should improve callsites without creating a second model.
### Request Flow
The intended callsite is:
```ts
const request = LLM.request({
model: OpenAI.model("gpt-4o-mini", { apiKey }),
system: "You are concise.",
prompt: "Say hello.",
})
const response = yield* LLMClient.generate(request)
```
`LLM.request(...)` builds an `LLMRequest`. `LLMClient.generate(...)` selects a registered route by `request.model.route`, builds the provider-native body, asks the route's transport for a real `HttpClientRequest.HttpClientRequest`, sends it through `RequestExecutor.Service`, parses the provider stream into common `LLMEvent`s, and finally returns an `LLMResponse`.
Use `LLMClient.stream(request)` when callers want incremental `LLMEvent`s. Use `LLMClient.generate(request)` when callers want those same events collected into an `LLMResponse`. Use `LLMClient.prepare<Body>(request)` to compile a request through the route pipeline without sending it — the optional `Body` type argument narrows `.body` to the route's native shape (e.g. `prepare<OpenAIChatBody>(...)` returns a `PreparedRequestOf<OpenAIChatBody>`). The runtime body is identical; the generic is a type-level assertion.
Filter or narrow `LLMEvent` streams with `LLMEvent.is.*` (camelCase guards, e.g. `events.filter(LLMEvent.is.toolCall)`). The kebab-case `LLMEvent.guards["tool-call"]` form also works, but prefer `is.*` in new code.
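A short sketch tying those together, in the style of the callsite above (`events` is assumed to be a collected array of `LLMEvent`s):
```ts
// Compile through the route pipeline without sending; the generic only narrows types.
const prepared = yield* LLMClient.prepare<OpenAIChatBody>(request)
// prepared: PreparedRequestOf<OpenAIChatBody>; prepared.body is the provider-native shape.

// Narrow collected events with the camelCase guards.
const toolCalls = events.filter(LLMEvent.is.toolCall)
```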
### Routes
A route is the registered, runnable composition of four orthogonal pieces:
- **`Protocol`** (`src/route/protocol.ts`) — semantic API contract. Owns request body construction (`body.from`), the body schema (`body.schema`), the streaming-event schema (`stream.event`), and the event-to-`LLMEvent` state machine (`stream.step`). `Route.make(...)` validates and JSON-encodes the body from `body.schema` and decodes frames with `stream.event`. Examples: `OpenAIChat.protocol`, `OpenAIResponses.protocol`, `AnthropicMessages.protocol`, `Gemini.protocol`, `BedrockConverse.protocol`.
- **`Endpoint`** (`src/route/endpoint.ts`) — path construction. The host always lives on `model.baseURL`; the endpoint just supplies the path. `Endpoint.path("/chat/completions")` is the common case; pass a function for paths that embed the model id or a body field (e.g. ``Endpoint.path(({ body }) => `/model/${body.modelId}/converse-stream`)``).
- **`Auth`** (`src/route/auth.ts`) — per-request transport authentication. Routes read `model.apiKey` at request time via `Auth.bearer` (the default; sets `Authorization: Bearer <apiKey>`) or `Auth.apiKeyHeader(name)` for providers that use a custom header (Anthropic `x-api-key`, Gemini `x-goog-api-key`). Routes that need per-request signing (Bedrock SigV4, future Vertex IAM, Azure AAD) implement `Auth` as a function that signs the body and merges signed headers into the result.
- **`Framing`** (`src/route/framing.ts`) — bytes → frames. SSE (`Framing.sse`) is shared; Bedrock keeps its AWS event-stream framing as a typed `Framing<object>` value alongside its protocol.
Compose them via `Route.make(...)`:
```ts
export const route = Route.make({
id: "openai-chat",
provider: "openai",
protocol: OpenAIChat.protocol,
transport: HttpTransport.httpJson({
endpoint: Endpoint.path("/chat/completions"),
auth: Auth.bearer(),
framing: Framing.sse,
encodeBody,
}),
defaults: {
baseURL: "https://api.openai.com/v1",
capabilities: capabilities({ tools: { calls: true, streamingInput: true } }),
},
})
```
The four-axis decomposition is the reason DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, and DeepInfra all reuse `OpenAIChat.protocol` verbatim — each provider deployment is a 5-15 line `Route.make(...)` call instead of a 300-400 line route clone. Bug fixes in one protocol propagate to every consumer of that protocol in a single commit.
When a provider ships a non-HTTP transport (OpenAI's WebSocket Responses backend, hypothetical bidirectional streaming APIs), the seam is `Transport`: `WebSocketTransport.json(...)` constructs a transport whose `prepare` builds a WebSocket URL and message and whose `frames` yields decoded text from the socket. Same protocol, different transport.
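A hedged sketch of that seam; only the `Route.make` shape and the `WebSocketTransport.json` name come from this guide, so the transport options and ids are assumptions:
```ts
export const route = Route.make({
  id: "openai-responses-ws", // hypothetical id
  provider: "openai",
  protocol: OpenAIResponses.protocol, // same protocol as the HTTP route
  transport: WebSocketTransport.json({
    // assumed options: build the socket URL and first message in prepare,
    // yield decoded text from the socket in frames
  }),
  defaults: { baseURL: "wss://api.openai.com/v1" }, // illustrative host
})
```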
### URL Construction
`model.baseURL` is required; `Endpoint` only carries the path. Each protocol's `Route.make` includes a canonical URL in `defaults.baseURL` (e.g. `https://api.openai.com/v1`); provider helpers can override by passing `baseURL` in their input. Routes that have no canonical URL (OpenAI-compatible Chat, GitHub Copilot) set `baseURL: string` (required) on their input type so TypeScript catches a missing host at the call site.
For providers where the URL is derived from typed inputs (Azure resource name, Bedrock region), the provider helper computes `baseURL` at model construction time. Use `AtLeastOne<T>` from `route/auth-options.ts` for inputs that accept either of two derivation paths (Azure: `resourceName` or `baseURL`).
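For example, a sketch of the Azure input, assuming `AtLeastOne` requires at least one of the listed keys (the endpoint format below is the standard Azure OpenAI one):
```ts
// Either derivation path satisfies the input; an empty object is a type error.
type AzureModelInput = AtLeastOne<{ resourceName: string; baseURL: string }>

// Illustrative: the provider helper computes baseURL at model construction time.
const azureBaseURL = (input: AzureModelInput) =>
  input.baseURL ?? `https://${input.resourceName}.openai.azure.com/openai`
```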
### Provider Definitions
Provider-facing APIs are defined with `Provider.make(...)` from `src/provider.ts`:
```ts
export const provider = Provider.make({
id: ProviderID.make("openai"),
model: responses,
apis: { responses, chat },
})
export const model = provider.model
export const apis = provider.apis
```
Keep provider definitions small and explicit:
- Use only `id`, `model`, and optional `apis` in `Provider.make(...)`.
- Use branded `ProviderID.make(...)` and `ModelID.make(...)` where ids are constructed directly.
- Use `model` for the default API path and `apis` for named provider-native alternatives such as OpenAI `responses` versus `chat`.
- Do not add author-facing `kind`, `version`, or `routes` fields.
- Export lower-level `routes` arrays separately only when advanced internal wiring needs them.
- Prefer `apiKey` as provider-specific sugar and `auth` as the explicit override; keep them mutually exclusive in provider option types with `ProviderAuthOption`.
- Resolve `apiKey` to `Auth` with `AuthOptions.bearer(options, "<PROVIDER>_API_KEY")` (it honors an explicit `auth` override and falls back to `Auth.config(envVar)` so missing keys surface a typed `Authentication` error rather than a runtime crash); a sketch follows this list.
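A sketch of those last two rules together (the option type body is illustrative):
```ts
// `apiKey` and `auth` stay mutually exclusive via ProviderAuthOption; the
// helper resolves whichever is set and falls back to the env var.
export type XaiModelOptions = ProviderAuthOption & {
  readonly baseURL?: string
}

const xaiAuth = (options: XaiModelOptions) => AuthOptions.bearer(options, "XAI_API_KEY")
```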
Built-in providers are namespace modules from `src/providers/index.ts`, so aliases like `OpenAI.model(...)`, `OpenAI.responses(...)`, and `OpenAI.apis.chat(...)` are fine. External provider packages should default-export the `Provider.make(...)` result and may add named aliases if useful.
### Folder layout
```
packages/llm/src/
schema/ canonical Schema model, split by concern
ids.ts branded IDs, literal types, ProviderMetadata
options.ts Generation/Provider/Http options, Capabilities, Limits, ModelRef
messages.ts content parts, Message, ToolDefinition, LLMRequest
events.ts Usage, individual events, LLMEvent, PreparedRequest, LLMResponse
errors.ts error reasons, LLMError, ToolFailure
index.ts barrel
llm.ts request constructors and convenience helpers
route/
index.ts @opencode-ai/llm/route advanced barrel
client.ts Route.make + LLMClient.prepare/stream/generate
executor.ts RequestExecutor service + transport error mapping
protocol.ts Protocol type + Protocol.make
endpoint.ts Endpoint type + Endpoint.path
auth.ts Auth type + Auth.bearer / Auth.apiKeyHeader / Auth.passthrough
auth-options.ts ProviderAuthOption shape, AuthOptions.bearer, AtLeastOne helper
framing.ts Framing type + Framing.sse
transport/ transport implementations
index.ts Transport type + HttpTransport / WebSocketTransport namespaces
http.ts HttpTransport.httpJson — POST + framing
websocket.ts WebSocketTransport.json + WebSocketExecutor service
protocols/
shared.ts ProviderShared toolkit used inside protocol impls
openai-chat.ts protocol + route (compose OpenAIChat.protocol)
openai-responses.ts
anthropic-messages.ts
gemini.ts
bedrock-converse.ts
bedrock-event-stream.ts framing for AWS event-stream binary frames
openai-compatible-chat.ts route that reuses OpenAIChat.protocol, no canonical URL
utils/ per-protocol helpers (auth, cache, media, tool-stream, ...)
providers/
openai-compatible.ts generic compatible helper + family model helpers
openai-compatible-profile.ts family defaults (deepseek, togetherai, ...)
azure.ts / amazon-bedrock.ts / github-copilot.ts / google.ts / xai.ts / openai.ts / anthropic.ts / openrouter.ts
tool.ts typed tool() helper
tool-runtime.ts implementation helpers for LLMClient tool execution
```
The dependency arrow points down: `providers/*.ts` files import `protocols`, `endpoint`, `auth`, and `framing`; protocols do not import provider metadata. Lower-level modules know nothing about specific providers.
### Shared protocol helpers
`ProviderShared` exports a small toolkit used inside protocol implementations to keep them focused on provider-native shapes:
- `joinText(parts)` — joins an array of `TextPart` (or anything with a `.text`) with newlines. Use this anywhere a protocol flattens text content into a single string for a provider field.
- `parseToolInput(route, name, raw)` — Schema-decodes a tool-call argument string with the canonical "Invalid JSON input for `<route>` tool call `<name>`" error message. Treats empty input as `{}`.
- `parseJson(route, raw, message)` — generic JSON-via-Schema decode for non-tool bodies.
- `eventError(route, message, ...)` — typed `InvalidProviderOutput` constructor for stream-time decode failures.
- `validateWith(decoder)` — maps Schema decode errors to `InvalidRequest`. `Route.make(...)` uses this for body validation; lower-level routes can reuse it.
- `matchToolChoice(provider, choice, branches)` — branches over `LLMRequest["toolChoice"]` for provider-specific lowering.
If you find yourself copying a 3-to-5-line snippet between two protocols, lift it into `ProviderShared` next to these helpers rather than duplicating.
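For example, a handler might lean on them like this (a sketch; signatures follow the descriptions above rather than the actual `protocols/shared.ts` exports):
```ts
import { Effect } from "effect"
import { ProviderShared } from "./shared"

// Decode accumulated tool-call arguments: empty input becomes {}, bad JSON
// surfaces as the canonical "Invalid JSON input for ..." error.
const onToolCallDone = (name: string, raw: string) =>
  Effect.gen(function* () {
    const input = yield* ProviderShared.parseToolInput("openai-chat", name, raw)
    return { type: "tool-call" as const, id: name, name, input }
  })
```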
### Tools
Tool loops are represented in common messages and events:
```ts
const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })
const result = LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } })
const followUp = LLM.request({
model,
messages: [LLM.user("Weather?"), LLM.assistant([call]), result],
})
```
Routes lower these into provider-native assistant tool-call messages and tool-result messages. Streaming providers should emit `tool-input-delta` events while arguments arrive, then a final `tool-call` event with parsed input.
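Illustratively, one streamed tool call surfaces as a sequence like this (values invented; the exact `tool-input-delta` field names are an assumption):
```ts
const toolRound = [
  { type: "tool-input-delta", id: "call_1", delta: '{"query":' },
  { type: "tool-input-delta", id: "call_1", delta: '"weather"}' },
  { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
  { type: "request-finish", reason: "tool-calls" },
]
```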
### Tool runtime
`LLM.stream({ request, tools })` executes model-requested tools with full type safety. Plain `LLM.stream(request)` only streams the model; if `request.tools` contains schemas, tool calls are returned for the caller to handle. Use `toolExecution: "none"` to pass executable tool definitions as schemas without invoking handlers. Add `stopWhen` to opt into follow-up model rounds after tool results.
```ts
const get_weather = tool({
description: "Get current weather for a city",
parameters: Schema.Struct({ city: Schema.String }),
success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
execute: ({ city }) =>
Effect.gen(function* () {
// city: string — typed from parameters Schema
const data = yield* WeatherApi.fetch(city)
return { temperature: data.temp, condition: data.cond }
// return type checked against success Schema
}),
})
const events = yield* LLM.stream({
request,
tools: { get_weather, get_time, ... },
stopWhen: LLM.stepCountIs(10),
}).pipe(Stream.runCollect)
```
The runtime:
- Adds tool definitions (derived from each tool's `parameters` Schema via `Schema.toJsonSchemaDocument`) onto `request.tools`.
- Streams the model.
- On `tool-call`: looks up the named tool, decodes input against `parameters` Schema, dispatches to the typed `execute`, encodes the result against `success` Schema, emits `tool-result`.
- Emits local `tool-result` events in the same step by default.
- Loops only when `stopWhen` is provided and the step finishes with `tool-calls`, appending the assistant + tool messages.
Handler dependencies (services, permissions, plugin hooks, abort handling) are closed over by the consumer at tool-construction time. The runtime's only environment requirement is `RequestExecutor.Service`. Build the tools record inside an `Effect.gen` once and reuse it across many runs.
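A minimal sketch of that pattern (`WeatherApi` is the same hypothetical service used above):
```ts
// Resolve handler dependencies once, close over them in execute, and reuse
// the returned record across many LLM.stream runs.
const makeTools = Effect.gen(function* () {
  const weather = yield* WeatherApi
  return {
    get_weather: tool({
      description: "Get current weather for a city",
      parameters: Schema.Struct({ city: Schema.String }),
      success: Schema.Struct({ forecast: Schema.String }),
      execute: ({ city }) => Effect.map(weather.fetch(city), (data) => ({ forecast: data.cond })),
    }),
  }
})
```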
Errors must be expressed as `ToolFailure`. The runtime catches it and emits a `tool-error` event, then a `tool-result` of `type: "error"`, so the model can self-correct on the next step. Anything that is not a `ToolFailure` is treated as a defect and fails the stream. Three recoverable error paths produce `tool-error` events:
- The model called an unknown tool name.
- Input failed the `parameters` Schema.
- The handler returned a `ToolFailure` (sketched below).
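A hedged sketch of the handler path, assuming `ToolFailure` carries a `message` (check `tool.ts` for the exact constructor):
```ts
const get_weather = tool({
  description: "Get current weather for a city",
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ forecast: Schema.String }),
  execute: ({ city }) =>
    WeatherApi.fetch(city).pipe(
      // Recoverable: the runtime emits tool-error, then a tool-result of
      // type "error", and the model can self-correct on the next step.
      Effect.mapError(() => new ToolFailure({ message: `No forecast available for ${city}` })),
      Effect.map((data) => ({ forecast: data.cond })),
    ),
})
```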
Provider-defined / hosted tools (Anthropic `web_search` / `code_execution` / `web_fetch`, OpenAI Responses `web_search_call` / `file_search_call` / `code_interpreter_call` / `mcp_call` / `local_shell_call` / `image_generation_call` / `computer_use_call`) pass through the runtime untouched:
- Routes surface the model's call as a `tool-call` event with `providerExecuted: true`, and the provider's result as a matching `tool-result` event with `providerExecuted: true`.
- The runtime detects `providerExecuted` on `tool-call` and **skips client dispatch** — no handler is invoked and no `tool-error` is raised for "unknown tool". The provider already executed it.
- Both events are appended to the assistant message in `assistantContent` so the next round's history carries the call + result for context. Anthropic encodes them back as `server_tool_use` + `web_search_tool_result` (or `code_execution_tool_result` / `web_fetch_tool_result`) blocks; OpenAI Responses callers typically use `previous_response_id` instead of resending hosted-tool items.
Add provider-defined tools to `request.tools` (no runtime entry needed). The matching route must know how to lower the tool definition into the provider-native shape; right now Anthropic accepts `web_search` / `code_execution` / `web_fetch` and OpenAI Responses accepts the hosted tool names listed above.
## Protocol File Style
Protocol files should look self-similar. Provider quirks belong behind named helpers so a new route can be reviewed by comparing the same sections across files.
### Section order
Use this order for every protocol module:
1. Public model input
2. Request body schema
3. Streaming event schema
4. Parser state
5. Request body construction (`fromRequest`)
6. Stream parsing (`step` and per-event handlers)
7. Protocol and route
8. Model helper
### Rules
- Keep protocol files focused on the protocol. Move provider-specific projection, signing, media normalization, or other bulky transformations into `src/protocols/utils/*`.
- Use `Effect.fn("Provider.fromRequest")` for request body construction entrypoints. Use `Effect.fn(...)` for event handlers that yield effects; keep purely synchronous handlers as plain functions returning a `StepResult` that the dispatcher lifts via `Effect.succeed(...)`.
- Parser state owns terminal information. The state machine records finish reason, usage, and pending tool calls; emit one terminal `request-finish` (or `provider-error`) when a `terminal` event arrives. If a provider splits reason and usage across events, merge them in parser state before flushing.
- Emit exactly one terminal `request-finish` event for a completed response. Use `stream.terminal` to signal the run is over and have `step` emit the final event.
- Use shared helpers for repeated protocol policy such as text joining, usage totals, JSON parsing, and tool-call accumulation. `ToolStream` (`protocols/utils/tool-stream.ts`) accumulates streamed tool-call arguments uniformly.
- Make intentional provider differences explicit in helper names or comments. If two protocol files differ visually, the reason should be obvious from the names.
- Prefer dispatched per-event handlers (`onMessageStart`, `onContentBlockDelta`, ...) called from a small top-level `step` switch over a long if-chain. The dispatcher keeps the event surface visible at a glance; see the sketch after this list.
- Keep tests in the same conceptual order as the protocol: basic prepare, tools prepare, unsupported lowering, text/usage parsing, tool streaming, finish reasons, provider errors.
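The dispatcher shape, sketched with placeholder state and event types:
```ts
// Top-level switch stays small; each provider event gets a named handler.
const step = (state: ParserState, event: ProviderEvent) => {
  switch (event.type) {
    case "message_start":
      return Effect.succeed(onMessageStart(state, event)) // pure handler, lifted
    case "content_block_delta":
      return onContentBlockDelta(state, event) // effectful handler
    default:
      return Effect.succeed([state, []] as const)
  }
}
```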
### Review checklist
- Can the file be skimmed side-by-side with `openai-chat.ts` without hunting for equivalent sections?
- Are provider quirks named, isolated, and covered by focused tests?
- Does request body construction validate unsupported common content at the protocol boundary?
- Does stream parsing emit stable common events without leaking provider event order to callers?
- Does `toolChoice: "none"` behavior read as intentional?
## Recording Tests
Recorded tests use one cassette file per scenario. A cassette holds an ordered array of `{ request, response }` interactions, so multi-step flows (tool loops, retries, polling) record into a single file. Use `recordedTests({ prefix, requires })` and let the helper derive cassette names from test names:
```ts
const recorded = recordedTests({ prefix: "openai-chat", requires: ["OPENAI_API_KEY"] })
recorded.effect("streams text", () =>
Effect.gen(function* () {
// test body
}),
)
```
Replay is the default. `RECORD=true` records fresh cassettes and requires the listed env vars. Cassettes are written as pretty-printed JSON so multi-interaction diffs stay reviewable.
Pass `provider`, `protocol`, and optional `tags` to `recordedTests(...)` / `recorded.effect.with(...)` so cassettes carry searchable metadata. Use recorded-test filters to replay or record a narrow subset without rewriting a whole file:
- `RECORDED_PROVIDER=openai` matches tests tagged with `provider:openai`; comma-separated values are allowed.
- `RECORDED_PREFIX=openai-chat` matches cassette groups by `recordedTests({ prefix })`; comma-separated values are allowed.
- `RECORDED_TAGS=tool` requires all listed tags to be present, e.g. `RECORDED_TAGS=provider:togetherai,tool`.
- `RECORDED_TEST="streams text"` matches by test name, kebab-case test id, or cassette path.
Filters apply in replay and record mode. Combine them with `RECORD=true` when refreshing only one provider or scenario.
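A sketch of a tagged group (the exact `recorded.effect.with(...)` call shape is an assumption):
```ts
const recorded = recordedTests({
  prefix: "togetherai-chat",
  provider: "togetherai",
  protocol: "openai-chat",
  requires: ["TOGETHER_AI_API_KEY"],
})

// Replayable with RECORDED_TAGS=provider:togetherai,tool
recorded.effect.with({ tags: ["tool"] })("streams tool calls", () =>
  Effect.gen(function* () {
    // test body
  }),
)
```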
**Binary response bodies.** Most providers stream text (SSE, JSON). AWS Bedrock streams binary AWS event-stream frames whose CRC32 fields would be mangled by a UTF-8 round-trip — those bodies are stored as base64 with `bodyEncoding: "base64"` on the response snapshot. Detection is by `Content-Type` in `@opencode-ai/http-recorder` (currently `application/vnd.amazon.eventstream` and `application/octet-stream`); cassettes for SSE/JSON routes omit the field and decode as text.
**Matching strategies.** Replay defaults to structural matching, which finds an interaction by comparing method, URL, allow-listed headers, and the canonical JSON body. This is the right choice for tool loops because each round's request differs (the message history grows). For scenarios where successive requests are byte-identical and expect different responses (retries, polling), pass `dispatch: "sequential"` in `RecordReplayOptions` — replay then walks the cassette in record order via an internal cursor. `scriptedResponses` (in `test/lib/http.ts`) is the deterministic counterpart for tests that don't need a live provider; it scripts response bodies in order without reading from disk.
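For instance, assuming `recordedTests` forwards `dispatch` through to `RecordReplayOptions`:
```ts
// Successive byte-identical requests (retries, polling) replay in record order.
const recorded = recordedTests({
  prefix: "openai-retry",
  requires: ["OPENAI_API_KEY"],
  dispatch: "sequential",
})
```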
Do not blanket re-record an entire test file when adding one cassette. `RECORD=true` rewrites every recorded case that runs, and provider streams contain volatile IDs, timestamps, fingerprints, and obfuscation fields. Prefer deleting the one cassette you intend to refresh, or run a focused test pattern that only registers the scenario you want to record. Keep stable existing cassettes unchanged unless their request shape or expected behavior changed.

242
packages/llm/example/tutorial.ts Normal file
View File

@@ -0,0 +1,242 @@
import { Config, Effect, Formatter, Layer, Schema, Stream } from "effect"
import { LLM, LLMClient, Provider, ProviderID, Tool, type ProviderModelOptions } from "@opencode-ai/llm"
import { Route, Auth, Endpoint, Framing, Protocol, RequestExecutor } from "@opencode-ai/llm/route"
import { OpenAI } from "@opencode-ai/llm/providers"
/**
* A runnable walkthrough of the LLM package use-site API.
*
* Run from `packages/llm` with an OpenAI key in the environment:
*
* OPENAI_API_KEY=... bun example/tutorial.ts
*
* The file is intentionally written as a normal TypeScript program. You can
* hover imports and local values to see how the public API is typed.
*/
const apiKey = Config.redacted("OPENAI_API_KEY")
// 1. Pick a model. The provider helper records provider identity, protocol
// choice, capabilities, deployment options, authentication, and defaults.
const model = OpenAI.model("gpt-4o-mini", {
apiKey,
generation: { maxTokens: 160 },
providerOptions: {
openai: { store: false },
},
})
// 2. Build a provider-neutral request. This is useful when reusing one request
// across generate and stream examples.
//
// Options can live on both the model and the request:
//
// - `generation`: common controls such as max tokens, temperature, topP/topK,
// penalties, seed, and stop sequences.
// - `providerOptions`: namespaced provider-native behavior. For example,
// OpenAI cache keys and store behavior, Anthropic thinking, Gemini thinking
// config, or OpenRouter routing/reasoning.
// - `http`: last-resort serializable overlays for final request body, headers,
// and query params. Prefer typed `providerOptions` when a field is stable.
//
// Model options are defaults. Request options override them for this call.
const request = LLM.request({
model,
system: "You are concise and practical.",
prompt: "Tell me a joke",
generation: { maxTokens: 80, temperature: 0.7 },
providerOptions: {
openai: { promptCacheKey: "tutorial-joke" },
},
})
// `http` is intentionally not needed for normal calls. This shows the shape for
// newly released provider fields before they deserve a typed provider option.
const rawOverlayExample = LLM.request({
model,
prompt: "Show the final HTTP overlay shape.",
http: {
body: { metadata: { example: "tutorial" } },
headers: { "x-opencode-tutorial": "1" },
query: { debug: "1" },
},
})
// 3. `generate` sends the request and collects the event stream into one
// response object. `response.text` is the collected text output.
const generateOnce = Effect.gen(function* () {
const response = yield* LLM.generate(request)
console.log("\n== generate ==")
console.log("generated text:", response.text)
console.log("usage", Formatter.formatJson(response.usage, { space: 2 }))
})
// 4. `stream` exposes provider output as common `LLMEvent`s for UIs that want
// incremental text, reasoning, tool input, usage, or finish events.
const streamText = LLM.stream(request).pipe(
Stream.tap((event) =>
Effect.sync(() => {
if (event.type === "text-delta") process.stdout.write(`\ntext: ${event.text}`)
if (event.type === "request-finish") process.stdout.write(`\nfinish: ${event.reason}\n`)
}),
),
Stream.runDrain,
)
// 5. Tools are typed with Effect Schema. Passing tools to `LLMClient.stream`
// adds their definitions to the request and dispatches matching tool calls.
// Add `stopWhen` to opt into follow-up model rounds after tool results.
const tools = {
get_weather: Tool.make({
description: "Get current weather for a city.",
parameters: Schema.Struct({ city: Schema.String }),
success: Schema.Struct({ forecast: Schema.String }),
execute: (input) => Effect.succeed({ forecast: `${input.city}: sunny, 72F` }),
}),
}
const streamWithTools = LLM.stream({
request: LLM.request({
model,
prompt: "Use get_weather for San Francisco, then answer in one sentence.",
generation: { maxTokens: 80, temperature: 0 },
}),
tools,
stopWhen: LLM.stepCountIs(3),
}).pipe(
Stream.tap((event) =>
Effect.sync(() => {
if (event.type === "tool-call") console.log("tool call", event.name, event.input)
if (event.type === "tool-result") console.log("tool result", event.name, event.result)
if (event.type === "text-delta") process.stdout.write(event.text)
}),
),
Stream.runDrain,
)
// 6. `generateObject` is the structured-output helper. It forces a synthetic
// tool call internally, so the same call site works across providers instead of
// depending on provider-specific JSON mode flags.
const WeatherReport = Schema.Struct({
city: Schema.String,
forecast: Schema.String,
highFahrenheit: Schema.Number,
})
const generateStructuredObject = Effect.gen(function* () {
const response = yield* LLM.generateObject({
model,
system: "Return only structured weather data.",
prompt: "Give me today's weather for San Francisco.",
schema: WeatherReport,
generation: { maxTokens: 120, temperature: 0 },
})
console.log("\n== generateObject ==")
console.log(Formatter.formatJson(response.object, { space: 2 }))
})
// If the shape is only known at runtime, pass raw JSON Schema instead. The
// `.object` type is `unknown`; callers that need static types should validate it.
const generateDynamicObject = LLM.generateObject({
model,
prompt: "Extract the city and forecast from: San Francisco is sunny.",
jsonSchema: {
type: "object",
properties: {
city: { type: "string" },
forecast: { type: "string" },
},
required: ["city", "forecast"],
},
})
// -----------------------------------------------------------------------------
// Part 2: provider composition with a fake provider
// -----------------------------------------------------------------------------
// A protocol is the provider-native API shape: common request -> body, response
// frames -> common events. This fake one turns text prompts into a JSON body
// and treats every SSE frame as output text.
const FakeBody = Schema.Struct({
model: Schema.String,
input: Schema.String,
})
type FakeBody = Schema.Schema.Type<typeof FakeBody>
const FakeProtocol = Protocol.make<FakeBody, string, string, void>({
// Protocol ids are open strings, so external packages can define their own
// protocols without changing this package.
id: "fake-echo",
body: {
schema: FakeBody,
from: (request) =>
Effect.succeed({
model: request.model.id,
input: request.messages
.flatMap((message) => message.content)
.filter((part) => part.type === "text")
.map((part) => part.text)
.join("\n"),
}),
},
stream: {
event: Schema.String,
initial: () => undefined,
step: (_, frame) => Effect.succeed([undefined, [{ type: "text-delta", text: frame }]] as const),
onHalt: () => [{ type: "request-finish", reason: "stop" }],
},
})
// A route is the runnable binding for that protocol. It adds the deployment
// axes that the protocol deliberately does not know: URL, auth, and framing.
const FakeAdapter = Route.make({
id: "fake-echo",
protocol: FakeProtocol,
endpoint: Endpoint.path("/v1/echo"),
auth: Auth.passthrough,
framing: Framing.sse,
})
// A provider module exports a Provider definition. The default `model` helper
// sets provider identity, protocol id, and the route id resolved by the registry.
const fakeEchoModel = Route.model(FakeAdapter, { provider: "fake-echo", baseURL: "https://fake.local" })
const FakeEcho = Provider.make({
id: ProviderID.make("fake-echo"),
model: (id: string, options: ProviderModelOptions = {}) => fakeEchoModel({ id, ...options }),
})
// `LLMClient.prepare` is the lower-level inspection hook: it compiles through
// body conversion, validation, endpoint, auth, and HTTP construction without
// sending anything over the network.
const inspectFakeProvider = Effect.gen(function* () {
const prepared = yield* LLMClient.prepare(
LLM.request({
model: FakeEcho.model("tiny-echo"),
prompt: "Show me the provider pipeline.",
}),
)
console.log("\n== fake provider prepare ==")
console.log("route:", prepared.route)
console.log("body:", Formatter.formatJson(prepared.body, { space: 2 }))
})
// Provide the LLM runtime and the HTTP request executor once. Keep one path
// enabled at a time so the tutorial can demonstrate generate, prepare, stream,
// or tool-loop behavior without spending tokens on every example.
const requestExecutorLayer = RequestExecutor.defaultLayer
const llmClientLayer = LLMClient.layer.pipe(Layer.provide(requestExecutorLayer))
const program = Effect.gen(function* () {
// yield* generateOnce
// yield* inspectFakeProvider
// yield* LLMClient.prepare(rawOverlayExample).pipe(Effect.andThen((prepared) => Effect.sync(() => console.log(prepared.body))))
// yield* streamText
// yield* generateStructuredObject
// yield* generateDynamicObject.pipe(Effect.andThen((response) => Effect.sync(() => console.log(response.object))))
yield* streamWithTools
}).pipe(Effect.provide(Layer.mergeAll(requestExecutorLayer, llmClientLayer)))
Effect.runPromise(program)

51
packages/llm/package.json Normal file
View File

@@ -0,0 +1,51 @@
{
"$schema": "https://json.schemastore.org/package.json",
"version": "1.14.25",
"name": "@opencode-ai/llm",
"type": "module",
"license": "MIT",
"private": true,
"scripts": {
"setup:recording-env": "bun run script/setup-recording-env.ts",
"test": "bun test --timeout 30000",
"typecheck": "tsgo --noEmit"
},
"exports": {
".": "./src/index.ts",
"./route": "./src/route/index.ts",
"./provider": "./src/provider.ts",
"./providers": "./src/providers/index.ts",
"./providers/amazon-bedrock": "./src/providers/amazon-bedrock.ts",
"./providers/anthropic": "./src/providers/anthropic.ts",
"./providers/azure": "./src/providers/azure.ts",
"./providers/cloudflare": "./src/providers/cloudflare.ts",
"./providers/github-copilot": "./src/providers/github-copilot.ts",
"./providers/google": "./src/providers/google.ts",
"./providers/openai": "./src/providers/openai.ts",
"./providers/openai-compatible": "./src/providers/openai-compatible.ts",
"./providers/openai-compatible-profile": "./src/providers/openai-compatible-profile.ts",
"./providers/openrouter": "./src/providers/openrouter.ts",
"./providers/xai": "./src/providers/xai.ts",
"./protocols": "./src/protocols/index.ts",
"./protocols/anthropic-messages": "./src/protocols/anthropic-messages.ts",
"./protocols/bedrock-converse": "./src/protocols/bedrock-converse.ts",
"./protocols/gemini": "./src/protocols/gemini.ts",
"./protocols/openai-chat": "./src/protocols/openai-chat.ts",
"./protocols/openai-compatible-chat": "./src/protocols/openai-compatible-chat.ts",
"./protocols/openai-responses": "./src/protocols/openai-responses.ts"
},
"devDependencies": {
"@clack/prompts": "1.0.0-alpha.1",
"@effect/platform-node": "catalog:",
"@opencode-ai/http-recorder": "workspace:*",
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:"
},
"dependencies": {
"@smithy/eventstream-codec": "4.2.14",
"@smithy/util-utf8": "4.2.2",
"aws4fetch": "1.0.20",
"effect": "catalog:"
}
}

View File

@@ -0,0 +1,250 @@
import * as fs from "node:fs/promises"
import * as path from "node:path"
const RECORDINGS_DIR = path.resolve(import.meta.dir, "..", "test", "fixtures", "recordings")
const MODELS_DEV_URL = "https://models.dev/api.json"
type JsonRecord = Record<string, unknown>
type Pricing = {
readonly input?: number
readonly output?: number
readonly cache_read?: number
readonly cache_write?: number
readonly reasoning?: number
}
type Usage = {
readonly inputTokens: number
readonly outputTokens: number
readonly cacheReadTokens: number
readonly cacheWriteTokens: number
readonly reasoningTokens: number
readonly reportedCost: number
}
type Row = Usage & {
readonly cassette: string
readonly provider: string
readonly model: string
readonly estimatedCost: number
readonly pricingSource: string
}
const isRecord = (value: unknown): value is JsonRecord =>
value !== null && typeof value === "object" && !Array.isArray(value)
const asNumber = (value: unknown) => (typeof value === "number" && Number.isFinite(value) ? value : 0)
const asString = (value: unknown) => (typeof value === "string" ? value : undefined)
const readJson = async (file: string) => JSON.parse(await Bun.file(file).text()) as unknown
const walk = async (dir: string): Promise<ReadonlyArray<string>> =>
(await fs.readdir(dir, { withFileTypes: true }))
.flatMap((entry) => {
const file = path.join(dir, entry.name)
return entry.isDirectory() ? [] : [file]
})
.concat(
...(await Promise.all(
(await fs.readdir(dir, { withFileTypes: true }))
.filter((entry) => entry.isDirectory())
.map((entry) => walk(path.join(dir, entry.name))),
)),
)
const providerFromUrl = (url: string) => {
if (url.includes("api.openai.com")) return "openai"
if (url.includes("api.anthropic.com")) return "anthropic"
if (url.includes("generativelanguage.googleapis.com")) return "google"
if (url.includes("bedrock")) return "amazon-bedrock"
if (url.includes("openrouter.ai")) return "openrouter"
if (url.includes("api.x.ai")) return "xai"
if (url.includes("api.groq.com")) return "groq"
if (url.includes("api.deepseek.com")) return "deepseek"
if (url.includes("api.together.xyz")) return "togetherai"
return "unknown"
}
const providerAliases: Record<string, ReadonlyArray<string>> = {
openai: ["openai"],
anthropic: ["anthropic"],
google: ["google"],
"amazon-bedrock": ["amazon-bedrock"],
openrouter: ["openrouter", "openai", "anthropic", "google"],
xai: ["xai"],
groq: ["groq"],
deepseek: ["deepseek"],
togetherai: ["togetherai"],
}
const modelAliases = (model: string) => [
model,
model.replace(/^models\//, ""),
model.replace(/-\d{8}$/, ""),
model.replace(/-\d{4}-\d{2}-\d{2}$/, ""),
model.replace(/-\d{4}-\d{2}-\d{2}$/, "").replace(/-\d{8}$/, ""),
model.replace(/^openai\//, ""),
model.replace(/^anthropic\//, ""),
model.replace(/^google\//, ""),
]
const pricingFor = (models: JsonRecord, provider: string, model: string) => {
for (const providerID of providerAliases[provider] ?? [provider]) {
const providerEntry = models[providerID]
if (!isRecord(providerEntry) || !isRecord(providerEntry.models)) continue
for (const modelID of modelAliases(model)) {
const modelEntry = providerEntry.models[modelID]
if (isRecord(modelEntry) && isRecord(modelEntry.cost))
return { pricing: modelEntry.cost as Pricing, source: `${providerID}/${modelID}` }
}
}
return { pricing: undefined, source: "missing" }
}
const estimateCost = (usage: Usage, pricing: Pricing | undefined) => {
if (!pricing) return 0
return (
(usage.inputTokens * (pricing.input ?? 0) +
usage.outputTokens * (pricing.output ?? 0) +
usage.cacheReadTokens * (pricing.cache_read ?? 0) +
usage.cacheWriteTokens * (pricing.cache_write ?? 0) +
usage.reasoningTokens * (pricing.reasoning ?? 0)) /
1_000_000
)
}
const emptyUsage = (): Usage => ({
inputTokens: 0,
outputTokens: 0,
cacheReadTokens: 0,
cacheWriteTokens: 0,
reasoningTokens: 0,
reportedCost: 0,
})
const addUsage = (a: Usage, b: Usage): Usage => ({
inputTokens: a.inputTokens + b.inputTokens,
outputTokens: a.outputTokens + b.outputTokens,
cacheReadTokens: a.cacheReadTokens + b.cacheReadTokens,
cacheWriteTokens: a.cacheWriteTokens + b.cacheWriteTokens,
reasoningTokens: a.reasoningTokens + b.reasoningTokens,
reportedCost: a.reportedCost + b.reportedCost,
})
const usageFromObject = (usage: unknown): Usage => {
if (!isRecord(usage)) return emptyUsage()
const promptDetails = isRecord(usage.prompt_tokens_details) ? usage.prompt_tokens_details : {}
const completionDetails = isRecord(usage.completion_tokens_details) ? usage.completion_tokens_details : {}
const inputDetails = isRecord(usage.input_tokens_details) ? usage.input_tokens_details : {}
const outputDetails = isRecord(usage.output_tokens_details) ? usage.output_tokens_details : {}
const cacheWriteTokens = asNumber(promptDetails.cache_write_tokens) + asNumber(inputDetails.cache_write_tokens)
return {
inputTokens: asNumber(usage.prompt_tokens) + asNumber(usage.input_tokens),
outputTokens: asNumber(usage.completion_tokens) + asNumber(usage.output_tokens),
cacheReadTokens: asNumber(promptDetails.cached_tokens) + asNumber(inputDetails.cached_tokens),
cacheWriteTokens,
reasoningTokens: asNumber(completionDetails.reasoning_tokens) + asNumber(outputDetails.reasoning_tokens),
reportedCost: asNumber(usage.cost),
}
}
const jsonPayloads = (body: string) =>
body
.split("\n")
.map((line) => line.trim())
.filter((line) => line.startsWith("data:"))
.map((line) => line.slice("data:".length).trim())
.filter((line) => line !== "" && line !== "[DONE]")
.flatMap((line) => {
try {
return [JSON.parse(line) as unknown]
} catch {
return []
}
})
const usageFromResponseBody = (body: string) =>
jsonPayloads(body).reduce<Usage>((usage, payload) => {
if (!isRecord(payload)) return usage
return addUsage(
usage,
addUsage(
usageFromObject(payload.usage),
usageFromObject(isRecord(payload.response) ? payload.response.usage : undefined),
),
)
}, emptyUsage())
const modelFromRequest = (request: unknown) => {
if (!isRecord(request)) return "unknown"
const requestBody = asString(request.body)
if (!requestBody) return "unknown"
try {
const body = JSON.parse(requestBody) as unknown
if (!isRecord(body)) return "unknown"
return asString(body.model) ?? "unknown"
} catch {
return "unknown"
}
}
const rowFor = (models: JsonRecord, file: string, cassette: unknown): Row | undefined => {
if (!isRecord(cassette) || !Array.isArray(cassette.interactions)) return undefined
const first = cassette.interactions.find(isRecord)
if (!first || !isRecord(first.request)) return undefined
const provider = providerFromUrl(asString(first.request.url) ?? "")
const model = modelFromRequest(first.request)
const usage = cassette.interactions.filter(isRecord).reduce<Usage>((total, interaction) => {
if (!isRecord(interaction.response)) return total
const responseBody = asString(interaction.response.body)
if (!responseBody) return total
return addUsage(total, usageFromResponseBody(responseBody))
}, emptyUsage())
const priced = pricingFor(models, provider, model)
return {
cassette: path.relative(RECORDINGS_DIR, file),
provider,
model,
...usage,
estimatedCost: estimateCost(usage, priced.pricing),
pricingSource: priced.source,
}
}
const money = (value: number) => (value === 0 ? "$0.000000" : `$${value.toFixed(6)}`)
const tokens = (value: number) => value.toLocaleString("en-US")
const models = (await (await fetch(MODELS_DEV_URL)).json()) as JsonRecord
const rows = (
await Promise.all(
(await walk(RECORDINGS_DIR))
.filter((file) => file.endsWith(".json"))
.map(async (file) => rowFor(models, file, await readJson(file))),
)
).filter((row): row is Row => row !== undefined)
const totals = rows.reduce(
(total, row) => ({
...addUsage(total, row),
estimatedCost: total.estimatedCost + row.estimatedCost,
}),
{ ...emptyUsage(), estimatedCost: 0 },
)
console.log("# Recording Cost Report")
console.log("")
console.log(`Pricing: ${MODELS_DEV_URL}`)
console.log(`Cassettes: ${rows.length}`)
console.log(`Reported cost: ${money(totals.reportedCost)}`)
console.log(`Estimated cost: ${money(totals.estimatedCost)}`)
console.log("")
console.log("| Provider | Model | Input | Output | Reasoning | Reported | Estimated | Pricing | Cassette |")
console.log("|---|---:|---:|---:|---:|---:|---:|---|---|")
for (const row of rows.toSorted((a, b) => b.reportedCost + b.estimatedCost - (a.reportedCost + a.estimatedCost))) {
if (row.inputTokens + row.outputTokens + row.reasoningTokens + row.reportedCost + row.estimatedCost === 0) continue
console.log(
`| ${row.provider} | ${row.model} | ${tokens(row.inputTokens)} | ${tokens(row.outputTokens)} | ${tokens(row.reasoningTokens)} | ${money(row.reportedCost)} | ${money(row.estimatedCost)} | ${row.pricingSource} | ${row.cassette} |`,
)
}

537
packages/llm/script/setup-recording-env.ts Normal file
View File

@@ -0,0 +1,537 @@
#!/usr/bin/env bun
import { NodeFileSystem } from "@effect/platform-node"
import * as path from "node:path"
import * as prompts from "@clack/prompts"
import { AwsV4Signer } from "aws4fetch"
import { Config, ConfigProvider, Effect, FileSystem, PlatformError, Redacted } from "effect"
import { FetchHttpClient, HttpClient, HttpClientRequest, type HttpClientResponse } from "effect/unstable/http"
import * as ProviderShared from "../src/protocols/shared"
import * as Cloudflare from "../src/providers/cloudflare"
type Provider = {
readonly id: string
readonly label: string
readonly tier: "core" | "canary" | "compatible" | "optional"
readonly note: string
readonly vars: ReadonlyArray<{
readonly name: string
readonly label?: string
readonly optional?: boolean
readonly secret?: boolean
}>
readonly validate?: (env: Env) => Effect.Effect<string | undefined, unknown, HttpClient.HttpClient>
}
type Env = Record<string, string>
const PROVIDERS: ReadonlyArray<Provider> = [
{
id: "openai",
label: "OpenAI",
tier: "core",
note: "Native OpenAI Chat / Responses recorded tests",
vars: [{ name: "OPENAI_API_KEY" }],
validate: (env) => validateBearer("https://api.openai.com/v1/models", Redacted.make(env.OPENAI_API_KEY)),
},
{
id: "anthropic",
label: "Anthropic",
tier: "core",
note: "Native Anthropic Messages recorded tests",
vars: [{ name: "ANTHROPIC_API_KEY" }],
validate: (env) =>
HttpClientRequest.get("https://api.anthropic.com/v1/models").pipe(
HttpClientRequest.setHeaders({
"anthropic-version": "2023-06-01",
"x-api-key": Redacted.value(Redacted.make(env.ANTHROPIC_API_KEY)),
}),
executeRequest,
),
},
{
id: "google",
label: "Google Gemini",
tier: "core",
note: "Native Gemini recorded tests",
vars: [{ name: "GOOGLE_GENERATIVE_AI_API_KEY" }],
validate: (env) =>
HttpClientRequest.get(
`https://generativelanguage.googleapis.com/v1beta/models?key=${encodeURIComponent(env.GOOGLE_GENERATIVE_AI_API_KEY)}`,
).pipe(executeRequest),
},
{
id: "bedrock",
label: "Amazon Bedrock",
tier: "core",
note: "Native Bedrock Converse recorded tests",
vars: [
{ name: "AWS_ACCESS_KEY_ID" },
{ name: "AWS_SECRET_ACCESS_KEY" },
{ name: "AWS_SESSION_TOKEN", optional: true },
{ name: "BEDROCK_RECORDING_REGION", optional: true },
{ name: "BEDROCK_MODEL_ID", optional: true },
],
validate: (env) => validateBedrock(env),
},
{
id: "groq",
label: "Groq",
tier: "canary",
note: "Fast OpenAI-compatible canary for text/tool streaming",
vars: [{ name: "GROQ_API_KEY" }],
validate: (env) => validateBearer("https://api.groq.com/openai/v1/models", Redacted.make(env.GROQ_API_KEY)),
},
{
id: "openrouter",
label: "OpenRouter",
tier: "canary",
note: "Router canary for OpenAI-compatible text/tool streaming",
vars: [{ name: "OPENROUTER_API_KEY" }],
validate: (env) =>
validateChat({
url: "https://openrouter.ai/api/v1/chat/completions",
token: Redacted.make(env.OPENROUTER_API_KEY),
model: "openai/gpt-4o-mini",
}),
},
{
id: "xai",
label: "xAI",
tier: "canary",
note: "OpenAI-compatible xAI chat endpoint",
vars: [{ name: "XAI_API_KEY" }],
validate: (env) => validateBearer("https://api.x.ai/v1/models", Redacted.make(env.XAI_API_KEY)),
},
{
id: "cloudflare-ai-gateway",
label: "Cloudflare AI Gateway",
tier: "canary",
note: "Cloudflare Unified/OpenAI-compatible gateway; supports provider/model ids like workers-ai/@cf/...",
vars: [
{ name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false },
{ name: "CLOUDFLARE_GATEWAY_ID", label: "Cloudflare AI Gateway ID (defaults to default)", optional: true, secret: false },
{ name: "CLOUDFLARE_API_TOKEN", label: "Cloudflare AI Gateway token" },
],
validate: (env) =>
validateChat({
url: `${Cloudflare.aiGatewayBaseURL({
accountId: env.CLOUDFLARE_ACCOUNT_ID,
gatewayId: env.CLOUDFLARE_GATEWAY_ID || undefined,
})}/chat/completions`,
token: Redacted.make(envValue(env, Cloudflare.aiGatewayAuthEnvVars)),
tokenHeader: "cf-aig-authorization",
model: "workers-ai/@cf/meta/llama-3.1-8b-instruct",
}),
},
{
id: "cloudflare-workers-ai",
label: "Cloudflare Workers AI",
tier: "canary",
note: "Direct Workers AI OpenAI-compatible endpoint; supports model ids like @cf/meta/...",
vars: [
{ name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false },
{ name: "CLOUDFLARE_API_KEY", label: "Cloudflare Workers AI API token" },
],
validate: (env) =>
validateChat({
url: `${Cloudflare.workersAIBaseURL({ accountId: env.CLOUDFLARE_ACCOUNT_ID })}/chat/completions`,
token: Redacted.make(envValue(env, Cloudflare.workersAIAuthEnvVars)),
model: "@cf/meta/llama-3.1-8b-instruct",
}),
},
{
id: "deepseek",
label: "DeepSeek",
tier: "compatible",
note: "Existing OpenAI-compatible recorded tests",
vars: [{ name: "DEEPSEEK_API_KEY" }],
validate: (env) => validateBearer("https://api.deepseek.com/models", Redacted.make(env.DEEPSEEK_API_KEY)),
},
{
id: "togetherai",
label: "TogetherAI",
tier: "compatible",
note: "Existing OpenAI-compatible text/tool recorded tests",
vars: [{ name: "TOGETHER_AI_API_KEY" }],
validate: (env) => validateBearer("https://api.together.xyz/v1/models", Redacted.make(env.TOGETHER_AI_API_KEY)),
},
{
id: "mistral",
label: "Mistral",
tier: "optional",
note: "OpenAI-compatible bridge; native reasoning parity is follow-up work",
vars: [{ name: "MISTRAL_API_KEY" }],
validate: (env) => validateBearer("https://api.mistral.ai/v1/models", Redacted.make(env.MISTRAL_API_KEY)),
},
{
id: "perplexity",
label: "Perplexity",
tier: "optional",
note: "OpenAI-compatible bridge; citations/search metadata are follow-up work",
vars: [{ name: "PERPLEXITY_API_KEY" }],
validate: (env) => validateBearer("https://api.perplexity.ai/models", Redacted.make(env.PERPLEXITY_API_KEY)),
},
{
id: "venice",
label: "Venice",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "VENICE_API_KEY" }],
validate: (env) => validateBearer("https://api.venice.ai/api/v1/models", Redacted.make(env.VENICE_API_KEY)),
},
{
id: "cerebras",
label: "Cerebras",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "CEREBRAS_API_KEY" }],
validate: (env) => validateBearer("https://api.cerebras.ai/v1/models", Redacted.make(env.CEREBRAS_API_KEY)),
},
{
id: "deepinfra",
label: "DeepInfra",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "DEEPINFRA_API_KEY" }],
validate: (env) =>
validateBearer("https://api.deepinfra.com/v1/openai/models", Redacted.make(env.DEEPINFRA_API_KEY)),
},
{
id: "fireworks",
label: "Fireworks",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "FIREWORKS_API_KEY" }],
validate: (env) =>
validateBearer("https://api.fireworks.ai/inference/v1/models", Redacted.make(env.FIREWORKS_API_KEY)),
},
{
id: "baseten",
label: "Baseten",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "BASETEN_API_KEY" }],
},
]
const args = process.argv.slice(2)
const hasFlag = (name: string) => args.includes(name)
const option = (name: string) => {
const index = args.indexOf(name)
if (index === -1) return undefined
return args[index + 1]
}
const envPath = path.resolve(process.cwd(), option("--env") ?? ".env.local")
const checkOnly = hasFlag("--check")
const providerOption = option("--providers")
const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY)
const envNames = Array.from(new Set(PROVIDERS.flatMap((provider) => provider.vars.map((item) => item.name))))
const providersForOption = (value: string | undefined) => {
if (!value || value === "recommended")
return PROVIDERS.filter((provider) => provider.tier === "core" || provider.tier === "canary")
if (value === "recorded") return PROVIDERS.filter((provider) => provider.tier !== "optional")
if (value === "all") return PROVIDERS
const ids = new Set(
value
.split(",")
.map((item) => item.trim())
.filter(Boolean),
)
return PROVIDERS.filter((provider) => ids.has(provider.id))
}
const chooseProviders = async () => {
if (providerOption) return providersForOption(providerOption)
return providersForOption("recommended")
}
const catchMissingFile = (error: PlatformError.PlatformError) => {
if (error.reason._tag === "NotFound") return Effect.succeed("")
return Effect.fail(error)
}
const readEnvFile = Effect.fn("RecordingEnv.readFile")(function* () {
const fileSystem = yield* FileSystem.FileSystem
return yield* fileSystem.readFileString(envPath).pipe(Effect.catch(catchMissingFile))
})
const readConfigString = (provider: ConfigProvider.ConfigProvider, name: string) =>
Config.string(name)
.parse(provider)
.pipe(
Effect.match({
onFailure: () => undefined,
onSuccess: (value) => value,
}),
)
const parseEnv = Effect.fn("RecordingEnv.parseEnv")(function* (contents: string) {
const provider = ConfigProvider.fromDotEnvContents(contents)
return Object.fromEntries(
(yield* Effect.forEach(envNames, (name) =>
readConfigString(provider, name).pipe(Effect.map((value) => [name, value] as const)),
)).filter((entry): entry is readonly [string, string] => entry[1] !== undefined),
)
})
const quote = (value: string) => JSON.stringify(value)
const status = (name: string, fileEnv: Env) => {
if (fileEnv[name]) return "file"
if (process.env[name]) return "shell"
return "missing"
}
const statusLine = (provider: Provider, fileEnv: Env) =>
[
`${provider.label} (${provider.tier})`,
provider.note,
...provider.vars.map((item) => {
const value = status(item.name, fileEnv)
const suffix = item.optional ? " optional" : ""
return ` ${value === "missing" ? "missing" : "set"} ${item.name}${suffix}${value === "shell" ? " (shell only)" : ""}`
}),
].join("\n")
const printStatus = (providers: ReadonlyArray<Provider>, fileEnv: Env) => {
prompts.note(providers.map((provider) => statusLine(provider, fileEnv)).join("\n\n"), `Recording env: ${envPath}`)
}
const exitIfCancel = <A>(value: A | symbol): A => {
if (!prompts.isCancel(value)) return value as A
prompts.cancel("Cancelled")
process.exit(130)
}
const upsertEnv = (contents: string, values: Env) => {
const names = Object.keys(values)
const seen = new Set<string>()
const lines = contents.split(/\r?\n/).map((line) => {
const match = line.match(/^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=/)
if (!match || !names.includes(match[1])) return line
seen.add(match[1])
return `${match[1]}=${quote(values[match[1]])}`
})
const missing = names.filter((name) => !seen.has(name))
if (missing.length === 0) return lines.join("\n").replace(/\n*$/, "\n")
const prefix = lines.join("\n").trimEnd()
const block = [
"",
"# Added by bun run setup:recording-env",
...missing.map((name) => `${name}=${quote(values[name])}`),
].join("\n")
return `${prefix}${block}\n`
}
const providerRequiredStatus = (provider: Provider, fileEnv: Env) => {
const required = requiredVars(provider)
if (required.some((item) => status(item.name, fileEnv) === "missing")) return "missing"
if (required.some((item) => status(item.name, fileEnv) === "shell")) return "set in shell"
return "already added"
}
const requiredVars = (provider: Provider) => provider.vars.filter((item) => !item.optional)
const promptVars = (provider: Provider) => provider.vars.filter((item) => !item.optional || item.secret === false)
const processEnv = (): Env =>
Object.fromEntries(Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined))
const envValue = (env: Env, names: ReadonlyArray<string>) => names.map((name) => env[name]).find(Boolean) ?? ""
const envWithValues = (fileEnv: Env, values: Env): Env => ({
...processEnv(),
...fileEnv,
...values,
})
const responseError = Effect.fn("RecordingEnv.responseError")(function* (
response: HttpClientResponse.HttpClientResponse,
) {
if (response.status >= 200 && response.status < 300) return undefined
const body = yield* response.text.pipe(Effect.catch(() => Effect.succeed("")))
return `${response.status}${body ? `: ${body.slice(0, 180)}` : ""}`
})
const executeRequest = Effect.fn("RecordingEnv.executeRequest")(function* (
request: HttpClientRequest.HttpClientRequest,
) {
const http = yield* HttpClient.HttpClient
return yield* http.execute(request).pipe(Effect.flatMap(responseError))
})
const validateBearer = (url: string, token: Redacted.Redacted<string>, headers: Record<string, string> = {}) =>
HttpClientRequest.get(url).pipe(
HttpClientRequest.setHeaders({ ...headers, authorization: `Bearer ${Redacted.value(token)}` }),
executeRequest,
)
const validateChat = (input: {
readonly url: string
readonly token: Redacted.Redacted<string>
readonly tokenHeader?: string
readonly model: string
readonly headers?: Record<string, string>
}) =>
ProviderShared.jsonPost({
url: input.url,
headers: { ...input.headers, [input.tokenHeader ?? "authorization"]: `Bearer ${Redacted.value(input.token)}` },
body: ProviderShared.encodeJson({
model: input.model,
messages: [{ role: "user", content: "Reply with exactly: ok" }],
max_tokens: 3,
temperature: 0,
}),
}).pipe(executeRequest)
const validateBedrock = (env: Env) =>
Effect.gen(function* () {
const request = yield* Effect.promise(() =>
new AwsV4Signer({
url: `https://bedrock.${env.BEDROCK_RECORDING_REGION || "us-east-1"}.amazonaws.com/foundation-models`,
method: "GET",
service: "bedrock",
region: env.BEDROCK_RECORDING_REGION || "us-east-1",
accessKeyId: env.AWS_ACCESS_KEY_ID,
secretAccessKey: env.AWS_SECRET_ACCESS_KEY,
sessionToken: env.AWS_SESSION_TOKEN || undefined,
}).sign(),
)
return yield* HttpClientRequest.get(request.url.toString()).pipe(
HttpClientRequest.setHeaders(Object.fromEntries(request.headers.entries())),
executeRequest,
)
})
const validateProvider = Effect.fn("RecordingEnv.validateProvider")(function* (provider: Provider, env: Env) {
return yield* (provider.validate?.(env) ?? Effect.succeed("no lightweight validator")).pipe(
Effect.catch((error) => {
if (error instanceof Error) return Effect.succeed(error.message)
return Effect.succeed(String(error))
}),
)
})
const validateProviders = Effect.fn("RecordingEnv.validateProviders")(function* (
providers: ReadonlyArray<Provider>,
env: Env,
) {
const spinner = prompts.spinner()
spinner.start("Validating credentials")
const results = yield* Effect.forEach(
providers,
(provider) => validateProvider(provider, env).pipe(Effect.map((error) => ({ provider, error }))),
{ concurrency: 4 },
)
spinner.stop("Validation complete")
prompts.note(
results
.map(
(result) =>
`${result.error ? "failed" : "ok"} ${result.provider.label}${result.error ? ` - ${result.error}` : ""}`,
)
.join("\n"),
"Credential validation",
)
})
const writeEnvFile = Effect.fn("RecordingEnv.writeFile")(function* (contents: string) {
const fileSystem = yield* FileSystem.FileSystem
yield* fileSystem.makeDirectory(path.dirname(envPath), { recursive: true })
yield* fileSystem.writeFileString(envPath, contents, { mode: 0o600 })
})
const prompt = <A>(run: () => Promise<A | symbol>) => Effect.promise(run).pipe(Effect.map(exitIfCancel))
const chooseConfigurableProviders = Effect.fn("RecordingEnv.chooseConfigurableProviders")(function* (
providers: ReadonlyArray<Provider>,
fileEnv: Env,
) {
const configurable = providers.filter((provider) => requiredVars(provider).length > 0)
const selected = yield* prompt<ReadonlyArray<string>>(() =>
prompts.multiselect({
message: "Select provider credentials to add or override",
options: configurable.map((provider) => ({
value: provider.id,
label: provider.label,
hint: `${providerRequiredStatus(provider, fileEnv)} - ${requiredVars(provider)
.map((item) => item.name)
.join(", ")}`,
})),
initialValues: configurable
.filter((provider) => providerRequiredStatus(provider, fileEnv) === "missing")
.map((provider) => provider.id),
}),
)
return configurable.filter((provider) => selected.includes(provider.id))
})
const promptEnvVar = (item: Provider["vars"][number]) =>
prompt<string>(() => {
const input = {
message: item.label ?? item.name,
validate: (input: string | undefined) => {
if (item.optional) return undefined
return !input || input.length === 0 ? "Leave blank by pressing Esc/cancel, or paste a value" : undefined
},
}
return item.secret === false ? prompts.text(input) : prompts.password(input)
})
const promptProviderValues = Effect.fn("RecordingEnv.promptProviderValues")(function* (
providers: ReadonlyArray<Provider>,
) {
const values: Env = {}
for (const provider of providers) {
prompts.log.info(`${provider.label}: ${provider.note}`)
for (const item of promptVars(provider)) {
if (values[item.name]) continue
const value = yield* promptEnvVar(item)
if (value !== "") values[item.name] = value
}
}
return values
})
const main = Effect.fn("RecordingEnv.main")(function* () {
prompts.intro("LLM recording credentials")
const contents = yield* readEnvFile()
const fileEnv = yield* parseEnv(contents)
const providers = yield* Effect.promise(() => chooseProviders())
printStatus(providers, fileEnv)
if (checkOnly) {
prompts.outro("Check complete")
return
}
if (!interactive) {
prompts.outro("Run this command in a terminal to enter credentials")
return
}
const selectedProviders = yield* chooseConfigurableProviders(providers, fileEnv)
const values = yield* promptProviderValues(selectedProviders)
if (Object.keys(values).length === 0) {
prompts.outro("No changes")
return
}
if (
interactive &&
(yield* prompt(() => prompts.confirm({ message: "Validate credentials before saving?", initialValue: true })))
) {
yield* validateProviders(selectedProviders, envWithValues(fileEnv, values))
}
yield* writeEnvFile(upsertEnv(contents, values))
prompts.log.success(
`Saved ${Object.keys(values).length} value${Object.keys(values).length === 1 ? "" : "s"} to ${envPath}`,
)
prompts.outro("Keep .env.local local. Store shared team credentials in a password manager or vault.")
})
await Effect.runPromise(main().pipe(Effect.provide(NodeFileSystem.layer), Effect.provide(FetchHttpClient.layer)))

35
packages/llm/src/index.ts Normal file
View File

@@ -0,0 +1,35 @@
export { LLMClient, modelLimits, modelRef } from "./route/client"
export { Auth } from "./route/auth"
export { Provider } from "./provider"
export type {
RouteModelInput,
RouteRoutedModelInput,
Interface as LLMClientShape,
Service as LLMClientService,
ModelRefInput,
} from "./route/client"
export * from "./schema"
export { Tool, ToolFailure, toDefinitions, tool } from "./tool"
export type {
AnyExecutableTool,
AnyTool,
ExecutableTool,
ExecutableTools,
Tool as ToolShape,
ToolExecute,
Tools,
ToolSchema,
} from "./tool"
export type {
RunOptions as ToolRunOptions,
RuntimeState as ToolRuntimeState,
StopCondition as ToolStopCondition,
ToolExecution,
} from "./tool-runtime"
export * as LLM from "./llm"
export type {
Definition as ProviderDefinition,
ModelFactory as ProviderModelFactory,
ModelOptions as ProviderModelOptions,
} from "./provider"

224
packages/llm/src/llm.ts Normal file
View File

@@ -0,0 +1,224 @@
import { Effect, JsonSchema, Schema } from "effect"
import {
LLMClient,
modelLimits,
modelRef,
type ModelRefInput,
} from "./route/client"
import {
GenerationOptions,
HttpOptions,
InvalidProviderOutputReason,
LLMError,
LLMEvent,
LLMRequest,
LLMResponse,
Message,
SystemPart,
ToolChoice,
ToolDefinition,
type ContentPart,
ToolCallPart,
ToolResultPart,
} from "./schema"
import { make as makeTool, type ToolSchema } from "./tool"
export type ModelInput = ModelRefInput
export type MessageInput = Message.Input
export type ToolChoiceInput = ToolChoice.Input
export type ToolChoiceMode = ToolChoice.Mode
export type ToolResultInput = Parameters<typeof ToolResultPart.make>[0]
/** Input accepted by `LLM.request`, normalized into the canonical `LLMRequest` class. */
export type RequestInput = Omit<
ConstructorParameters<typeof LLMRequest>[0],
"system" | "messages" | "tools" | "toolChoice" | "generation" | "http" | "providerOptions"
> & {
readonly system?: string | SystemPart | ReadonlyArray<SystemPart>
readonly prompt?: string | ContentPart | ReadonlyArray<ContentPart>
readonly messages?: ReadonlyArray<Message | MessageInput>
readonly tools?: ReadonlyArray<ToolDefinition.Input>
readonly toolChoice?: ToolChoiceInput
readonly generation?: GenerationOptions.Input
readonly providerOptions?: ConstructorParameters<typeof LLMRequest>[0]["providerOptions"]
readonly http?: HttpOptions.Input
}
export const limits = modelLimits
export const text = Message.text
export const system = SystemPart.make
export const message = Message.make
export const user = Message.user
export const assistant = Message.assistant
export const model = modelRef
export const toolDefinition = ToolDefinition.make
export const toolCall = ToolCallPart.make
export const toolResult = ToolResultPart.make
export const toolMessage = Message.tool
export const toolChoiceName = ToolChoice.named
export const toolChoice = ToolChoice.make
export const generation = GenerationOptions.make
export const generate = LLMClient.generate
export const stream = LLMClient.stream
export const stepCountIs = LLMClient.stepCountIs
export const requestInput = (input: LLMRequest): RequestInput => ({
...LLMRequest.input(input),
})
export const request = (input: RequestInput) => {
const {
system: requestSystem,
prompt,
messages,
tools,
toolChoice: requestToolChoice,
generation: requestGeneration,
providerOptions: requestProviderOptions,
http: requestHttp,
...rest
} = input
return new LLMRequest({
...rest,
system: SystemPart.content(requestSystem),
messages: [...(messages?.map(message) ?? []), ...(prompt === undefined ? [] : [user(prompt)])],
tools: tools?.map(toolDefinition) ?? [],
toolChoice: requestToolChoice ? toolChoice(requestToolChoice) : undefined,
generation: requestGeneration === undefined ? undefined : generation(requestGeneration),
providerOptions: requestProviderOptions,
http: requestHttp === undefined ? undefined : HttpOptions.make(requestHttp),
})
}
export const updateRequest = (input: LLMRequest, patch: Partial<RequestInput>) =>
request({ ...requestInput(input), ...patch })
const GENERATE_OBJECT_TOOL_NAME = "generate_object"
const GENERATE_OBJECT_TOOL_DESCRIPTION = "Return the structured result by calling this tool."
type GenerateObjectBase = Omit<RequestInput, "tools" | "toolChoice" | "responseFormat">
export class GenerateObjectResponse<T> {
constructor(
readonly object: T,
readonly response: LLMResponse,
) {}
get events() {
return this.response.events
}
get usage() {
return this.response.usage
}
}
export interface GenerateObjectOptions<S extends ToolSchema<any>> extends GenerateObjectBase {
readonly schema: S
}
export interface GenerateObjectDynamicOptions extends GenerateObjectBase {
/** Raw JSON Schema object describing the expected output shape. */
readonly jsonSchema: JsonSchema.JsonSchema
}
const runGenerateObject = Effect.fn("LLM.generateObject")(function* (
options: GenerateObjectBase,
tool: ReturnType<typeof makeTool>,
) {
const baseRequest = request(options)
const generateRequest = LLMRequest.update(baseRequest, {
toolChoice: ToolChoice.named(GENERATE_OBJECT_TOOL_NAME),
})
const response = yield* LLMClient.generate({
request: generateRequest,
tools: { [GENERATE_OBJECT_TOOL_NAME]: tool },
toolExecution: "none",
})
const call = response.toolCalls.find(
(event) => LLMEvent.is.toolCall(event) && event.name === GENERATE_OBJECT_TOOL_NAME,
)
if (!call || !LLMEvent.is.toolCall(call))
return yield* new LLMError({
module: "LLM",
method: "generateObject",
reason: new InvalidProviderOutputReason({
message: `generateObject: model did not call the forced \`${GENERATE_OBJECT_TOOL_NAME}\` tool`,
}),
})
const object = yield* tool._decode(call.input).pipe(
Effect.mapError(
(error) =>
new LLMError({
module: "LLM",
method: "generateObject",
reason: new InvalidProviderOutputReason({
message: `generateObject: tool input failed schema decode: ${error.message}`,
}),
}),
),
)
return new GenerateObjectResponse(object, response)
})
/**
* Run a model and decode its output against `schema`. Works on every protocol
* because it forces a synthetic tool call internally — provider-native JSON
* modes are intentionally avoided so behaviour is uniform.
*
* Two input modes:
*
* 1. `schema: EffectSchema<T>` — `.object` is decoded and typed as `T`.
* Decode failures surface as `LLMError`.
* 2. `jsonSchema: JsonSchema.JsonSchema` — `.object` is `unknown`. Use when
* the schema is only available at runtime (MCP, plugin manifests). Caller validates.
*/
export function generateObject<S extends ToolSchema<any>>(
options: GenerateObjectOptions<S>,
): Effect.Effect<GenerateObjectResponse<Schema.Schema.Type<S>>, LLMError>
export function generateObject(
options: GenerateObjectDynamicOptions,
): Effect.Effect<GenerateObjectResponse<unknown>, LLMError>
export function generateObject(
options: GenerateObjectOptions<ToolSchema<any>> | GenerateObjectDynamicOptions,
) {
if ("schema" in options) {
const { schema, ...rest } = options
return runGenerateObject(
rest,
makeTool({
description: GENERATE_OBJECT_TOOL_DESCRIPTION,
parameters: schema,
success: Schema.Unknown as ToolSchema<unknown>,
execute: () => Effect.void,
}),
)
}
const { jsonSchema, ...rest } = options
return runGenerateObject(
rest,
makeTool({
description: GENERATE_OBJECT_TOOL_DESCRIPTION,
jsonSchema,
execute: () => Effect.void,
}),
)
}
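// Usage sketch (illustrative; `someModel` and `runtimeSchema` are assumptions,
// and the calls run inside Effect.gen):
//
//   const Person = Schema.Struct({ name: Schema.String, age: Schema.Number })
//   const typed = yield* generateObject({ model: someModel, prompt: "Who is mentioned?", schema: Person })
//   typed.object // decoded and typed as { readonly name: string; readonly age: number }
//
//   const dynamic = yield* generateObject({ model: someModel, prompt: "Who is mentioned?", jsonSchema: runtimeSchema })
//   dynamic.object // unknown; the caller validates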

View File

@@ -0,0 +1,592 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Protocol } from "../route/protocol"
import {
Usage,
type CacheHint,
type FinishReason,
type LLMEvent,
type LLMRequest,
type ProviderMetadata,
type ToolCallPart,
type ToolDefinition,
type ToolResultPart,
} from "../schema"
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "anthropic-messages"
export const DEFAULT_BASE_URL = "https://api.anthropic.com/v1"
export const PATH = "/messages"
// =============================================================================
// Request Body Schema
// =============================================================================
const AnthropicCacheControl = Schema.Struct({ type: Schema.tag("ephemeral") })
const AnthropicTextBlock = Schema.Struct({
type: Schema.tag("text"),
text: Schema.String,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTextBlock = Schema.Schema.Type<typeof AnthropicTextBlock>
const AnthropicThinkingBlock = Schema.Struct({
type: Schema.tag("thinking"),
thinking: Schema.String,
signature: Schema.optional(Schema.String),
cache_control: Schema.optional(AnthropicCacheControl),
})
const AnthropicToolUseBlock = Schema.Struct({
type: Schema.tag("tool_use"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicToolUseBlock = Schema.Schema.Type<typeof AnthropicToolUseBlock>
const AnthropicServerToolUseBlock = Schema.Struct({
type: Schema.tag("server_tool_use"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolUseBlock = Schema.Schema.Type<typeof AnthropicServerToolUseBlock>
// Server tool result blocks: web_search_tool_result, code_execution_tool_result,
// and web_fetch_tool_result. The provider executes the tool and inlines the
// structured result into the assistant turn — there is no client tool_result
// round-trip. We round-trip the structured `content` payload as opaque JSON so
// the next request can echo it back when continuing the conversation.
const AnthropicServerToolResultType = Schema.Literals([
"web_search_tool_result",
"code_execution_tool_result",
"web_fetch_tool_result",
])
type AnthropicServerToolResultType = Schema.Schema.Type<typeof AnthropicServerToolResultType>
const AnthropicServerToolResultBlock = Schema.Struct({
type: AnthropicServerToolResultType,
tool_use_id: Schema.String,
content: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock>
const AnthropicToolResultBlock = Schema.Struct({
type: Schema.tag("tool_result"),
tool_use_id: Schema.String,
content: Schema.String,
is_error: Schema.optional(Schema.Boolean),
cache_control: Schema.optional(AnthropicCacheControl),
})
const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicToolResultBlock])
const AnthropicAssistantBlock = Schema.Union([
AnthropicTextBlock,
AnthropicThinkingBlock,
AnthropicToolUseBlock,
AnthropicServerToolUseBlock,
AnthropicServerToolResultBlock,
])
type AnthropicAssistantBlock = Schema.Schema.Type<typeof AnthropicAssistantBlock>
type AnthropicToolResultBlock = Schema.Schema.Type<typeof AnthropicToolResultBlock>
const AnthropicMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(AnthropicUserBlock) }),
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(AnthropicAssistantBlock) }),
]).pipe(Schema.toTaggedUnion("role"))
type AnthropicMessage = Schema.Schema.Type<typeof AnthropicMessage>
const AnthropicTool = Schema.Struct({
name: Schema.String,
description: Schema.String,
input_schema: JsonObject,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTool = Schema.Schema.Type<typeof AnthropicTool>
const AnthropicToolChoice = Schema.Union([
Schema.Struct({ type: Schema.Literals(["auto", "any"]) }),
Schema.Struct({ type: Schema.tag("tool"), name: Schema.String }),
])
const AnthropicThinking = Schema.Struct({
type: Schema.tag("enabled"),
budget_tokens: Schema.Number,
})
const AnthropicBodyFields = {
model: Schema.String,
system: optionalArray(AnthropicTextBlock),
messages: Schema.Array(AnthropicMessage),
tools: optionalArray(AnthropicTool),
tool_choice: Schema.optional(AnthropicToolChoice),
stream: Schema.Literal(true),
max_tokens: Schema.Number,
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
top_k: Schema.optional(Schema.Number),
stop_sequences: optionalArray(Schema.String),
thinking: Schema.optional(AnthropicThinking),
}
const AnthropicMessagesBody = Schema.Struct(AnthropicBodyFields)
export type AnthropicMessagesBody = Schema.Schema.Type<typeof AnthropicMessagesBody>
const AnthropicUsage = Schema.Struct({
input_tokens: Schema.optional(Schema.Number),
output_tokens: Schema.optional(Schema.Number),
cache_creation_input_tokens: optionalNull(Schema.Number),
cache_read_input_tokens: optionalNull(Schema.Number),
})
type AnthropicUsage = Schema.Schema.Type<typeof AnthropicUsage>
const AnthropicStreamBlock = Schema.Struct({
type: Schema.String,
id: Schema.optional(Schema.String),
name: Schema.optional(Schema.String),
text: Schema.optional(Schema.String),
thinking: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
input: Schema.optional(Schema.Unknown),
// *_tool_result blocks arrive whole as content_block_start (no streaming
// delta) with the structured payload in `content` and the originating
// server_tool_use id in `tool_use_id`.
tool_use_id: Schema.optional(Schema.String),
content: Schema.optional(Schema.Unknown),
})
const AnthropicStreamDelta = Schema.Struct({
type: Schema.optional(Schema.String),
text: Schema.optional(Schema.String),
thinking: Schema.optional(Schema.String),
partial_json: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
stop_reason: optionalNull(Schema.String),
stop_sequence: optionalNull(Schema.String),
})
const AnthropicEvent = Schema.Struct({
type: Schema.String,
index: Schema.optional(Schema.Number),
message: Schema.optional(Schema.Struct({ usage: Schema.optional(AnthropicUsage) })),
content_block: Schema.optional(AnthropicStreamBlock),
delta: Schema.optional(AnthropicStreamDelta),
usage: Schema.optional(AnthropicUsage),
error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })),
})
type AnthropicEvent = Schema.Schema.Type<typeof AnthropicEvent>
interface ParserState {
readonly tools: ToolStream.State<number>
readonly usage?: Usage
}
const invalid = ProviderShared.invalidRequest
// =============================================================================
// Request Lowering
// =============================================================================
const cacheControl = (cache: CacheHint | undefined) =>
cache?.type === "ephemeral" ? { type: "ephemeral" as const } : undefined
const anthropicMetadata = (metadata: Record<string, unknown>): ProviderMetadata => ({ anthropic: metadata })
const signatureFromMetadata = (metadata: ProviderMetadata | undefined): string | undefined => {
const anthropic = metadata?.anthropic
if (!ProviderShared.isRecord(anthropic)) return undefined
return typeof anthropic.signature === "string" ? anthropic.signature : undefined
}
const lowerTool = (tool: ToolDefinition): AnthropicTool => ({
name: tool.name,
description: tool.description,
input_schema: tool.inputSchema,
})
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("Anthropic Messages", toolChoice, {
auto: () => ({ type: "auto" as const }),
none: () => undefined,
required: () => ({ type: "any" as const }),
tool: (name) => ({ type: "tool" as const, name }),
})
const lowerToolCall = (part: ToolCallPart): AnthropicToolUseBlock => ({
type: "tool_use",
id: part.id,
name: part.name,
input: part.input,
})
const lowerServerToolCall = (part: ToolCallPart): AnthropicServerToolUseBlock => ({
type: "server_tool_use",
id: part.id,
name: part.name,
input: part.input,
})
// Server tool result blocks are typed by name. Anthropic ships three today;
// extend this list when new server tools land. The block content is the
// structured payload returned by the provider, which we round-trip as-is.
const serverToolResultType = (name: string): AnthropicServerToolResultType | undefined => {
if (name === "web_search") return "web_search_tool_result"
if (name === "code_execution") return "code_execution_tool_result"
if (name === "web_fetch") return "web_fetch_tool_result"
return undefined
}
const lowerServerToolResult = Effect.fn("AnthropicMessages.lowerServerToolResult")(function* (part: ToolResultPart) {
const wireType = serverToolResultType(part.name)
if (!wireType)
return yield* invalid(`Anthropic Messages does not know how to round-trip the server tool result for ${part.name}`)
return { type: wireType, tool_use_id: part.id, content: part.result.value } satisfies AnthropicServerToolResultBlock
})
const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (request: LLMRequest) {
const messages: AnthropicMessage[] = []
for (const message of request.messages) {
if (message.role === "user") {
const content: AnthropicTextBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text"]))
return yield* ProviderShared.unsupportedContent("Anthropic Messages", "user", ["text"])
content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
}
messages.push({ role: "user", content })
continue
}
if (message.role === "assistant") {
const content: AnthropicAssistantBlock[] = []
for (const part of message.content) {
if (part.type === "text") {
content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
continue
}
if (part.type === "reasoning") {
content.push({
type: "thinking",
thinking: part.text,
signature: part.encrypted ?? signatureFromMetadata(part.providerMetadata),
})
continue
}
if (part.type === "tool-call") {
content.push(part.providerExecuted ? lowerServerToolCall(part) : lowerToolCall(part))
continue
}
if (part.type === "tool-result" && part.providerExecuted) {
content.push(yield* lowerServerToolResult(part))
continue
}
return yield* invalid(
`Anthropic Messages assistant messages only support text, reasoning, tool-call, and provider-executed tool-result content for now`,
)
}
messages.push({ role: "assistant", content })
continue
}
const content: AnthropicToolResultBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("Anthropic Messages", "tool", ["tool-result"])
content.push({
type: "tool_result",
tool_use_id: part.id,
content: ProviderShared.toolResultText(part),
is_error: part.result.type === "error" ? true : undefined,
})
}
messages.push({ role: "user", content })
}
return messages
})
const anthropicOptions = (request: LLMRequest) => request.providerOptions?.anthropic
const lowerThinking = Effect.fn("AnthropicMessages.lowerThinking")(function* (request: LLMRequest) {
const thinking = anthropicOptions(request)?.thinking
if (!ProviderShared.isRecord(thinking) || thinking.type !== "enabled") return undefined
const budget =
typeof thinking.budgetTokens === "number"
? thinking.budgetTokens
: typeof thinking.budget_tokens === "number"
? thinking.budget_tokens
: undefined
if (budget === undefined) return yield* invalid("Anthropic thinking provider option requires budgetTokens")
return { type: "enabled" as const, budget_tokens: budget }
})
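// e.g. (illustrative) providerOptions: { anthropic: { thinking: { type: "enabled", budgetTokens: 2048 } } }
// lowers to the wire shape { type: "enabled", budget_tokens: 2048 }; the
// snake_case budget_tokens spelling is accepted on input as well.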
const fromRequest = Effect.fn("AnthropicMessages.fromRequest")(function* (request: LLMRequest) {
const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
const generation = request.generation
return {
model: request.model.id,
system:
request.system.length === 0
? undefined
: request.system.map((part) => ({
type: "text" as const,
text: part.text,
cache_control: cacheControl(part.cache),
})),
messages: yield* lowerMessages(request),
tools: request.tools.length === 0 || request.toolChoice?.type === "none" ? undefined : request.tools.map(lowerTool),
tool_choice: toolChoice,
stream: true as const,
max_tokens: generation?.maxTokens ?? request.model.limits.output ?? 4096,
temperature: generation?.temperature,
top_p: generation?.topP,
top_k: generation?.topK,
stop_sequences: generation?.stop,
thinking: yield* lowerThinking(request),
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
if (reason === "end_turn" || reason === "stop_sequence" || reason === "pause_turn") return "stop"
if (reason === "max_tokens") return "length"
if (reason === "tool_use") return "tool-calls"
if (reason === "refusal") return "content-filter"
return "unknown"
}
const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.input_tokens,
outputTokens: usage.output_tokens,
cacheReadInputTokens: usage.cache_read_input_tokens ?? undefined,
cacheWriteInputTokens: usage.cache_creation_input_tokens ?? undefined,
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, undefined),
native: usage,
})
}
// Anthropic emits usage on `message_start` and again on `message_delta` — the
// final delta carries the authoritative totals. Right-biased merge: each
// field prefers `right` when defined, falls back to `left`. `totalTokens` is
// recomputed from the merged input/output to stay consistent.
const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => {
if (!left) return right
if (!right) return left
const inputTokens = right.inputTokens ?? left.inputTokens
const outputTokens = right.outputTokens ?? left.outputTokens
return new Usage({
inputTokens,
outputTokens,
cacheReadInputTokens: right.cacheReadInputTokens ?? left.cacheReadInputTokens,
cacheWriteInputTokens: right.cacheWriteInputTokens ?? left.cacheWriteInputTokens,
totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined),
native: { ...left.native, ...right.native },
})
}
// Server tool result blocks come whole in `content_block_start` (no streaming
// delta sequence). We convert the payload to a `tool-result` event with
// `providerExecuted: true`. The runtime appends it to the assistant message
// for round-trip; downstream consumers can inspect `result.value` for the
// structured payload.
const SERVER_TOOL_RESULT_NAMES: Record<AnthropicServerToolResultType, string> = {
web_search_tool_result: "web_search",
code_execution_tool_result: "code_execution",
web_fetch_tool_result: "web_fetch",
}
const isServerToolResultType = (type: string): type is AnthropicServerToolResultType => type in SERVER_TOOL_RESULT_NAMES
const serverToolResultEvent = (block: NonNullable<AnthropicEvent["content_block"]>): LLMEvent | undefined => {
if (!block.type || !isServerToolResultType(block.type)) return undefined
const errorPayload =
typeof block.content === "object" && block.content !== null && "type" in block.content
? String((block.content as Record<string, unknown>).type)
: ""
const isError = errorPayload.endsWith("_tool_result_error")
return {
type: "tool-result",
id: block.tool_use_id ?? "",
name: SERVER_TOOL_RESULT_NAMES[block.type],
result: isError ? { type: "error", value: block.content } : { type: "json", value: block.content },
providerExecuted: true,
providerMetadata: anthropicMetadata({ blockType: block.type }),
}
}
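// e.g. (illustrative, with a made-up id) a web_search_tool_result block becomes:
//   { type: "tool-result", id: "srvtoolu_123", name: "web_search",
//     result: { type: "json", value: <structured payload> },
//     providerExecuted: true,
//     providerMetadata: { anthropic: { blockType: "web_search_tool_result" } } }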
type StepResult = readonly [ParserState, ReadonlyArray<LLMEvent>]
const NO_EVENTS: StepResult[1] = []
const onMessageStart = (state: ParserState, event: AnthropicEvent): StepResult => {
const usage = mapUsage(event.message?.usage)
return [usage ? { ...state, usage: mergeUsage(state.usage, usage) } : state, NO_EVENTS]
}
const onContentBlockStart = (state: ParserState, event: AnthropicEvent): StepResult => {
const block = event.content_block
if (!block) return [state, NO_EVENTS]
if ((block.type === "tool_use" || block.type === "server_tool_use") && event.index !== undefined) {
return [
{
...state,
tools: ToolStream.start(state.tools, event.index, {
id: block.id ?? String(event.index),
name: block.name ?? "",
providerExecuted: block.type === "server_tool_use",
}),
},
NO_EVENTS,
]
}
if (block.type === "text" && block.text) {
return [state, [{ type: "text-delta", text: block.text }]]
}
if (block.type === "thinking" && block.thinking) {
return [
state,
[
{
type: "reasoning-delta",
text: block.thinking,
...(block.signature ? { providerMetadata: anthropicMetadata({ signature: block.signature }) } : {}),
},
],
]
}
const result = serverToolResultEvent(block)
return [state, result ? [result] : NO_EVENTS]
}
const onContentBlockDelta = Effect.fn("AnthropicMessages.onContentBlockDelta")(function* (
state: ParserState,
event: AnthropicEvent,
) {
const delta = event.delta
if (delta?.type === "text_delta" && delta.text) {
return [state, [{ type: "text-delta", text: delta.text }]] satisfies StepResult
}
if (delta?.type === "thinking_delta" && delta.thinking) {
return [state, [{ type: "reasoning-delta", text: delta.thinking }]] satisfies StepResult
}
if (delta?.type === "signature_delta" && delta.signature) {
return [
state,
[{ type: "reasoning-delta", text: "", providerMetadata: anthropicMetadata({ signature: delta.signature }) }],
] satisfies StepResult
}
if (delta?.type === "input_json_delta" && event.index !== undefined) {
if (!delta.partial_json) return [state, NO_EVENTS] satisfies StepResult
const result = ToolStream.appendExisting(
ADAPTER,
state.tools,
event.index,
delta.partial_json,
"Anthropic Messages tool argument delta is missing its tool call",
)
if (ToolStream.isError(result)) return yield* result
return [{ ...state, tools: result.tools }, result.event ? [result.event] : NO_EVENTS] satisfies StepResult
}
return [state, NO_EVENTS] satisfies StepResult
})
const onContentBlockStop = Effect.fn("AnthropicMessages.onContentBlockStop")(function* (
state: ParserState,
event: AnthropicEvent,
) {
if (event.index === undefined) return [state, NO_EVENTS] satisfies StepResult
const result = yield* ToolStream.finish(ADAPTER, state.tools, event.index)
return [{ ...state, tools: result.tools }, result.event ? [result.event] : NO_EVENTS] satisfies StepResult
})
const onMessageDelta = (state: ParserState, event: AnthropicEvent): StepResult => {
const usage = mergeUsage(state.usage, mapUsage(event.usage))
return [
{ ...state, usage },
[
{
type: "request-finish",
reason: mapFinishReason(event.delta?.stop_reason),
usage,
...(event.delta?.stop_sequence
? { providerMetadata: anthropicMetadata({ stopSequence: event.delta.stop_sequence }) }
: {}),
},
],
]
}
const onError = (state: ParserState, event: AnthropicEvent): StepResult => [
state,
[{ type: "provider-error", message: event.error?.message ?? "Anthropic Messages stream error" }],
]
const step = (state: ParserState, event: AnthropicEvent) => {
if (event.type === "message_start") return Effect.succeed(onMessageStart(state, event))
if (event.type === "content_block_start") return Effect.succeed(onContentBlockStart(state, event))
if (event.type === "content_block_delta") return onContentBlockDelta(state, event)
if (event.type === "content_block_stop") return onContentBlockStop(state, event)
if (event.type === "message_delta") return Effect.succeed(onMessageDelta(state, event))
if (event.type === "error") return Effect.succeed(onError(state, event))
return Effect.succeed<StepResult>([state, NO_EVENTS])
}
// =============================================================================
// Protocol And Anthropic Route
// =============================================================================
/**
* The Anthropic Messages protocol — request body construction, body schema,
* and the streaming-event state machine. Used by native Anthropic Cloud and
* (once registered) Vertex Anthropic / Bedrock-hosted Anthropic passthrough.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: AnthropicMessagesBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(AnthropicEvent),
initial: () => ({ tools: ToolStream.empty<number>() }),
step,
},
})
export const route = Route.make({
id: ADAPTER,
protocol,
endpoint: Endpoint.path(PATH),
auth: Auth.apiKeyHeader("x-api-key"),
framing: Framing.sse,
headers: () => ({ "anthropic-version": "2023-06-01" }),
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = Route.model(route, {
provider: "anthropic",
baseURL: DEFAULT_BASE_URL,
})
export * as AnthropicMessages from "./anthropic-messages"

View File

@@ -0,0 +1,531 @@
import { Effect, Schema } from "effect"
import { Route, type RouteModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Protocol } from "../route/protocol"
import {
Usage,
type CacheHint,
type FinishReason,
type LLMEvent,
type LLMRequest,
type ToolCallPart,
type ToolDefinition,
type ToolResultPart,
} from "../schema"
import { BedrockEventStream } from "./bedrock-event-stream"
import { JsonObject, optionalArray, ProviderShared } from "./shared"
import { BedrockAuth, type Credentials as BedrockCredentials } from "./utils/bedrock-auth"
import { BedrockCache } from "./utils/bedrock-cache"
import { BedrockMedia } from "./utils/bedrock-media"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "bedrock-converse"
export type { Credentials as BedrockCredentials } from "./utils/bedrock-auth"
// =============================================================================
// Public Model Input
// =============================================================================
export type BedrockConverseModelInput = RouteModelInput & {
/**
* Bearer API key (Bedrock's newer API key auth). Sets the `Authorization`
* header and bypasses SigV4 signing. Mutually exclusive with `credentials`.
*/
readonly apiKey?: string
/**
* AWS credentials for SigV4 signing. The route signs each request at
* `toHttp` time using `aws4fetch`. Mutually exclusive with `apiKey`.
*/
readonly credentials?: BedrockCredentials
readonly headers?: Record<string, string>
}
// =============================================================================
// Request Body Schema
// =============================================================================
const BedrockTextBlock = Schema.Struct({
text: Schema.String,
})
type BedrockTextBlock = Schema.Schema.Type<typeof BedrockTextBlock>
const BedrockToolUseBlock = Schema.Struct({
toolUse: Schema.Struct({
toolUseId: Schema.String,
name: Schema.String,
input: Schema.Unknown,
}),
})
type BedrockToolUseBlock = Schema.Schema.Type<typeof BedrockToolUseBlock>
const BedrockToolResultContentItem = Schema.Union([
Schema.Struct({ text: Schema.String }),
Schema.Struct({ json: Schema.Unknown }),
])
const BedrockToolResultBlock = Schema.Struct({
toolResult: Schema.Struct({
toolUseId: Schema.String,
content: Schema.Array(BedrockToolResultContentItem),
status: Schema.optional(Schema.Literals(["success", "error"])),
}),
})
type BedrockToolResultBlock = Schema.Schema.Type<typeof BedrockToolResultBlock>
const BedrockReasoningBlock = Schema.Struct({
reasoningContent: Schema.Struct({
reasoningText: Schema.optional(
Schema.Struct({
text: Schema.String,
signature: Schema.optional(Schema.String),
}),
),
}),
})
const BedrockUserBlock = Schema.Union([
BedrockTextBlock,
BedrockMedia.ImageBlock,
BedrockMedia.DocumentBlock,
BedrockToolResultBlock,
BedrockCache.CachePointBlock,
])
type BedrockUserBlock = Schema.Schema.Type<typeof BedrockUserBlock>
const BedrockAssistantBlock = Schema.Union([
BedrockTextBlock,
BedrockReasoningBlock,
BedrockToolUseBlock,
BedrockCache.CachePointBlock,
])
type BedrockAssistantBlock = Schema.Schema.Type<typeof BedrockAssistantBlock>
const BedrockMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(BedrockUserBlock) }),
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(BedrockAssistantBlock) }),
]).pipe(Schema.toTaggedUnion("role"))
type BedrockMessage = Schema.Schema.Type<typeof BedrockMessage>
const BedrockSystemBlock = Schema.Union([BedrockTextBlock, BedrockCache.CachePointBlock])
type BedrockSystemBlock = Schema.Schema.Type<typeof BedrockSystemBlock>
const BedrockTool = Schema.Struct({
toolSpec: Schema.Struct({
name: Schema.String,
description: Schema.String,
inputSchema: Schema.Struct({
json: JsonObject,
}),
}),
})
type BedrockTool = Schema.Schema.Type<typeof BedrockTool>
const BedrockToolChoice = Schema.Union([
Schema.Struct({ auto: Schema.Struct({}) }),
Schema.Struct({ any: Schema.Struct({}) }),
Schema.Struct({ tool: Schema.Struct({ name: Schema.String }) }),
])
const BedrockBodyFields = {
modelId: Schema.String,
messages: Schema.Array(BedrockMessage),
system: optionalArray(BedrockSystemBlock),
inferenceConfig: Schema.optional(
Schema.Struct({
maxTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
stopSequences: optionalArray(Schema.String),
}),
),
toolConfig: Schema.optional(
Schema.Struct({
tools: Schema.Array(BedrockTool),
toolChoice: Schema.optional(BedrockToolChoice),
}),
),
additionalModelRequestFields: Schema.optional(JsonObject),
}
const BedrockConverseBody = Schema.Struct(BedrockBodyFields)
export type BedrockConverseBody = Schema.Schema.Type<typeof BedrockConverseBody>
const BedrockUsageSchema = Schema.Struct({
inputTokens: Schema.optional(Schema.Number),
outputTokens: Schema.optional(Schema.Number),
totalTokens: Schema.optional(Schema.Number),
cacheReadInputTokens: Schema.optional(Schema.Number),
cacheWriteInputTokens: Schema.optional(Schema.Number),
})
type BedrockUsageSchema = Schema.Schema.Type<typeof BedrockUsageSchema>
// Streaming event shape — the AWS event stream wraps each JSON payload by its
// `:event-type` header (e.g. `messageStart`, `contentBlockDelta`). We
// reconstruct that wrapping in `BedrockEventStream.framing` so the event
// schema can stay a plain discriminated record.
const BedrockEvent = Schema.Struct({
messageStart: Schema.optional(Schema.Struct({ role: Schema.String })),
contentBlockStart: Schema.optional(
Schema.Struct({
contentBlockIndex: Schema.Number,
start: Schema.optional(
Schema.Struct({
toolUse: Schema.optional(Schema.Struct({ toolUseId: Schema.String, name: Schema.String })),
}),
),
}),
),
contentBlockDelta: Schema.optional(
Schema.Struct({
contentBlockIndex: Schema.Number,
delta: Schema.optional(
Schema.Struct({
text: Schema.optional(Schema.String),
toolUse: Schema.optional(Schema.Struct({ input: Schema.String })),
reasoningContent: Schema.optional(
Schema.Struct({
text: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
}),
),
}),
),
}),
),
contentBlockStop: Schema.optional(Schema.Struct({ contentBlockIndex: Schema.Number })),
messageStop: Schema.optional(
Schema.Struct({
stopReason: Schema.String,
additionalModelResponseFields: Schema.optional(Schema.Unknown),
}),
),
metadata: Schema.optional(
Schema.Struct({
usage: Schema.optional(BedrockUsageSchema),
metrics: Schema.optional(Schema.Unknown),
}),
),
internalServerException: Schema.optional(Schema.Struct({ message: Schema.String })),
modelStreamErrorException: Schema.optional(Schema.Struct({ message: Schema.String })),
validationException: Schema.optional(Schema.Struct({ message: Schema.String })),
throttlingException: Schema.optional(Schema.Struct({ message: Schema.String })),
serviceUnavailableException: Schema.optional(Schema.Struct({ message: Schema.String })),
})
type BedrockEvent = Schema.Schema.Type<typeof BedrockEvent>
// =============================================================================
// Request Lowering
// =============================================================================
const lowerTool = (tool: ToolDefinition): BedrockTool => ({
toolSpec: {
name: tool.name,
description: tool.description,
inputSchema: { json: tool.inputSchema },
},
})
const textWithCache = (
text: string,
cache: CacheHint | undefined,
): Array<BedrockTextBlock | BedrockCache.CachePointBlock> => {
const cachePoint = BedrockCache.block(cache)
return cachePoint ? [{ text }, cachePoint] : [{ text }]
}
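// e.g. (illustrative) textWithCache("Be terse.", { type: "ephemeral" }) yields
// [{ text: "Be terse." }, <cache point marker>] whereas an undefined hint
// yields just [{ text: "Be terse." }]; the marker shape is whatever
// BedrockCache.block emits.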
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("Bedrock Converse", toolChoice, {
auto: () => ({ auto: {} }) as const,
none: () => undefined,
required: () => ({ any: {} }) as const,
tool: (name) => ({ tool: { name } }) as const,
})
const lowerToolCall = (part: ToolCallPart): BedrockToolUseBlock => ({
toolUse: {
toolUseId: part.id,
name: part.name,
input: part.input,
},
})
const lowerToolResult = (part: ToolResultPart): BedrockToolResultBlock => ({
toolResult: {
toolUseId: part.id,
content:
part.result.type === "text" || part.result.type === "error"
? [{ text: ProviderShared.toolResultText(part) }]
: [{ json: part.result.value }],
status: part.result.type === "error" ? "error" : "success",
},
})
const lowerMessages = Effect.fn("BedrockConverse.lowerMessages")(function* (request: LLMRequest) {
const messages: BedrockMessage[] = []
for (const message of request.messages) {
if (message.role === "user") {
const content: BedrockUserBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "media"]))
return yield* ProviderShared.unsupportedContent("Bedrock Converse", "user", ["text", "media"])
if (part.type === "text") {
content.push(...textWithCache(part.text, part.cache))
continue
}
if (part.type === "media") {
content.push(yield* BedrockMedia.lower(part))
continue
}
}
messages.push({ role: "user", content })
continue
}
if (message.role === "assistant") {
const content: BedrockAssistantBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"]))
return yield* ProviderShared.unsupportedContent("Bedrock Converse", "assistant", [
"text",
"reasoning",
"tool-call",
])
if (part.type === "text") {
content.push(...textWithCache(part.text, part.cache))
continue
}
if (part.type === "reasoning") {
content.push({
reasoningContent: {
reasoningText: { text: part.text, signature: part.encrypted },
},
})
continue
}
if (part.type === "tool-call") {
content.push(lowerToolCall(part))
continue
}
}
messages.push({ role: "assistant", content })
continue
}
const content: BedrockToolResultBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("Bedrock Converse", "tool", ["tool-result"])
content.push(lowerToolResult(part))
}
messages.push({ role: "user", content })
}
return messages
})
// System prompts share the cache-point convention: emit the text block, then
// optionally a positional `cachePoint` marker.
const lowerSystem = (system: ReadonlyArray<LLMRequest["system"][number]>): BedrockSystemBlock[] =>
system.flatMap((part) => textWithCache(part.text, part.cache))
const fromRequest = Effect.fn("BedrockConverse.fromRequest")(function* (request: LLMRequest) {
const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
const generation = request.generation
return {
modelId: request.model.id,
messages: yield* lowerMessages(request),
system: request.system.length === 0 ? undefined : lowerSystem(request.system),
inferenceConfig:
generation?.maxTokens === undefined &&
generation?.temperature === undefined &&
generation?.topP === undefined &&
(generation?.stop === undefined || generation.stop.length === 0)
? undefined
: {
maxTokens: generation?.maxTokens,
temperature: generation?.temperature,
topP: generation?.topP,
stopSequences: generation?.stop,
},
toolConfig:
request.tools.length > 0 && request.toolChoice?.type !== "none"
? { tools: request.tools.map(lowerTool), toolChoice }
: undefined,
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapFinishReason = (reason: string): FinishReason => {
if (reason === "end_turn" || reason === "stop_sequence") return "stop"
if (reason === "max_tokens") return "length"
if (reason === "tool_use") return "tool-calls"
if (reason === "content_filtered" || reason === "guardrail_intervened") return "content-filter"
return "unknown"
}
const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
cacheReadInputTokens: usage.cacheReadInputTokens,
cacheWriteInputTokens: usage.cacheWriteInputTokens,
native: usage,
})
}
interface ParserState {
readonly tools: ToolStream.State<number>
// Bedrock splits the finish into `messageStop` (carries `stopReason`) and
// `metadata` (carries usage). Hold the terminal event in state so `onHalt`
// can emit exactly one finish after both chunks have had a chance to arrive.
readonly pendingFinish: { readonly reason: FinishReason; readonly usage?: Usage } | undefined
}
const step = (state: ParserState, event: BedrockEvent) =>
Effect.gen(function* () {
if (event.contentBlockStart?.start?.toolUse) {
const index = event.contentBlockStart.contentBlockIndex
return [
{
...state,
tools: ToolStream.start(state.tools, index, {
id: event.contentBlockStart.start.toolUse.toolUseId,
name: event.contentBlockStart.start.toolUse.name,
}),
},
[],
] as const
}
if (event.contentBlockDelta?.delta?.text) {
return [state, [{ type: "text-delta" as const, text: event.contentBlockDelta.delta.text }]] as const
}
if (event.contentBlockDelta?.delta?.reasoningContent?.text) {
return [
state,
[{ type: "reasoning-delta" as const, text: event.contentBlockDelta.delta.reasoningContent.text }],
] as const
}
if (event.contentBlockDelta?.delta?.toolUse) {
const index = event.contentBlockDelta.contentBlockIndex
const result = ToolStream.appendExisting(
ADAPTER,
state.tools,
index,
event.contentBlockDelta.delta.toolUse.input,
"Bedrock Converse tool delta is missing its tool call",
)
if (ToolStream.isError(result)) return yield* result
return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const
}
if (event.contentBlockStop) {
const result = yield* ToolStream.finish(ADAPTER, state.tools, event.contentBlockStop.contentBlockIndex)
return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const
}
if (event.messageStop) {
return [
{
...state,
pendingFinish: { reason: mapFinishReason(event.messageStop.stopReason), usage: state.pendingFinish?.usage },
},
[],
] as const
}
if (event.metadata) {
const usage = mapUsage(event.metadata.usage)
return [{ ...state, pendingFinish: { reason: state.pendingFinish?.reason ?? "stop", usage } }, []] as const
}
if (event.internalServerException || event.modelStreamErrorException || event.serviceUnavailableException) {
const message =
event.internalServerException?.message ??
event.modelStreamErrorException?.message ??
event.serviceUnavailableException?.message ??
"Bedrock Converse stream error"
return [state, [{ type: "provider-error" as const, message, retryable: true }]] as const
}
if (event.validationException || event.throttlingException) {
const message =
event.validationException?.message ?? event.throttlingException?.message ?? "Bedrock Converse error"
return [
state,
[{ type: "provider-error" as const, message, retryable: event.throttlingException !== undefined }],
] as const
}
return [state, []] as const
})
const framing = BedrockEventStream.framing(ADAPTER)
const onHalt = (state: ParserState): ReadonlyArray<LLMEvent> =>
state.pendingFinish
? [{ type: "request-finish", reason: state.pendingFinish.reason, usage: state.pendingFinish.usage }]
: []
// =============================================================================
// Protocol And Bedrock Route
// =============================================================================
/**
* The Bedrock Converse protocol — request body construction, body schema, and
* the streaming-event state machine.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: BedrockConverseBody,
from: fromRequest,
},
stream: {
event: BedrockEvent,
initial: () => ({ tools: ToolStream.empty<number>(), pendingFinish: undefined }),
step,
onHalt,
},
})
export const route = Route.make({
id: ADAPTER,
protocol,
// Bedrock's URL embeds the region in the host (set on `model.baseURL` by
// the provider helper from credentials) and the validated modelId in the
// path. We read the validated body so the URL matches the body that gets
// signed.
endpoint: Endpoint.path<BedrockConverseBody>(
({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`,
),
auth: BedrockAuth.auth,
framing,
})
export const nativeCredentials = BedrockAuth.nativeCredentials
const bedrockModel = Route.model(
route,
{
provider: "bedrock",
},
{
mapInput: (input: BedrockConverseModelInput) => {
const { credentials, ...rest } = input
const region = credentials?.region ?? "us-east-1"
return {
...rest,
baseURL: rest.baseURL ?? `https://bedrock-runtime.${region}.amazonaws.com`,
native: nativeCredentials(input.native, credentials),
}
},
},
)
export const model = bedrockModel
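// Usage sketch (illustrative; credential fields beyond region live in
// utils/bedrock-auth and are elided here): SigV4 credentials pick the
// regional endpoint when baseURL is not set.
//
//   const m = model({ credentials: { region: "eu-west-1", /* ...keys */ } })
//   // baseURL defaults to https://bedrock-runtime.eu-west-1.amazonaws.com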
export * as BedrockConverse from "./bedrock-converse"

View File

@@ -0,0 +1,87 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
import { Effect, Stream } from "effect"
import type { Framing } from "../route/framing"
import { ProviderShared } from "./shared"
// Bedrock streams responses using the AWS event stream binary protocol — each
// frame is `[length:4][headers-length:4][prelude-crc:4][headers][payload][crc:4]`.
// We use `@smithy/eventstream-codec` to validate framing and CRCs, then
// reconstruct the JSON wrapping by `:event-type` so the chunk schema can match.
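// e.g. (illustrative) a frame whose `:event-type` header is `contentBlockDelta`
// and whose payload is {"contentBlockIndex":0,"delta":{"text":"Hi"}} is
// re-emitted as { contentBlockDelta: { contentBlockIndex: 0, delta: { text: "Hi" } } }.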
const eventCodec = new EventStreamCodec(toUtf8, fromUtf8)
const utf8 = new TextDecoder()
// Cursor-tracking buffer state. Bytes accumulate in `buffer`; `offset` is the
// read position. Reading by `subarray` is zero-copy. We only allocate a fresh
// buffer when a new network chunk arrives and we need to append.
interface FrameBufferState {
readonly buffer: Uint8Array
readonly offset: number
}
const initialFrameBuffer: FrameBufferState = { buffer: new Uint8Array(0), offset: 0 }
const appendChunk = (state: FrameBufferState, chunk: Uint8Array): FrameBufferState => {
const remaining = state.buffer.length - state.offset
// Compact: drop the consumed prefix and append the new chunk in one alloc.
// This bounds buffer growth to at most one network chunk past the live
// window, regardless of stream length.
const next = new Uint8Array(remaining + chunk.length)
next.set(state.buffer.subarray(state.offset), 0)
next.set(chunk, remaining)
return { buffer: next, offset: 0 }
}
const consumeFrames = (route: string) => (state: FrameBufferState, chunk: Uint8Array) =>
Effect.gen(function* () {
let cursor = appendChunk(state, chunk)
const out: object[] = []
while (cursor.buffer.length - cursor.offset >= 4) {
const view = cursor.buffer.subarray(cursor.offset)
const totalLength = new DataView(view.buffer, view.byteOffset, view.byteLength).getUint32(0, false)
if (view.length < totalLength) break
const decoded = yield* Effect.try({
try: () => eventCodec.decode(view.subarray(0, totalLength)),
catch: (error) =>
ProviderShared.eventError(
route,
`Failed to decode Bedrock Converse event-stream frame: ${
error instanceof Error ? error.message : String(error)
}`,
),
})
cursor = { buffer: cursor.buffer, offset: cursor.offset + totalLength }
if (decoded.headers[":message-type"]?.value !== "event") continue
const eventType = decoded.headers[":event-type"]?.value
if (typeof eventType !== "string") continue
const payload = utf8.decode(decoded.body)
if (!payload) continue
// The AWS event stream pads short payloads with a `p` field. Drop it
// before handing the object to the chunk schema. JSON decode goes
// through the shared Schema-driven codec to satisfy the package rule
// against ad-hoc `JSON.parse` calls.
const parsed = (yield* ProviderShared.parseJson(
route,
payload,
"Failed to parse Bedrock Converse event-stream payload",
)) as Record<string, unknown>
delete parsed.p
out.push({ [eventType]: parsed })
}
return [cursor, out] as const
})
/**
* AWS event-stream framing for Bedrock Converse. Each frame is decoded by
* `@smithy/eventstream-codec` (length + header + payload + CRC) and rewrapped
* under its `:event-type` header so the chunk schema can match the JSON
* payload directly.
*/
export const framing = (route: string): Framing<object> => ({
id: "aws-event-stream",
frame: (bytes) => bytes.pipe(Stream.mapAccumEffect(() => initialFrameBuffer, consumeFrames(route))),
})
export * as BedrockEventStream from "./bedrock-event-stream"

View File

@@ -0,0 +1,397 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Protocol } from "../route/protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type MediaPart,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { JsonObject, optionalArray, ProviderShared } from "./shared"
import { GeminiToolSchema } from "./utils/gemini-tool-schema"
const ADAPTER = "gemini"
export const DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
// =============================================================================
// Request Body Schema
// =============================================================================
const GeminiTextPart = Schema.Struct({
text: Schema.String,
thought: Schema.optional(Schema.Boolean),
thoughtSignature: Schema.optional(Schema.String),
})
const GeminiInlineDataPart = Schema.Struct({
inlineData: Schema.Struct({
mimeType: Schema.String,
data: Schema.String,
}),
})
const GeminiFunctionCallPart = Schema.Struct({
functionCall: Schema.Struct({
name: Schema.String,
args: Schema.Unknown,
}),
thoughtSignature: Schema.optional(Schema.String),
})
const GeminiFunctionResponsePart = Schema.Struct({
functionResponse: Schema.Struct({
name: Schema.String,
response: Schema.Unknown,
}),
})
const GeminiContentPart = Schema.Union([
GeminiTextPart,
GeminiInlineDataPart,
GeminiFunctionCallPart,
GeminiFunctionResponsePart,
])
const GeminiContent = Schema.Struct({
role: Schema.Literals(["user", "model"]),
parts: Schema.Array(GeminiContentPart),
})
type GeminiContent = Schema.Schema.Type<typeof GeminiContent>
const GeminiSystemInstruction = Schema.Struct({
parts: Schema.Array(Schema.Struct({ text: Schema.String })),
})
const GeminiFunctionDeclaration = Schema.Struct({
name: Schema.String,
description: Schema.String,
parameters: Schema.optional(JsonObject),
})
const GeminiTool = Schema.Struct({
functionDeclarations: Schema.Array(GeminiFunctionDeclaration),
})
const GeminiToolConfig = Schema.Struct({
functionCallingConfig: Schema.Struct({
mode: Schema.Literals(["AUTO", "NONE", "ANY"]),
allowedFunctionNames: optionalArray(Schema.String),
}),
})
const GeminiThinkingConfig = Schema.Struct({
thinkingBudget: Schema.optional(Schema.Number),
includeThoughts: Schema.optional(Schema.Boolean),
})
const GeminiGenerationConfig = Schema.Struct({
maxOutputTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
topK: Schema.optional(Schema.Number),
stopSequences: optionalArray(Schema.String),
thinkingConfig: Schema.optional(GeminiThinkingConfig),
})
const GeminiBodyFields = {
contents: Schema.Array(GeminiContent),
systemInstruction: Schema.optional(GeminiSystemInstruction),
tools: optionalArray(GeminiTool),
toolConfig: Schema.optional(GeminiToolConfig),
generationConfig: Schema.optional(GeminiGenerationConfig),
}
const GeminiBody = Schema.Struct(GeminiBodyFields)
export type GeminiBody = Schema.Schema.Type<typeof GeminiBody>
const GeminiUsage = Schema.Struct({
cachedContentTokenCount: Schema.optional(Schema.Number),
thoughtsTokenCount: Schema.optional(Schema.Number),
promptTokenCount: Schema.optional(Schema.Number),
candidatesTokenCount: Schema.optional(Schema.Number),
totalTokenCount: Schema.optional(Schema.Number),
})
type GeminiUsage = Schema.Schema.Type<typeof GeminiUsage>
const GeminiCandidate = Schema.Struct({
content: Schema.optional(GeminiContent),
finishReason: Schema.optional(Schema.String),
})
const GeminiEvent = Schema.Struct({
candidates: optionalArray(GeminiCandidate),
usageMetadata: Schema.optional(GeminiUsage),
})
type GeminiEvent = Schema.Schema.Type<typeof GeminiEvent>
interface ParserState {
readonly finishReason?: string
readonly hasToolCalls: boolean
readonly nextToolCallId: number
readonly usage?: Usage
}
const invalid = ProviderShared.invalidRequest
const mediaData = ProviderShared.mediaBytes
// =============================================================================
// Tool Schema Conversion
// =============================================================================
// Tool-schema conversion has two distinct concerns:
//
// 1. Sanitize — fix common authoring mistakes Gemini rejects: integer/number
// enums (must be strings), `required` entries that don't match a property,
// untyped arrays (`items` must be present), and `properties`/`required`
// keys on non-object scalars. Mirrors OpenCode's historical Gemini rules.
//
// 2. Project — lossy mapping from JSON Schema to Gemini's schema dialect:
// drop empty objects, derive `nullable: true` from `type: [..., "null"]`,
// coerce `const` to `[const]` enum, recurse properties/items, propagate
// only an allowlisted set of keys (description, required, format, type,
// properties, items, allOf, anyOf, oneOf, minLength). Anything outside the
// allowlist (e.g. `additionalProperties`, `$ref`) is silently dropped.
//
// Sanitize runs first, then project. The implementation lives in
// `utils/gemini-tool-schema` so this protocol keeps the same shape as the other
// provider protocols.
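// e.g. (illustrative, hypothetical input) sanitize + project turns
//   { type: "object", properties: { n: { type: "integer", enum: [1, 2] } },
//     required: ["n", "missing"], additionalProperties: false }
// into
//   { type: "object", properties: { n: { type: "string", enum: ["1", "2"] } }, required: ["n"] }
// with the stray required entry pruned and additionalProperties dropped.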
// =============================================================================
// Request Lowering
// =============================================================================
const lowerTool = (tool: ToolDefinition) => ({
name: tool.name,
description: tool.description,
parameters: GeminiToolSchema.convert(tool.inputSchema),
})
const lowerToolConfig = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("Gemini", toolChoice, {
auto: () => ({ functionCallingConfig: { mode: "AUTO" as const } }),
none: () => ({ functionCallingConfig: { mode: "NONE" as const } }),
required: () => ({ functionCallingConfig: { mode: "ANY" as const } }),
tool: (name) => ({ functionCallingConfig: { mode: "ANY" as const, allowedFunctionNames: [name] } }),
})
const lowerUserPart = (part: TextPart | MediaPart) =>
part.type === "text" ? { text: part.text } : { inlineData: { mimeType: part.mediaType, data: mediaData(part) } }
const lowerToolCall = (part: ToolCallPart) => ({
functionCall: { name: part.name, args: part.input },
})
const lowerMessages = Effect.fn("Gemini.lowerMessages")(function* (request: LLMRequest) {
const contents: GeminiContent[] = []
for (const message of request.messages) {
if (message.role === "user") {
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "media"]))
return yield* ProviderShared.unsupportedContent("Gemini", "user", ["text", "media"])
parts.push(lowerUserPart(part))
}
contents.push({ role: "user", parts })
continue
}
if (message.role === "assistant") {
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"]))
return yield* ProviderShared.unsupportedContent("Gemini", "assistant", ["text", "reasoning", "tool-call"])
if (part.type === "text") {
parts.push({ text: part.text })
continue
}
if (part.type === "reasoning") {
parts.push({ text: part.text, thought: true })
continue
}
if (part.type === "tool-call") {
parts.push(lowerToolCall(part))
continue
}
}
contents.push({ role: "model", parts })
continue
}
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("Gemini", "tool", ["tool-result"])
parts.push({
functionResponse: {
name: part.name,
response: {
name: part.name,
content: ProviderShared.toolResultText(part),
},
},
})
}
contents.push({ role: "user", parts })
}
return contents
})
const geminiOptions = (request: LLMRequest) => request.providerOptions?.gemini
const thinkingConfig = (request: LLMRequest) => {
const value = geminiOptions(request)?.thinkingConfig
if (!ProviderShared.isRecord(value)) return undefined
const result = {
thinkingBudget: typeof value.thinkingBudget === "number" ? value.thinkingBudget : undefined,
includeThoughts: typeof value.includeThoughts === "boolean" ? value.includeThoughts : undefined,
}
return Object.values(result).some((item) => item !== undefined) ? result : undefined
}
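// e.g. (illustrative) providerOptions: { gemini: { thinkingConfig: { thinkingBudget: 1024, includeThoughts: true } } }
// passes through as generationConfig.thinkingConfig; values of the wrong type are dropped.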
const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMRequest) {
const toolsEnabled = request.tools.length > 0 && request.toolChoice?.type !== "none"
const generation = request.generation
const generationConfig = {
maxOutputTokens: generation?.maxTokens,
temperature: generation?.temperature,
topP: generation?.topP,
topK: generation?.topK,
stopSequences: generation?.stop,
thinkingConfig: thinkingConfig(request),
}
return {
contents: yield* lowerMessages(request),
systemInstruction:
request.system.length === 0 ? undefined : { parts: [{ text: ProviderShared.joinText(request.system) }] },
tools: toolsEnabled ? [{ functionDeclarations: request.tools.map(lowerTool) }] : undefined,
toolConfig: toolsEnabled && request.toolChoice ? yield* lowerToolConfig(request.toolChoice) : undefined,
generationConfig: Object.values(generationConfig).some((value) => value !== undefined)
? generationConfig
: undefined,
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapUsage = (usage: GeminiUsage | undefined) => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.promptTokenCount,
outputTokens: usage.candidatesTokenCount,
reasoningTokens: usage.thoughtsTokenCount,
cacheReadInputTokens: usage.cachedContentTokenCount,
totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, usage.candidatesTokenCount, usage.totalTokenCount),
native: usage,
})
}
const mapFinishReason = (finishReason: string | undefined, hasToolCalls: boolean): FinishReason => {
if (finishReason === "STOP") return hasToolCalls ? "tool-calls" : "stop"
if (finishReason === "MAX_TOKENS") return "length"
if (
finishReason === "IMAGE_SAFETY" ||
finishReason === "RECITATION" ||
finishReason === "SAFETY" ||
finishReason === "BLOCKLIST" ||
finishReason === "PROHIBITED_CONTENT" ||
finishReason === "SPII"
)
return "content-filter"
if (finishReason === "MALFORMED_FUNCTION_CALL") return "error"
return "unknown"
}
const finish = (state: ParserState): ReadonlyArray<LLMEvent> =>
state.finishReason || state.usage
? [{ type: "request-finish", reason: mapFinishReason(state.finishReason, state.hasToolCalls), usage: state.usage }]
: []
const step = (state: ParserState, event: GeminiEvent) => {
const nextState = {
...state,
usage: event.usageMetadata ? (mapUsage(event.usageMetadata) ?? state.usage) : state.usage,
}
const candidate = event.candidates?.[0]
if (!candidate?.content)
return Effect.succeed([
{ ...nextState, finishReason: candidate?.finishReason ?? nextState.finishReason },
[],
] as const)
const events: LLMEvent[] = []
let hasToolCalls = nextState.hasToolCalls
let nextToolCallId = nextState.nextToolCallId
for (const part of candidate.content.parts) {
if ("text" in part && part.text.length > 0) {
events.push({ type: part.thought ? "reasoning-delta" : "text-delta", text: part.text })
continue
}
if ("functionCall" in part) {
const input = part.functionCall.args
const id = `tool_${nextToolCallId++}`
events.push({ type: "tool-call", id, name: part.functionCall.name, input })
hasToolCalls = true
}
}
return Effect.succeed([
{
...nextState,
hasToolCalls,
nextToolCallId,
finishReason: candidate.finishReason ?? nextState.finishReason,
},
events,
] as const)
}
// =============================================================================
// Protocol And Gemini Route
// =============================================================================
/**
* The Gemini protocol — request body construction, body schema, and the
* streaming-event state machine. Used by Google AI Studio Gemini and (once
* registered) Vertex Gemini.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: GeminiBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(GeminiEvent),
initial: () => ({ hasToolCalls: false, nextToolCallId: 0 }),
step,
onHalt: finish,
},
})
export const route = Route.make({
id: ADAPTER,
protocol,
// Gemini's path embeds the model id and pins SSE framing via the `alt=sse` query parameter.
endpoint: Endpoint.path(({ request }) => `/models/${request.model.id}:streamGenerateContent?alt=sse`),
auth: Auth.apiKeyHeader("x-goog-api-key"),
framing: Framing.sse,
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = Route.model(route, {
provider: "google",
baseURL: DEFAULT_BASE_URL,
})
export * as Gemini from "./gemini"

View File

@@ -0,0 +1,6 @@
export * as AnthropicMessages from "./anthropic-messages"
export * as BedrockConverse from "./bedrock-converse"
export * as Gemini from "./gemini"
export * as OpenAIChat from "./openai-chat"
export * as OpenAICompatibleChat from "./openai-compatible-chat"
export * as OpenAIResponses from "./openai-responses"

View File

@@ -0,0 +1,404 @@
import { Array as Arr, Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { HttpTransport } from "../route/transport"
import { Protocol } from "../route/protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { isRecord, JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { OpenAIOptions } from "./utils/openai-options"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "openai-chat"
export const DEFAULT_BASE_URL = "https://api.openai.com/v1"
export const PATH = "/chat/completions"
// =============================================================================
// Request Body Schema
// =============================================================================
// The body schema describes the provider-native JSON body. `fromRequest` below
// builds this shape from the common `LLMRequest`, then `Route.make` validates
// and JSON-encodes it before transport.
const OpenAIChatFunction = Schema.Struct({
name: Schema.String,
description: Schema.String,
parameters: JsonObject,
})
const OpenAIChatTool = Schema.Struct({
type: Schema.tag("function"),
function: OpenAIChatFunction,
})
type OpenAIChatTool = Schema.Schema.Type<typeof OpenAIChatTool>
const OpenAIChatAssistantToolCall = Schema.Struct({
id: Schema.String,
type: Schema.tag("function"),
function: Schema.Struct({
name: Schema.String,
arguments: Schema.String,
}),
})
type OpenAIChatAssistantToolCall = Schema.Schema.Type<typeof OpenAIChatAssistantToolCall>
const OpenAIChatMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
Schema.Struct({ role: Schema.Literal("user"), content: Schema.String }),
Schema.Struct({
role: Schema.Literal("assistant"),
content: Schema.NullOr(Schema.String),
tool_calls: optionalArray(OpenAIChatAssistantToolCall),
reasoning_content: Schema.optional(Schema.String),
}),
Schema.Struct({ role: Schema.Literal("tool"), tool_call_id: Schema.String, content: Schema.String }),
]).pipe(Schema.toTaggedUnion("role"))
type OpenAIChatMessage = Schema.Schema.Type<typeof OpenAIChatMessage>
const OpenAIChatToolChoice = Schema.Union([
Schema.Literals(["auto", "none", "required"]),
Schema.Struct({
type: Schema.tag("function"),
function: Schema.Struct({ name: Schema.String }),
}),
])
export const bodyFields = {
model: Schema.String,
messages: Schema.Array(OpenAIChatMessage),
tools: optionalArray(OpenAIChatTool),
tool_choice: Schema.optional(OpenAIChatToolChoice),
stream: Schema.Literal(true),
stream_options: Schema.optional(Schema.Struct({ include_usage: Schema.Boolean })),
store: Schema.optional(Schema.Boolean),
reasoning_effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
max_tokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
frequency_penalty: Schema.optional(Schema.Number),
presence_penalty: Schema.optional(Schema.Number),
seed: Schema.optional(Schema.Number),
stop: optionalArray(Schema.String),
}
const OpenAIChatBody = Schema.Struct(bodyFields)
export type OpenAIChatBody = Schema.Schema.Type<typeof OpenAIChatBody>
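// A minimal conforming body (sketch; the model id and text are illustrative,
// not defaults of this module):
const exampleBody: OpenAIChatBody = {
  model: "gpt-4o-mini",
  messages: [
    { role: "system", content: "You are terse." },
    { role: "user", content: "Hi" },
  ],
  stream: true,
  stream_options: { include_usage: true },
}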
// =============================================================================
// Streaming Event Schema
// =============================================================================
// The event schema describes one decoded SSE `data:` payload. `Framing.sse`
// splits the byte stream into strings, then `Protocol.jsonEvent` decodes each
// string into this provider-native event shape.
const OpenAIChatUsage = Schema.Struct({
prompt_tokens: Schema.optional(Schema.Number),
completion_tokens: Schema.optional(Schema.Number),
total_tokens: Schema.optional(Schema.Number),
prompt_tokens_details: optionalNull(
Schema.Struct({
cached_tokens: Schema.optional(Schema.Number),
}),
),
completion_tokens_details: optionalNull(
Schema.Struct({
reasoning_tokens: Schema.optional(Schema.Number),
}),
),
})
const OpenAIChatToolCallDeltaFunction = Schema.Struct({
name: optionalNull(Schema.String),
arguments: optionalNull(Schema.String),
})
const OpenAIChatToolCallDelta = Schema.Struct({
index: Schema.Number,
id: optionalNull(Schema.String),
function: optionalNull(OpenAIChatToolCallDeltaFunction),
})
type OpenAIChatToolCallDelta = Schema.Schema.Type<typeof OpenAIChatToolCallDelta>
const OpenAIChatDelta = Schema.Struct({
content: optionalNull(Schema.String),
tool_calls: optionalNull(Schema.Array(OpenAIChatToolCallDelta)),
})
const OpenAIChatChoice = Schema.Struct({
delta: optionalNull(OpenAIChatDelta),
finish_reason: optionalNull(Schema.String),
})
const OpenAIChatEvent = Schema.Struct({
choices: Schema.Array(OpenAIChatChoice),
usage: optionalNull(OpenAIChatUsage),
})
type OpenAIChatEvent = Schema.Schema.Type<typeof OpenAIChatEvent>
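// One mid-stream payload as the parser sees it (sketch; values illustrative):
//   { choices: [{ delta: { content: "Hel" }, finish_reason: null }], usage: null }
// The last chunk instead carries `usage`, requested via stream_options.include_usage.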
type OpenAIChatRequestMessage = LLMRequest["messages"][number]
interface ParserState {
readonly tools: ToolStream.State<number>
readonly toolCallEvents: ReadonlyArray<LLMEvent>
readonly usage?: Usage
readonly finishReason?: FinishReason
}
const invalid = ProviderShared.invalidRequest
// =============================================================================
// Request Lowering
// =============================================================================
// Lowering is the only place that knows how common LLM messages map onto the
// OpenAI Chat wire format. Keep provider quirks here instead of leaking native
// fields into `LLMRequest`.
const lowerTool = (tool: ToolDefinition): OpenAIChatTool => ({
type: "function",
function: {
name: tool.name,
description: tool.description,
parameters: tool.inputSchema,
},
})
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("OpenAI Chat", toolChoice, {
auto: () => "auto" as const,
none: () => "none" as const,
required: () => "required" as const,
tool: (name) => ({ type: "function" as const, function: { name } }),
})
const lowerToolCall = (part: ToolCallPart): OpenAIChatAssistantToolCall => ({
id: part.id,
type: "function",
function: {
name: part.name,
arguments: ProviderShared.encodeJson(part.input),
},
})
const openAICompatibleReasoningContent = (native: unknown) =>
isRecord(native) && typeof native.reasoning_content === "string" ? native.reasoning_content : undefined
const lowerUserMessage = Effect.fn("OpenAIChat.lowerUserMessage")(function* (message: OpenAIChatRequestMessage) {
const content: TextPart[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text"]))
return yield* ProviderShared.unsupportedContent("OpenAI Chat", "user", ["text"])
content.push(part)
}
return { role: "user" as const, content: ProviderShared.joinText(content) }
})
const lowerAssistantMessage = Effect.fn("OpenAIChat.lowerAssistantMessage")(function* (
message: OpenAIChatRequestMessage,
) {
const content: TextPart[] = []
const toolCalls: OpenAIChatAssistantToolCall[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "tool-call"]))
return yield* ProviderShared.unsupportedContent("OpenAI Chat", "assistant", ["text", "tool-call"])
if (part.type === "text") {
content.push(part)
continue
}
if (part.type === "tool-call") {
toolCalls.push(lowerToolCall(part))
continue
}
}
return {
role: "assistant" as const,
content: content.length === 0 ? null : ProviderShared.joinText(content),
tool_calls: toolCalls.length === 0 ? undefined : toolCalls,
reasoning_content: openAICompatibleReasoningContent(message.native?.openaiCompatible),
}
})
const lowerToolMessages = Effect.fn("OpenAIChat.lowerToolMessages")(function* (message: OpenAIChatRequestMessage) {
const messages: OpenAIChatMessage[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("OpenAI Chat", "tool", ["tool-result"])
messages.push({ role: "tool", tool_call_id: part.id, content: ProviderShared.toolResultText(part) })
}
return messages
})
const lowerMessage = Effect.fn("OpenAIChat.lowerMessage")(function* (message: OpenAIChatRequestMessage) {
if (message.role === "user") return [yield* lowerUserMessage(message)]
if (message.role === "assistant") return [yield* lowerAssistantMessage(message)]
return yield* lowerToolMessages(message)
})
const lowerMessages = Effect.fn("OpenAIChat.lowerMessages")(function* (request: LLMRequest) {
const system: OpenAIChatMessage[] =
request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
return [...system, ...Arr.flatten(yield* Effect.forEach(request.messages, lowerMessage))]
})
const lowerOptions = Effect.fn("OpenAIChat.lowerOptions")(function* (request: LLMRequest) {
const store = OpenAIOptions.store(request)
const reasoningEffort = OpenAIOptions.reasoningEffort(request)
if (reasoningEffort && !OpenAIOptions.isReasoningEffort(reasoningEffort))
return yield* invalid(`OpenAI Chat does not support reasoning effort ${reasoningEffort}`)
return {
...(store !== undefined ? { store } : {}),
...(reasoningEffort ? { reasoning_effort: reasoningEffort } : {}),
}
})
const fromRequest = Effect.fn("OpenAIChat.fromRequest")(function* (request: LLMRequest) {
// `fromRequest` returns the provider body only. Endpoint, auth, framing,
// validation, and HTTP execution are composed by `Route.make`.
const generation = request.generation
return {
model: request.model.id,
messages: yield* lowerMessages(request),
tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
stream: true as const,
stream_options: { include_usage: true },
max_tokens: generation?.maxTokens,
temperature: generation?.temperature,
top_p: generation?.topP,
frequency_penalty: generation?.frequencyPenalty,
presence_penalty: generation?.presencePenalty,
seed: generation?.seed,
stop: generation?.stop,
...(yield* lowerOptions(request)),
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
// Streaming parsers are small state machines: every event returns a new state
// plus the common `LLMEvent`s produced by that event. Tool calls are accumulated
// because OpenAI streams JSON arguments across multiple deltas.
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
if (reason === "stop") return "stop"
if (reason === "length") return "length"
if (reason === "content_filter") return "content-filter"
if (reason === "function_call" || reason === "tool_calls") return "tool-calls"
return "unknown"
}
const mapUsage = (usage: OpenAIChatEvent["usage"]): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.prompt_tokens,
outputTokens: usage.completion_tokens,
reasoningTokens: usage.completion_tokens_details?.reasoning_tokens,
cacheReadInputTokens: usage.prompt_tokens_details?.cached_tokens,
totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
native: usage,
})
}
const step = (state: ParserState, event: OpenAIChatEvent) =>
Effect.gen(function* () {
const events: LLMEvent[] = []
const usage = mapUsage(event.usage) ?? state.usage
const choice = event.choices[0]
const finishReason = choice?.finish_reason ? mapFinishReason(choice.finish_reason) : state.finishReason
const delta = choice?.delta
const toolDeltas = delta?.tool_calls ?? []
let tools = state.tools
if (delta?.content) events.push({ type: "text-delta", text: delta.content })
for (const tool of toolDeltas) {
const result = ToolStream.appendOrStart(
ADAPTER,
tools,
tool.index,
{ id: tool.id ?? undefined, name: tool.function?.name ?? undefined, text: tool.function?.arguments ?? "" },
"OpenAI Chat tool call delta is missing id or name",
)
if (ToolStream.isError(result)) return yield* result
tools = result.tools
if (result.event) events.push(result.event)
}
// Finalize accumulated tool inputs eagerly when finish_reason arrives so
// JSON parse failures fail the stream at the boundary rather than at halt.
const finished =
finishReason !== undefined && state.finishReason === undefined && Object.keys(tools).length > 0
? yield* ToolStream.finishAll(ADAPTER, tools)
: undefined
return [
{
tools: finished?.tools ?? tools,
toolCallEvents: finished?.events ?? state.toolCallEvents,
usage,
finishReason,
},
events,
] as const
})
const finishEvents = (state: ParserState): ReadonlyArray<LLMEvent> => {
const hasToolCalls = state.toolCallEvents.length > 0
const reason = state.finishReason === "stop" && hasToolCalls ? "tool-calls" : state.finishReason
return [
...state.toolCallEvents,
...(reason ? ([{ type: "request-finish", reason, usage: state.usage }] satisfies ReadonlyArray<LLMEvent>) : []),
]
}
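// How one streamed tool call typically arrives (sketch; payload fragments are
// illustrative, not captured traffic): identity lands on the first delta,
// arguments accumulate across later deltas, and finish_reason triggers the
// eager finalization above.
const exampleToolDeltas: OpenAIChatEvent[] = [
  {
    choices: [
      {
        delta: { tool_calls: [{ index: 0, id: "call_1", function: { name: "get_weather", arguments: "" } }] },
        finish_reason: null,
      },
    ],
    usage: null,
  },
  {
    choices: [{ delta: { tool_calls: [{ index: 0, function: { arguments: '{"city":"Oslo"}' } }] }, finish_reason: null }],
    usage: null,
  },
  { choices: [{ delta: null, finish_reason: "tool_calls" }], usage: null },
]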
// =============================================================================
// Protocol And OpenAI Route
// =============================================================================
/**
* The OpenAI Chat protocol — request body construction, body schema, and the
* streaming-event state machine. Reused by every route that speaks OpenAI Chat
* over HTTP+SSE: native OpenAI, DeepSeek, TogetherAI, Cerebras, Baseten,
* Fireworks, DeepInfra, and (once added) Azure OpenAI Chat.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: OpenAIChatBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(OpenAIChatEvent),
initial: () => ({ tools: ToolStream.empty<number>(), toolCallEvents: [] }),
step,
onHalt: finishEvents,
},
})
const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIChatBody))
export const httpTransport = HttpTransport.httpJson({
endpoint: Endpoint.path(PATH),
auth: Auth.bearer(),
framing: Framing.sse,
encodeBody,
})
export const route = Route.make({
id: ADAPTER,
provider: "openai",
protocol,
transport: httpTransport,
defaults: {
baseURL: DEFAULT_BASE_URL,
},
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = route.model
export * as OpenAIChat from "./openai-chat"

View File

@@ -0,0 +1,28 @@
import { Route, type RouteRoutedModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import * as OpenAIChat from "./openai-chat"
const ADAPTER = "openai-compatible-chat"
export type OpenAICompatibleChatModelInput = Omit<RouteRoutedModelInput, "baseURL"> & {
readonly baseURL: string
}
/**
* Route for non-OpenAI providers that expose an OpenAI Chat-compatible
* `/chat/completions` endpoint. Reuses `OpenAIChat.protocol` end-to-end and
* overrides only the route id so providers can be resolved per-family without
* colliding with native OpenAI. The model carries the host on `baseURL`,
* supplied by whichever profile/provider helper builds it.
*/
export const route = Route.make({
id: ADAPTER,
protocol: OpenAIChat.protocol,
endpoint: Endpoint.path("/chat/completions"),
framing: Framing.sse,
})
export const model = Route.model<OpenAICompatibleChatModelInput>(route)
export * as OpenAICompatibleChat from "./openai-compatible-chat"

View File

@@ -0,0 +1,575 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { HttpTransport, WebSocketTransport } from "../route/transport"
import { Protocol } from "../route/protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type ProviderMetadata,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { OpenAIOptions } from "./utils/openai-options"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "openai-responses"
export const DEFAULT_BASE_URL = "https://api.openai.com/v1"
export const PATH = "/responses"
// =============================================================================
// Request Body Schema
// =============================================================================
const OpenAIResponsesInputText = Schema.Struct({
type: Schema.tag("input_text"),
text: Schema.String,
})
const OpenAIResponsesOutputText = Schema.Struct({
type: Schema.tag("output_text"),
text: Schema.String,
})
const OpenAIResponsesInputItem = Schema.Union([
Schema.Struct({ role: Schema.tag("system"), content: Schema.String }),
Schema.Struct({ role: Schema.tag("user"), content: Schema.Array(OpenAIResponsesInputText) }),
Schema.Struct({ role: Schema.tag("assistant"), content: Schema.Array(OpenAIResponsesOutputText) }),
Schema.Struct({
type: Schema.tag("function_call"),
call_id: Schema.String,
name: Schema.String,
arguments: Schema.String,
}),
Schema.Struct({
type: Schema.tag("function_call_output"),
call_id: Schema.String,
output: Schema.String,
}),
])
type OpenAIResponsesInputItem = Schema.Schema.Type<typeof OpenAIResponsesInputItem>
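// Illustrative `input` for one client-tool round-trip (sketch; ids, names, and
// payloads invented):
const exampleInput: OpenAIResponsesInputItem[] = [
  { role: "system", content: "Be brief." },
  { role: "user", content: [{ type: "input_text", text: "Weather in Oslo?" }] },
  { type: "function_call", call_id: "call_1", name: "get_weather", arguments: '{"city":"Oslo"}' },
  { type: "function_call_output", call_id: "call_1", output: '{"tempC":7}' },
]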
const OpenAIResponsesTool = Schema.Struct({
type: Schema.tag("function"),
name: Schema.String,
description: Schema.String,
parameters: JsonObject,
strict: Schema.optional(Schema.Boolean),
})
type OpenAIResponsesTool = Schema.Schema.Type<typeof OpenAIResponsesTool>
const OpenAIResponsesToolChoice = Schema.Union([
Schema.Literals(["auto", "none", "required"]),
Schema.Struct({ type: Schema.tag("function"), name: Schema.String }),
])
// Fields shared between the HTTP body and the WebSocket `response.create`
// message. The HTTP body adds `stream: true`; the WebSocket message adds
// `type: "response.create"`. Defining the shared shape once keeps the two
// transports in sync without a destructure-and-strip dance.
const OpenAIResponsesCoreFields = {
model: Schema.String,
input: Schema.Array(OpenAIResponsesInputItem),
tools: optionalArray(OpenAIResponsesTool),
tool_choice: Schema.optional(OpenAIResponsesToolChoice),
store: Schema.optional(Schema.Boolean),
prompt_cache_key: Schema.optional(Schema.String),
include: optionalArray(Schema.Literal("reasoning.encrypted_content")),
reasoning: Schema.optional(
Schema.Struct({
effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
summary: Schema.optional(Schema.Literal("auto")),
}),
),
text: Schema.optional(
Schema.Struct({
verbosity: Schema.optional(OpenAIOptions.OpenAITextVerbosity),
}),
),
max_output_tokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
}
const OpenAIResponsesBody = Schema.Struct({
...OpenAIResponsesCoreFields,
stream: Schema.Literal(true),
})
export type OpenAIResponsesBody = Schema.Schema.Type<typeof OpenAIResponsesBody>
const OpenAIResponsesWebSocketMessage = Schema.StructWithRest(
Schema.Struct({
type: Schema.tag("response.create"),
...OpenAIResponsesCoreFields,
}),
[Schema.Record(Schema.String, Schema.Unknown)],
)
type OpenAIResponsesWebSocketMessage = Schema.Schema.Type<typeof OpenAIResponsesWebSocketMessage>
const encodeWebSocketMessage = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesWebSocketMessage))
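// Fan-out sketch: one lowered set of core fields serves both transports.
//   HTTP body:         { ...core, stream: true }
//   WebSocket message: { type: "response.create", ...core }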
const OpenAIResponsesUsage = Schema.Struct({
input_tokens: Schema.optional(Schema.Number),
input_tokens_details: optionalNull(Schema.Struct({ cached_tokens: Schema.optional(Schema.Number) })),
output_tokens: Schema.optional(Schema.Number),
output_tokens_details: optionalNull(Schema.Struct({ reasoning_tokens: Schema.optional(Schema.Number) })),
total_tokens: Schema.optional(Schema.Number),
})
type OpenAIResponsesUsage = Schema.Schema.Type<typeof OpenAIResponsesUsage>
const OpenAIResponsesStreamItem = Schema.Struct({
type: Schema.String,
id: Schema.optional(Schema.String),
call_id: Schema.optional(Schema.String),
name: Schema.optional(Schema.String),
arguments: Schema.optional(Schema.String),
// Hosted (provider-executed) tool fields. Each hosted tool item carries its
// own subset of these — we capture them generically so we can surface the
// call's typed input portion and round-trip the full result payload without
// hand-rolling a per-tool schema.
status: Schema.optional(Schema.String),
action: Schema.optional(Schema.Unknown),
queries: Schema.optional(Schema.Unknown),
results: Schema.optional(Schema.Unknown),
code: Schema.optional(Schema.String),
container_id: Schema.optional(Schema.String),
outputs: Schema.optional(Schema.Unknown),
server_label: Schema.optional(Schema.String),
output: Schema.optional(Schema.Unknown),
error: Schema.optional(Schema.Unknown),
})
type OpenAIResponsesStreamItem = Schema.Schema.Type<typeof OpenAIResponsesStreamItem>
const OpenAIResponsesEvent = Schema.Struct({
type: Schema.String,
delta: Schema.optional(Schema.String),
item_id: Schema.optional(Schema.String),
item: Schema.optional(OpenAIResponsesStreamItem),
response: Schema.optional(
Schema.Struct({
id: Schema.optional(Schema.String),
service_tier: Schema.optional(Schema.String),
incomplete_details: optionalNull(Schema.Struct({ reason: Schema.String })),
usage: optionalNull(OpenAIResponsesUsage),
}),
),
code: Schema.optional(Schema.String),
message: Schema.optional(Schema.String),
})
type OpenAIResponsesEvent = Schema.Schema.Type<typeof OpenAIResponsesEvent>
interface ParserState {
readonly tools: ToolStream.State<string>
readonly hasFunctionCall: boolean
}
const invalid = ProviderShared.invalidRequest
// =============================================================================
// Request Lowering
// =============================================================================
const lowerTool = (tool: ToolDefinition): OpenAIResponsesTool => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.inputSchema,
})
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("OpenAI Responses", toolChoice, {
auto: () => "auto" as const,
none: () => "none" as const,
required: () => "required" as const,
tool: (name) => ({ type: "function" as const, name }),
})
const lowerToolCall = (part: ToolCallPart): OpenAIResponsesInputItem => ({
type: "function_call",
call_id: part.id,
name: part.name,
arguments: ProviderShared.encodeJson(part.input),
})
const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) {
const system: OpenAIResponsesInputItem[] =
request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
const input: OpenAIResponsesInputItem[] = [...system]
for (const message of request.messages) {
if (message.role === "user") {
const content: TextPart[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text"]))
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "user", ["text"])
content.push(part)
}
input.push({ role: "user", content: content.map((part) => ({ type: "input_text", text: part.text })) })
continue
}
if (message.role === "assistant") {
const content: TextPart[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "tool-call"]))
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "assistant", ["text", "tool-call"])
if (part.type === "text") {
content.push(part)
continue
}
if (part.type === "tool-call") {
input.push(lowerToolCall(part))
continue
}
}
if (content.length > 0)
input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) })
continue
}
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "tool", ["tool-result"])
input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) })
}
}
return input
})
const lowerOptions = Effect.fn("OpenAIResponses.lowerOptions")(function* (request: LLMRequest) {
const store = OpenAIOptions.store(request)
const promptCacheKey = OpenAIOptions.promptCacheKey(request)
const effort = OpenAIOptions.reasoningEffort(request)
if (effort && !OpenAIOptions.isReasoningEffort(effort))
return yield* invalid(`OpenAI Responses does not support reasoning effort ${effort}`)
const summary = OpenAIOptions.reasoningSummary(request)
const encryptedState = OpenAIOptions.encryptedReasoning(request)
const verbosity = OpenAIOptions.textVerbosity(request)
return {
...(store !== undefined ? { store } : {}),
...(promptCacheKey ? { prompt_cache_key: promptCacheKey } : {}),
...(encryptedState ? { include: ["reasoning.encrypted_content"] as const } : {}),
...(effort || summary ? { reasoning: { effort, summary } } : {}),
...(verbosity ? { text: { verbosity } } : {}),
}
})
const fromRequest = Effect.fn("OpenAIResponses.fromRequest")(function* (request: LLMRequest) {
const generation = request.generation
return {
model: request.model.id,
input: yield* lowerMessages(request),
tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
stream: true as const,
max_output_tokens: generation?.maxTokens,
temperature: generation?.temperature,
top_p: generation?.topP,
...(yield* lowerOptions(request)),
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapUsage = (usage: OpenAIResponsesUsage | null | undefined) => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.input_tokens,
outputTokens: usage.output_tokens,
reasoningTokens: usage.output_tokens_details?.reasoning_tokens,
cacheReadInputTokens: usage.input_tokens_details?.cached_tokens,
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
native: usage,
})
}
const mapFinishReason = (event: OpenAIResponsesEvent, hasFunctionCall: boolean): FinishReason => {
const reason = event.response?.incomplete_details?.reason
if (reason === undefined || reason === null) return hasFunctionCall ? "tool-calls" : "stop"
if (reason === "max_output_tokens") return "length"
if (reason === "content_filter") return "content-filter"
return hasFunctionCall ? "tool-calls" : "unknown"
}
const openaiMetadata = (metadata: Record<string, unknown>): ProviderMetadata => ({ openai: metadata })
// Hosted tool items (provider-executed) ship their typed input + status +
// result fields all in one item. We expose them as a `tool-call` +
// `tool-result` pair so consumers can treat them uniformly with client tools,
// only differentiated by `providerExecuted: true`.
//
// One record per OpenAI Responses item type that represents a hosted
// (provider-executed) tool call: the common name we surface, plus an `input`
// extractor that picks the fields the model actually populated for that tool.
// Falling back to `{}` when an entry isn't fully typed keeps unknown tools
// observable without rolling a per-tool schema.
const HOSTED_TOOLS = {
web_search_call: { name: "web_search", input: (item) => item.action ?? {} },
web_search_preview_call: { name: "web_search_preview", input: (item) => item.action ?? {} },
file_search_call: { name: "file_search", input: (item) => ({ queries: item.queries ?? [] }) },
code_interpreter_call: {
name: "code_interpreter",
input: (item) => ({ code: item.code, container_id: item.container_id }),
},
computer_use_call: { name: "computer_use", input: (item) => item.action ?? {} },
image_generation_call: { name: "image_generation", input: () => ({}) },
mcp_call: {
name: "mcp",
input: (item) => ({ server_label: item.server_label, name: item.name, arguments: item.arguments }),
},
local_shell_call: { name: "local_shell", input: (item) => item.action ?? {} },
} as const satisfies Record<
string,
{ readonly name: string; readonly input: (item: OpenAIResponsesStreamItem) => unknown }
>
type HostedToolType = keyof typeof HOSTED_TOOLS
const isHostedToolItem = (
item: OpenAIResponsesStreamItem,
): item is OpenAIResponsesStreamItem & { type: HostedToolType; id: string } =>
item.type in HOSTED_TOOLS && typeof item.id === "string" && item.id.length > 0
// Round-trip the full item as the structured result so consumers can extract
// outputs / sources / status without re-decoding.
const hostedToolResult = (item: OpenAIResponsesStreamItem) => {
const isError = typeof item.error !== "undefined" && item.error !== null
return isError ? { type: "error" as const, value: item.error } : { type: "json" as const, value: item }
}
const hostedToolEvents = (
item: OpenAIResponsesStreamItem & { type: HostedToolType; id: string },
): ReadonlyArray<LLMEvent> => {
const tool = HOSTED_TOOLS[item.type]
const providerMetadata = openaiMetadata({ itemId: item.id })
return [
{
type: "tool-call",
id: item.id,
name: tool.name,
input: tool.input(item),
providerExecuted: true,
providerMetadata,
},
{
type: "tool-result",
id: item.id,
name: tool.name,
result: hostedToolResult(item),
providerExecuted: true,
providerMetadata,
},
]
}
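// e.g. a completed hosted item (sketch; values illustrative)
//   { type: "web_search_call", id: "ws_1", status: "completed", action: { query: "effect" } }
// yields the pair
//   { type: "tool-call",   id: "ws_1", name: "web_search", input: { query: "effect" }, providerExecuted: true, ... }
//   { type: "tool-result", id: "ws_1", name: "web_search", result: { type: "json", value: item }, providerExecuted: true, ... }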
type StepResult = readonly [ParserState, ReadonlyArray<LLMEvent>]
const NO_EVENTS: StepResult["1"] = []
// `response.completed` / `response.incomplete` are clean finishes that emit a
// `request-finish` event; `response.failed` is a hard failure that emits a
// `provider-error`. All three end the stream — kept in one set so `step` and
// the protocol's `terminal` predicate stay in sync.
const TERMINAL_TYPES = new Set(["response.completed", "response.incomplete", "response.failed"])
const onOutputTextDelta = (state: ParserState, event: OpenAIResponsesEvent): StepResult => {
if (!event.delta) return [state, NO_EVENTS]
return [
state,
[
{
type: "text-delta",
id: event.item_id,
text: event.delta,
...(event.item_id ? { providerMetadata: openaiMetadata({ itemId: event.item_id }) } : {}),
},
],
]
}
const onOutputItemAdded = (state: ParserState, event: OpenAIResponsesEvent): StepResult => {
const item = event.item
if (item?.type !== "function_call" || !item.id) return [state, NO_EVENTS]
return [
{
hasFunctionCall: state.hasFunctionCall,
tools: ToolStream.start(state.tools, item.id, {
id: item.call_id ?? item.id,
name: item.name ?? "",
input: item.arguments ?? "",
providerMetadata: openaiMetadata({ itemId: item.id }),
}),
},
NO_EVENTS,
]
}
const onFunctionCallArgumentsDelta = Effect.fn("OpenAIResponses.onFunctionCallArgumentsDelta")(function* (
state: ParserState,
event: OpenAIResponsesEvent,
) {
if (!event.item_id || !event.delta) return [state, NO_EVENTS] satisfies StepResult
const result = ToolStream.appendExisting(
ADAPTER,
state.tools,
event.item_id,
event.delta,
"OpenAI Responses tool argument delta is missing its tool call",
)
if (ToolStream.isError(result)) return yield* result
return [
{ hasFunctionCall: state.hasFunctionCall, tools: result.tools },
result.event ? [result.event] : NO_EVENTS,
] satisfies StepResult
})
const onOutputItemDone = Effect.fn("OpenAIResponses.onOutputItemDone")(function* (
state: ParserState,
event: OpenAIResponsesEvent,
) {
const item = event.item
if (!item) return [state, NO_EVENTS] satisfies StepResult
if (item.type === "function_call") {
if (!item.id || !item.call_id || !item.name) return [state, NO_EVENTS] satisfies StepResult
const tools = state.tools[item.id]
? state.tools
: ToolStream.start(state.tools, item.id, { id: item.call_id, name: item.name })
const result =
item.arguments === undefined
? yield* ToolStream.finish(ADAPTER, tools, item.id)
: yield* ToolStream.finishWithInput(ADAPTER, tools, item.id, item.arguments)
return [
{ hasFunctionCall: result.event ? true : state.hasFunctionCall, tools: result.tools },
result.event ? [result.event] : NO_EVENTS,
] satisfies StepResult
}
if (isHostedToolItem(item)) return [state, hostedToolEvents(item)] satisfies StepResult
return [state, NO_EVENTS] satisfies StepResult
})
const onResponseFinish = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
state,
[
{
type: "request-finish",
reason: mapFinishReason(event, state.hasFunctionCall),
usage: mapUsage(event.response?.usage),
...(event.response?.id || event.response?.service_tier
? {
providerMetadata: openaiMetadata({
responseId: event.response.id,
serviceTier: event.response.service_tier,
}),
}
: {}),
},
],
]
const onResponseFailed = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
state,
[{ type: "provider-error", message: event.message ?? event.code ?? "OpenAI Responses response failed" }],
]
const onError = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
state,
[{ type: "provider-error", message: event.message ?? event.code ?? "OpenAI Responses stream error" }],
]
const step = (state: ParserState, event: OpenAIResponsesEvent) => {
if (event.type === "response.output_text.delta") return Effect.succeed(onOutputTextDelta(state, event))
if (event.type === "response.output_item.added") return Effect.succeed(onOutputItemAdded(state, event))
if (event.type === "response.function_call_arguments.delta") return onFunctionCallArgumentsDelta(state, event)
if (event.type === "response.output_item.done") return onOutputItemDone(state, event)
if (event.type === "response.completed" || event.type === "response.incomplete")
return Effect.succeed(onResponseFinish(state, event))
if (event.type === "response.failed") return Effect.succeed(onResponseFailed(state, event))
if (event.type === "error") return Effect.succeed(onError(state, event))
return Effect.succeed<StepResult>([state, NO_EVENTS])
}
// =============================================================================
// Protocol And OpenAI Route
// =============================================================================
/**
* The OpenAI Responses protocol — request body construction, body schema, and
* the streaming-event state machine. Used by native OpenAI and (once
* registered) Azure OpenAI Responses.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: OpenAIResponsesBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(OpenAIResponsesEvent),
initial: () => ({ hasFunctionCall: false, tools: ToolStream.empty<string>() }),
step,
terminal: (event) => TERMINAL_TYPES.has(event.type),
},
})
const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesBody))
const transportBase = {
endpoint: Endpoint.path<OpenAIResponsesBody>(PATH),
auth: Auth.bearer(),
encodeBody,
}
const routeDefaults = {
baseURL: DEFAULT_BASE_URL,
}
export const httpTransport = HttpTransport.httpJson({
...transportBase,
framing: Framing.sse,
})
export const route = Route.make({
id: ADAPTER,
provider: "openai",
protocol,
transport: httpTransport,
defaults: routeDefaults,
})
const decodeWebSocketMessage = ProviderShared.validateWith(Schema.decodeUnknownEffect(OpenAIResponsesWebSocketMessage))
const webSocketMessage = (body: OpenAIResponsesBody | Record<string, unknown>) =>
Effect.gen(function* () {
if (!ProviderShared.isRecord(body))
return yield* ProviderShared.invalidRequest("OpenAI Responses WebSocket body must be a JSON object")
const { stream: _stream, ...message } = body
return yield* decodeWebSocketMessage({ ...message, type: "response.create" })
})
export const webSocketTransport = WebSocketTransport.json({
...transportBase,
toMessage: webSocketMessage,
encodeMessage: encodeWebSocketMessage,
})
export const webSocketRoute = Route.make({
id: `${ADAPTER}-websocket`,
provider: "openai",
protocol,
transport: webSocketTransport,
defaults: routeDefaults,
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = route.model
export const webSocketModel = webSocketRoute.model
export * as OpenAIResponses from "./openai-responses"

View File

@@ -0,0 +1,203 @@
import { Buffer } from "node:buffer"
import { Effect, Schema, Stream } from "effect"
import * as Sse from "effect/unstable/encoding/Sse"
import { Headers, HttpClientRequest } from "effect/unstable/http"
import {
InvalidProviderOutputReason,
InvalidRequestReason,
LLMError,
type ContentPart,
type LLMRequest,
type MediaPart,
type ToolResultPart,
} from "../schema"
export const Json = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownSync(Json)
export const encodeJson = Schema.encodeSync(Json)
export const JsonObject = Schema.Record(Schema.String, Schema.Unknown)
export const optionalArray = <const S extends Schema.Top>(schema: S) => Schema.optional(Schema.Array(schema))
export const optionalNull = <const S extends Schema.Top>(schema: S) => Schema.optional(Schema.NullOr(schema))
/**
* Plain-record narrowing. Excludes arrays so routes checking nested JSON
* Schema fragments don't accidentally treat a tuple as a key/value bag.
*/
export const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
/**
* Streaming tool-call accumulator. Adapters that build a tool call across
* multiple `tool-input-delta` chunks store the partial JSON input string here
* and finalize it with `parseToolInput` once the call completes.
*/
export interface ToolAccumulator {
readonly id: string
readonly name: string
readonly input: string
}
/**
* `Usage.totalTokens` policy shared by every route. Honors a provider-
* supplied total; otherwise falls back to `inputTokens + outputTokens` only
* when at least one is defined. Returns `undefined` when neither input nor
* output is known so routes don't publish a misleading `0`.
*/
export const totalTokens = (
inputTokens: number | undefined,
outputTokens: number | undefined,
total: number | undefined,
) => {
if (total !== undefined) return total
if (inputTokens === undefined && outputTokens === undefined) return undefined
return (inputTokens ?? 0) + (outputTokens ?? 0)
}
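// e.g. totalTokens(12, 7, undefined) === 19, totalTokens(12, undefined, 25) === 25,
// and totalTokens(undefined, undefined, undefined) === undefined.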
export const eventError = (route: string, message: string, raw?: string) =>
new LLMError({
module: "ProviderShared",
method: "stream",
reason: new InvalidProviderOutputReason({ route, message, raw }),
})
export const parseJson = (route: string, input: string, message: string) =>
Effect.try({
try: () => decodeJson(input),
catch: () => eventError(route, message, input),
})
/**
* Join the `text` field of a list of parts with newlines. Used by routes
* that flatten system / message content arrays into a single provider string
* (OpenAI Chat `system` content, OpenAI Responses `system` content, Gemini
* `systemInstruction.parts[].text`).
*/
export const joinText = (parts: ReadonlyArray<{ readonly text: string }>) => parts.map((part) => part.text).join("\n")
/**
* Parse the streamed JSON input of a tool call. Treats an empty string as
* `"{}"` — providers occasionally finish a tool call without ever emitting
* input deltas (e.g. zero-arg tools). The error message is uniform across
* routes: `Invalid JSON input for <route> tool call <name>`.
*/
export const parseToolInput = (route: string, name: string, raw: string) =>
parseJson(route, raw || "{}", `Invalid JSON input for ${route} tool call ${name}`)
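// e.g. parseToolInput("openai-chat", "noop", "") succeeds with {} rather than
// failing, covering zero-arg tools that never streamed an input delta.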
/**
* Encode a `MediaPart`'s raw bytes for inclusion in a JSON request body.
* `data: string` is assumed to already be base64 (matches caller convention
* across Gemini / Bedrock); `data: Uint8Array` is base64-encoded here. Used
* by every route that supports image / document inputs.
*/
export const mediaBytes = (part: MediaPart) =>
typeof part.data === "string" ? part.data : Buffer.from(part.data).toString("base64")
export const trimBaseUrl = (value: string) => value.replace(/\/+$/, "")
export const toolResultText = (part: ToolResultPart) => {
if (part.result.type === "text" || part.result.type === "error") return String(part.result.value)
return encodeJson(part.result.value)
}
export const errorText = (error: unknown) => {
if (error instanceof Error) return error.message
if (typeof error === "string") return error
if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") return String(error)
if (error === null) return "null"
if (error === undefined) return "undefined"
return "Unknown stream error"
}
/**
* `framing` step for Server-Sent Events. Decodes UTF-8, runs the SSE channel
* decoder, and drops empty / `[DONE]` keep-alive events so the downstream
* `decodeChunk` sees one JSON string per element. The SSE channel emits a
* `Retry` control event on its error channel; we drop it here (we don't
* implement client-driven retries) so the public error channel stays
* `LLMError`.
*/
export const sseFraming = (bytes: Stream.Stream<Uint8Array, LLMError>): Stream.Stream<string, LLMError> =>
bytes.pipe(
Stream.decodeText(),
Stream.pipeThroughChannel(Sse.decode()),
Stream.catchTag("Retry", () => Stream.empty),
Stream.filter((event) => event.data.length > 0 && event.data !== "[DONE]"),
Stream.map((event) => event.data),
)
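// Illustrative framing: the UTF-8 bytes of
//   data: {"ok":true}\n\n
//   data: [DONE]\n\n
// yield the single downstream element `{"ok":true}`; the [DONE] sentinel and
// empty keep-alive events never reach the JSON decoder.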
/**
 * Canonical invalid-request constructor. Lifts the one-line `const invalid =
 * (message) => invalidRequest(message)` alias out of every route so the
 * error constructor lives in one place. If we ever extend
* `InvalidRequestReason` with route context or trace metadata, the change
* lands here.
*/
export const invalidRequest = (message: string) =>
new LLMError({
module: "ProviderShared",
method: "request",
reason: new InvalidRequestReason({ message }),
})
export const matchToolChoice = <Auto, None, Required, Tool>(
route: string,
toolChoice: NonNullable<LLMRequest["toolChoice"]>,
cases: {
readonly auto: () => Auto
readonly none: () => None
readonly required: () => Required
readonly tool: (name: string) => Tool
},
) =>
Effect.gen(function* () {
if (toolChoice.type === "auto") return cases.auto()
if (toolChoice.type === "none") return cases.none()
if (toolChoice.type === "required") return cases.required()
if (!toolChoice.name) return yield* invalidRequest(`${route} tool choice requires a tool name`)
return cases.tool(toolChoice.name)
})
type ContentType = ContentPart["type"]
const formatContentTypes = (types: ReadonlyArray<ContentType>) => {
if (types.length <= 1) return types[0] ?? ""
if (types.length === 2) return `${types[0]} and ${types[1]}`
return `${types.slice(0, -1).join(", ")}, and ${types.at(-1)}`
}
export const supportsContent = <const Type extends ContentType>(
part: ContentPart,
types: ReadonlyArray<Type>,
): part is Extract<ContentPart, { readonly type: Type }> => (types as ReadonlyArray<ContentType>).includes(part.type)
export const unsupportedContent = (
route: string,
role: LLMRequest["messages"][number]["role"],
types: ReadonlyArray<ContentType>,
) => invalidRequest(`${route} ${role} messages only support ${formatContentTypes(types)} content for now`)
/**
* Build a `validate` step from a Schema decoder. Replaces the per-route
* lambda body `(payload) => decode(payload).pipe(Effect.mapError((e) =>
* invalid(e.message)))`. Any decode error is translated into
* `LLMError` carrying the original parse-error message.
*/
export const validateWith =
<A, I, E extends { readonly message: string }>(decode: (input: I) => Effect.Effect<A, E>) =>
(payload: I) =>
decode(payload).pipe(Effect.mapError((error) => invalidRequest(error.message)))
/**
* Build an HTTP POST with a JSON body. Sets `content-type: application/json`
* automatically after caller-supplied headers so routes cannot accidentally
* send JSON with a stale content type. The body is passed pre-encoded so
* routes can choose between
* `Schema.encodeSync(payload)` and `ProviderShared.encodeJson(payload)`.
*/
export const jsonPost = (input: { readonly url: string; readonly body: string; readonly headers?: Headers.Input }) =>
HttpClientRequest.post(input.url).pipe(
HttpClientRequest.setHeaders(Headers.set(Headers.fromInput(input.headers), "content-type", "application/json")),
HttpClientRequest.bodyText(input.body, "application/json"),
)
export * as ProviderShared from "./shared"

View File

@@ -0,0 +1,103 @@
import { AwsV4Signer } from "aws4fetch"
import { Effect, Option, Schema } from "effect"
import { Headers } from "effect/unstable/http"
import { Auth, type AuthInput } from "../../route/auth"
import type { LLMRequest } from "../../schema"
import { ProviderShared } from "../shared"
/**
* AWS credentials for SigV4 signing. Bedrock also supports Bearer API key auth
* via `model.apiKey`, which bypasses SigV4 signing. STS-vended credentials
* should be refreshed by the consumer (rebuild the model) before they expire;
* the route does not refresh.
*/
export interface Credentials {
readonly region: string
readonly accessKeyId: string
readonly secretAccessKey: string
readonly sessionToken?: string
}
const NativeCredentials = Schema.Struct({
accessKeyId: Schema.String,
secretAccessKey: Schema.String,
region: Schema.optional(Schema.String),
sessionToken: Schema.optional(Schema.String),
})
const decodeNativeCredentials = Schema.decodeUnknownOption(NativeCredentials)
export const region = (request: LLMRequest) => {
const fromNative = request.model.native?.aws_region
if (typeof fromNative === "string" && fromNative !== "") return fromNative
return (
decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
Option.map((credentials) => credentials.region),
Option.getOrUndefined,
) ?? "us-east-1"
)
}
const credentialsFromInput = (request: LLMRequest): Credentials | undefined =>
decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
Option.map((creds) => ({ ...creds, region: creds.region ?? region(request) })),
Option.getOrUndefined,
)
const signRequest = (input: {
readonly url: string
readonly body: string
readonly headers: Headers.Headers
readonly credentials: Credentials
}) =>
Effect.tryPromise({
try: async () => {
const signed = await new AwsV4Signer({
url: input.url,
method: "POST",
headers: Object.entries(input.headers),
body: input.body,
region: input.credentials.region,
accessKeyId: input.credentials.accessKeyId,
secretAccessKey: input.credentials.secretAccessKey,
sessionToken: input.credentials.sessionToken,
service: "bedrock",
}).sign()
return Object.fromEntries(signed.headers.entries())
},
catch: (error) =>
ProviderShared.invalidRequest(
`Bedrock Converse SigV4 signing failed: ${error instanceof Error ? error.message : String(error)}`,
),
})
/**
* Bedrock auth. `model.apiKey` (Bedrock's newer Bearer API key auth) wins if
* set; otherwise sign the exact JSON bytes with SigV4 using credentials from
* `model.native.aws_credentials`.
*/
export const auth = Auth.custom((input: AuthInput) => {
if (input.request.model.apiKey) return Auth.toEffect(Auth.bearer())(input)
return Effect.gen(function* () {
const credentials = credentialsFromInput(input.request)
if (!credentials) {
return yield* ProviderShared.invalidRequest(
"Bedrock Converse requires either model.apiKey or AWS credentials in model.native.aws_credentials",
)
}
const headersForSigning = Headers.set(input.headers, "content-type", "application/json")
const signed = yield* signRequest({ url: input.url, body: input.body, headers: headersForSigning, credentials })
return Headers.setAll(headersForSigning, signed)
})
})
export const nativeCredentials = (native: Record<string, unknown> | undefined, credentials: Credentials | undefined) =>
credentials
? {
...native,
aws_credentials: credentials,
aws_region: credentials.region,
}
: native
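// Shape this module reads and writes (sketch; values illustrative):
//   model.native = {
//     aws_credentials: { accessKeyId: "AKIA...", secretAccessKey: "...",
//                        region: "eu-west-1", sessionToken: "..." },
//     aws_region: "eu-west-1",
//   }
// `region` falls back to "us-east-1" when neither field supplies one.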
export * as BedrockAuth from "./bedrock-auth"

View File

@@ -0,0 +1,20 @@
import { Schema } from "effect"
import type { CacheHint } from "../../schema"
// Bedrock cache markers are positional: emit a `cachePoint` block immediately
// after the content the caller wants treated as a cacheable prefix.
export const CachePointBlock = Schema.Struct({
cachePoint: Schema.Struct({ type: Schema.tag("default") }),
})
export type CachePointBlock = Schema.Schema.Type<typeof CachePointBlock>
// Bedrock recently added optional `ttl: "5m" | "1h"` on cachePoint. Map
// `CacheHint.ttlSeconds` here once a recorded cassette validates the wire shape.
const DEFAULT: CachePointBlock = { cachePoint: { type: "default" } }
export const block = (cache: CacheHint | undefined): CachePointBlock | undefined => {
if (cache?.type !== "ephemeral" && cache?.type !== "persistent") return undefined
return DEFAULT
}
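// Positional sketch: to mark a long system prefix as cacheable, emit the
// marker immediately after it, e.g.
//   [{ text: systemPrompt }, { cachePoint: { type: "default" } }, ...rest]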
export * as BedrockCache from "./bedrock-cache"

View File

@@ -0,0 +1,80 @@
import { Effect, Schema } from "effect"
import type { MediaPart } from "../../schema"
import { ProviderShared } from "../shared"
// Bedrock Converse accepts image `format` as the file extension and
// `source.bytes` as base64 in the JSON wire format.
export const ImageFormat = Schema.Literals(["png", "jpeg", "gif", "webp"])
export type ImageFormat = Schema.Schema.Type<typeof ImageFormat>
export const ImageBlock = Schema.Struct({
image: Schema.Struct({
format: ImageFormat,
source: Schema.Struct({ bytes: Schema.String }),
}),
})
export type ImageBlock = Schema.Schema.Type<typeof ImageBlock>
// Bedrock document blocks require a user-facing name so the model can refer to
// the uploaded document.
export const DocumentFormat = Schema.Literals(["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"])
export type DocumentFormat = Schema.Schema.Type<typeof DocumentFormat>
export const DocumentBlock = Schema.Struct({
document: Schema.Struct({
format: DocumentFormat,
name: Schema.String,
source: Schema.Struct({ bytes: Schema.String }),
}),
})
export type DocumentBlock = Schema.Schema.Type<typeof DocumentBlock>
const IMAGE_FORMATS = {
"image/png": "png",
"image/jpeg": "jpeg",
"image/jpg": "jpeg",
"image/gif": "gif",
"image/webp": "webp",
} as const satisfies Record<string, ImageFormat>
const DOCUMENT_FORMATS = {
"application/pdf": "pdf",
"text/csv": "csv",
"application/msword": "doc",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
"application/vnd.ms-excel": "xls",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
"text/html": "html",
"text/plain": "txt",
"text/markdown": "md",
} as const satisfies Record<string, DocumentFormat>
const imageBlock = (part: MediaPart, format: ImageFormat): ImageBlock => ({
image: { format, source: { bytes: ProviderShared.mediaBytes(part) } },
})
const documentBlock = (part: MediaPart, format: DocumentFormat): DocumentBlock => ({
document: {
format,
name: part.filename ?? `document.${format}`,
source: { bytes: ProviderShared.mediaBytes(part) },
},
})
// Route by MIME. Known image/document formats lower into a typed block; anything
// else fails with a clear error instead of silently degrading to a malformed
// document block. Image MIME types not in `IMAGE_FORMATS` (e.g. `image/svg+xml`)
// get an image-specific error so the caller knows it's a format-support issue,
// not a kind-detection issue.
export const lower = (part: MediaPart) => {
const mime = part.mediaType.toLowerCase()
const imageFormat = IMAGE_FORMATS[mime as keyof typeof IMAGE_FORMATS]
if (imageFormat) return Effect.succeed(imageBlock(part, imageFormat))
if (mime.startsWith("image/"))
return ProviderShared.invalidRequest(`Bedrock Converse does not support image media type ${part.mediaType}`)
const documentFormat = DOCUMENT_FORMATS[mime as keyof typeof DOCUMENT_FORMATS]
if (documentFormat) return Effect.succeed(documentBlock(part, documentFormat))
return ProviderShared.invalidRequest(`Bedrock Converse does not support media type ${part.mediaType}`)
}
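// e.g. a part with mediaType "image/jpg" lowers to
//   { image: { format: "jpeg", source: { bytes: "<base64>" } } }
// while "image/svg+xml" hits the image-specific error and "application/zip"
// hits the generic unsupported-media error.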
export * as BedrockMedia from "./bedrock-media"

View File

@@ -0,0 +1,101 @@
import { ProviderShared } from "../shared"
// Gemini accepts a JSON Schema-like dialect for tool parameters, but rejects a
// handful of common JSON Schema shapes. Keep this projection isolated so the
// Gemini protocol file still reads like the other protocol modules.
const SCHEMA_INTENT_KEYS = [
"type",
"properties",
"items",
"prefixItems",
"enum",
"const",
"$ref",
"additionalProperties",
"patternProperties",
"required",
"not",
"if",
"then",
"else",
]
const isRecord = ProviderShared.isRecord
const hasCombiner = (schema: unknown) =>
isRecord(schema) && (Array.isArray(schema.anyOf) || Array.isArray(schema.oneOf) || Array.isArray(schema.allOf))
const hasSchemaIntent = (schema: unknown) =>
isRecord(schema) && (hasCombiner(schema) || SCHEMA_INTENT_KEYS.some((key) => key in schema))
const sanitizeNode = (schema: unknown): unknown => {
if (!isRecord(schema)) return Array.isArray(schema) ? schema.map(sanitizeNode) : schema
const result: Record<string, unknown> = Object.fromEntries(
Object.entries(schema).map(([key, value]) => [
key,
key === "enum" && Array.isArray(value) ? value.map(String) : sanitizeNode(value),
]),
)
if (Array.isArray(result.enum) && (result.type === "integer" || result.type === "number")) result.type = "string"
const properties = result.properties
if (result.type === "object" && isRecord(properties) && Array.isArray(result.required)) {
result.required = result.required.filter((field) => typeof field === "string" && field in properties)
}
if (result.type === "array" && !hasCombiner(result)) {
result.items = result.items ?? {}
if (isRecord(result.items) && !hasSchemaIntent(result.items)) result.items = { ...result.items, type: "string" }
}
if (typeof result.type === "string" && result.type !== "object" && !hasCombiner(result)) {
delete result.properties
delete result.required
}
return result
}
const emptyObjectSchema = (schema: Record<string, unknown>) =>
schema.type === "object" &&
(!isRecord(schema.properties) || Object.keys(schema.properties).length === 0) &&
!schema.additionalProperties
const projectNode = (schema: unknown): Record<string, unknown> | undefined => {
if (!isRecord(schema)) return undefined
if (emptyObjectSchema(schema)) return undefined
return Object.fromEntries(
[
["description", schema.description],
["required", schema.required],
["format", schema.format],
["type", Array.isArray(schema.type) ? schema.type.filter((type) => type !== "null")[0] : schema.type],
["nullable", Array.isArray(schema.type) && schema.type.includes("null") ? true : undefined],
["enum", schema.const !== undefined ? [schema.const] : schema.enum],
[
"properties",
isRecord(schema.properties)
? Object.fromEntries(Object.entries(schema.properties).map(([key, value]) => [key, projectNode(value)]))
: undefined,
],
[
"items",
Array.isArray(schema.items)
? schema.items.map(projectNode)
: schema.items === undefined
? undefined
: projectNode(schema.items),
],
["allOf", Array.isArray(schema.allOf) ? schema.allOf.map(projectNode) : undefined],
["anyOf", Array.isArray(schema.anyOf) ? schema.anyOf.map(projectNode) : undefined],
["oneOf", Array.isArray(schema.oneOf) ? schema.oneOf.map(projectNode) : undefined],
["minLength", schema.minLength],
].filter((entry) => entry[1] !== undefined),
)
}
export const convert = (schema: unknown) => projectNode(sanitizeNode(schema))
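// Worked example (sketch):
//   convert({
//     type: "object",
//     required: ["city", "ghost"],
//     properties: { city: { type: ["string", "null"], description: "City" } },
//   })
// returns
//   {
//     required: ["city"],   // "ghost" dropped: it names no declared property
//     type: "object",
//     properties: { city: { description: "City", type: "string", nullable: true } },
//   }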
export * as GeminiToolSchema from "./gemini-tool-schema"

View File

@@ -0,0 +1,55 @@
import { Schema } from "effect"
import type { LLMRequest, ReasoningEffort, TextVerbosity as TextVerbosityValue } from "../../schema"
import { ReasoningEfforts, TextVerbosity } from "../../schema"
export const OpenAIReasoningEfforts = ReasoningEfforts.filter(
(effort): effort is Exclude<ReasoningEffort, "max"> => effort !== "max",
)
export type OpenAIReasoningEffort = (typeof OpenAIReasoningEfforts)[number]
const REASONING_EFFORTS = new Set<string>(ReasoningEfforts)
const OPENAI_REASONING_EFFORTS = new Set<string>(OpenAIReasoningEfforts)
const TEXT_VERBOSITY = new Set<string>(["low", "medium", "high"])
export const OpenAIReasoningEffort = Schema.Literals(OpenAIReasoningEfforts)
export const OpenAITextVerbosity = TextVerbosity
const isAnyReasoningEffort = (effort: unknown): effort is ReasoningEffort =>
typeof effort === "string" && REASONING_EFFORTS.has(effort)
export const isReasoningEffort = (effort: unknown): effort is OpenAIReasoningEffort =>
typeof effort === "string" && OPENAI_REASONING_EFFORTS.has(effort)
const isTextVerbosity = (value: unknown): value is TextVerbosityValue =>
typeof value === "string" && TEXT_VERBOSITY.has(value)
const options = (request: LLMRequest) => request.providerOptions?.openai
export const store = (request: LLMRequest): boolean | undefined => {
const value = options(request)?.store
return typeof value === "boolean" ? value : undefined
}
export const reasoningEffort = (request: LLMRequest): ReasoningEffort | undefined => {
const value = options(request)?.reasoningEffort
return isAnyReasoningEffort(value) ? value : undefined
}
export const reasoningSummary = (request: LLMRequest): "auto" | undefined => {
return options(request)?.reasoningSummary === "auto" ? "auto" : undefined
}
export const encryptedReasoning = (request: LLMRequest) =>
options(request)?.includeEncryptedReasoning === true ? true : undefined
export const promptCacheKey = (request: LLMRequest) => {
const value = options(request)?.promptCacheKey
return typeof value === "string" ? value : undefined
}
export const textVerbosity = (request: LLMRequest) => {
const value = options(request)?.textVerbosity
return isTextVerbosity(value) ? value : undefined
}
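// The accessors above all read `request.providerOptions.openai` (sketch; every
// field optional, non-matching values are ignored):
//   { store: false, reasoningEffort: "high", reasoningSummary: "auto",
//     includeEncryptedReasoning: true, promptCacheKey: "sess-42",
//     textVerbosity: "low" }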
export * as OpenAIOptions from "./openai-options"

View File

@@ -0,0 +1,196 @@
import { Effect } from "effect"
import { LLMError, type ProviderMetadata, type ToolCall, type ToolInputDelta } from "../../schema"
import { eventError, parseToolInput, type ToolAccumulator } from "../shared"
type StreamKey = string | number
/**
* One pending streamed tool call. Providers emit the tool identity and JSON
* argument text across separate chunks; `input` is the raw JSON string collected
* so far, not the parsed object.
*/
export interface PendingTool extends ToolAccumulator {
readonly providerExecuted?: boolean
readonly providerMetadata?: ProviderMetadata
}
/**
* Sparse parser state keyed by the provider's stream-local tool identifier.
*
* This key is not the final tool-call id (`call_...`). It is the id/index the
* provider uses while streaming a partial call: OpenAI Chat / Anthropic /
* Bedrock use numeric content indexes, while OpenAI Responses uses string
* `item_id`s. The generic keeps each protocol internally consistent.
*/
export type State<K extends StreamKey> = Partial<Record<K, PendingTool>>
/**
* Result of adding argument text to one pending tool call. It returns both the
* next `tools` state and the updated `tool` because parsers often need the
* current id/name immediately. `event` is present only when new text arrived;
* metadata-only deltas update identity without emitting `tool-input-delta`.
*/
export interface AppendOutcome<K extends StreamKey> {
readonly tools: State<K>
readonly tool: PendingTool
readonly event?: ToolInputDelta
}
/** Create empty accumulator state for one provider stream. */
export const empty = <K extends StreamKey>(): State<K> => ({})
const withTool = <K extends StreamKey>(tools: State<K>, key: K, tool: PendingTool): State<K> => {
return { ...tools, [key]: tool }
}
const withoutTool = <K extends StreamKey>(tools: State<K>, key: K): State<K> => {
const next = { ...tools }
delete next[key]
return next
}
const inputDelta = (tool: PendingTool, text: string): ToolInputDelta => ({
type: "tool-input-delta",
id: tool.id,
name: tool.name,
text,
...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
})
const toolCall = (route: string, tool: PendingTool, inputOverride?: string) =>
parseToolInput(route, tool.name, inputOverride ?? tool.input).pipe(
Effect.map(
(input): ToolCall =>
tool.providerExecuted
? {
type: "tool-call",
id: tool.id,
name: tool.name,
input,
providerExecuted: true,
...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
}
: {
type: "tool-call",
id: tool.id,
name: tool.name,
input,
...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
},
),
)
/** Store the updated tool and produce the optional public delta event. */
const appendTool = <K extends StreamKey>(
tools: State<K>,
key: K,
tool: PendingTool,
text: string,
): AppendOutcome<K> => ({
tools: withTool(tools, key, tool),
tool,
event: text.length === 0 ? undefined : inputDelta(tool, text),
})
export const isError = <K extends StreamKey>(result: AppendOutcome<K> | LLMError): result is LLMError =>
result instanceof LLMError
/**
* Register a tool call whose start event arrived before any argument deltas.
* Used by Anthropic `content_block_start`, Bedrock `contentBlockStart`, and
* OpenAI Responses `response.output_item.added`.
*/
export const start = <K extends StreamKey>(
tools: State<K>,
key: K,
tool: Omit<PendingTool, "input"> & { readonly input?: string },
) => withTool(tools, key, { ...tool, input: tool.input ?? "" })
/**
* Append a streamed argument delta, starting the tool if this provider encodes
* identity on the first delta instead of a separate start event. OpenAI Chat has
* this shape: `tool_calls[].index` is the stream key, and `id` / `name` may only
* appear on the first delta for that index.
*/
export const appendOrStart = <K extends StreamKey>(
route: string,
tools: State<K>,
key: K,
delta: { readonly id?: string; readonly name?: string; readonly text: string },
missingToolMessage: string,
): AppendOutcome<K> | LLMError => {
const current = tools[key]
const id = delta.id ?? current?.id
const name = delta.name ?? current?.name
if (!id || !name) return eventError(route, missingToolMessage)
const tool = {
id,
name,
input: `${current?.input ?? ""}${delta.text}`,
providerExecuted: current?.providerExecuted,
providerMetadata: current?.providerMetadata,
}
if (current && delta.text.length === 0 && current.id === id && current.name === name) return { tools, tool: current }
return appendTool(tools, key, tool, delta.text)
}
/**
* Append argument text to a tool that must already have been started. This keeps
* protocols honest when their stream grammar promises a start event before any
* argument delta.
*/
export const appendExisting = <K extends StreamKey>(
route: string,
tools: State<K>,
key: K,
text: string,
missingToolMessage: string,
): AppendOutcome<K> | LLMError => {
const current = tools[key]
if (!current) return eventError(route, missingToolMessage)
if (text.length === 0) return { tools, tool: current }
return appendTool(tools, key, { ...current, input: `${current.input}${text}` }, text)
}
/**
* Finalize one pending tool call: parse the accumulated raw JSON, remove it
* from state, and return the optional public `tool-call` event. Missing keys are
* a no-op because some providers emit stop events for non-tool content blocks.
*/
export const finish = <K extends StreamKey>(route: string, tools: State<K>, key: K) =>
Effect.gen(function* () {
const tool = tools[key]
if (!tool) return { tools }
return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool) }
})
/**
* Finalize one pending tool call with an authoritative final input string.
* OpenAI Responses can send accumulated deltas and then repeat the completed
* arguments on `response.output_item.done`; the final value wins.
*/
export const finishWithInput = <K extends StreamKey>(route: string, tools: State<K>, key: K, input: string) =>
Effect.gen(function* () {
const tool = tools[key]
if (!tool) return { tools }
return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool, input) }
})
/**
* Finalize every pending tool call at once. OpenAI Chat has this shape: it does
* not emit per-tool stop events, so all accumulated calls finish when the choice
* receives a terminal `finish_reason`.
*/
export const finishAll = <K extends StreamKey>(route: string, tools: State<K>) =>
Effect.gen(function* () {
const pending = Object.values<PendingTool | undefined>(tools).filter(
(tool): tool is PendingTool => tool !== undefined,
)
return {
tools: empty<K>(),
events: yield* Effect.forEach(pending, (tool) => toolCall(route, tool)),
}
})
export * as ToolStream from "./tool-stream"
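// Example (illustrative sketch; ids and names are fake): how an OpenAI Chat
// stream drives this module. Stream keys are the numeric `tool_calls[].index`
// values, and all pending calls finish together on the terminal finish_reason.
//
//   let tools = empty<number>()
//   const outcome = appendOrStart(route, tools, 0,
//     { id: "call_abc", name: "get_weather", text: '{"city":' }, "missing tool call")
//   if (!isError(outcome)) tools = outcome.tools // outcome.event is a tool-input-delta
//   // ...append further argument deltas the same way, then on finish_reason:
//   // const { tools: next, events } = yield* finishAll(route, tools)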

View File

@@ -0,0 +1,31 @@
import type { RouteModelInput } from "./route/client"
import type { ModelID, ModelRef, ProviderID } from "./schema"
export type ModelOptions = Omit<RouteModelInput, "id">
export type ModelFactory<Options extends ModelOptions = ModelOptions> = (
id: string | ModelID,
options?: Options,
) => ModelRef
type AnyModelFactory = (...args: never[]) => ModelRef
export interface Definition<Factory extends AnyModelFactory = ModelFactory> {
readonly id: ProviderID
readonly model: Factory
readonly apis?: Record<string, AnyModelFactory>
}
type DefinitionShape = {
readonly id: ProviderID
readonly model: (...args: never[]) => ModelRef
readonly apis?: Record<string, (...args: never[]) => ModelRef>
}
type NoExtraFields<Input, Shape> = Input & Record<Exclude<keyof Input, keyof Shape>, never>
export const make = <DefinitionType extends DefinitionShape>(
definition: NoExtraFields<DefinitionType, DefinitionShape>,
) => definition
export * as Provider from "./provider"

View File

@@ -0,0 +1,48 @@
import { Route, type RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as BedrockConverse from "../protocols/bedrock-converse"
import type { BedrockCredentials } from "../protocols/bedrock-converse"
export const id = ProviderID.make("amazon-bedrock")
export type ModelOptions = Omit<RouteModelInput, "id" | "baseURL"> & {
readonly apiKey?: string
readonly headers?: Record<string, string>
readonly credentials?: BedrockCredentials
/** AWS region. Defaults to `us-east-1` when neither this nor `credentials.region` is set. */
readonly region?: string
/** Override the computed `https://bedrock-runtime.<region>.amazonaws.com` URL. */
readonly baseURL?: string
}
type ModelInput = ModelOptions & Pick<RouteModelInput, "id">
export const routes = [BedrockConverse.route]
const bedrockBaseURL = (region: string) => `https://bedrock-runtime.${region}.amazonaws.com`
const converseModel = Route.model<ModelInput>(
BedrockConverse.route,
{
provider: "amazon-bedrock",
},
{
mapInput: (input) => {
const { credentials, region, baseURL, ...rest } = input
const resolvedRegion = region ?? credentials?.region ?? "us-east-1"
return {
...rest,
baseURL: baseURL ?? bedrockBaseURL(resolvedRegion),
native: BedrockConverse.nativeCredentials(input.native, credentials),
}
},
},
)
export const model = (modelID: string | ModelID, options: ModelOptions = {}) =>
converseModel({ ...options, id: modelID })
export const provider = Provider.make({
id,
model,
})
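// Example (hypothetical usage; the model id is illustrative). With only a
// region, the base URL resolves to the regional bedrock-runtime endpoint:
//
//   model("anthropic.claude-sonnet", { region: "us-west-2" })
//   // baseURL => https://bedrock-runtime.us-west-2.amazonaws.com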

View File

@@ -0,0 +1,16 @@
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as AnthropicMessages from "../protocols/anthropic-messages"
export const id = ProviderID.make("anthropic")
export const routes = [AnthropicMessages.route]
export const model = (
  id: string | ModelID,
  options: Omit<RouteModelInput, "id" | "baseURL"> & { readonly baseURL?: string } = {},
) => AnthropicMessages.model({ ...options, id })

export const provider = Provider.make({
id,
model,
})

View File

@@ -0,0 +1,83 @@
import { Auth } from "../route/auth"
import { type AtLeastOne, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import type { ModelInput } from "../llm"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"
export const id = ProviderID.make("azure")
const routeAuth = Auth.remove("authorization").andThen(Auth.apiKeyHeader("api-key"))
// Azure needs the customer's resource URL; supply either `resourceName`
// (helper builds the URL) or `baseURL` directly.
type AzureURL = AtLeastOne<{ readonly resourceName: string; readonly baseURL: string }>
export type ModelOptions = AzureURL &
Omit<ModelInput, "id" | "provider" | "route" | "apiKey" | "auth" | "baseURL"> &
ProviderAuthOption<"optional"> & {
readonly apiVersion?: string
readonly useCompletionUrls?: boolean
readonly providerOptions?: OpenAIProviderOptionsInput
}
type AzureModelInput = ModelOptions & Pick<ModelInput, "id">
const resourceBaseURL = (resourceName: string) => `https://${resourceName.trim()}.openai.azure.com/openai/v1`
const responsesRoute = OpenAIResponses.route.with({
id: "azure-openai-responses",
provider: id,
transport: OpenAIResponses.httpTransport.with({ auth: routeAuth }),
})
const chatRoute = OpenAIChat.route.with({
id: "azure-openai-chat",
provider: id,
transport: OpenAIChat.httpTransport.with({ auth: routeAuth }),
})
export const routes = [responsesRoute, chatRoute]
const mapInput = (input: AzureModelInput) => {
const { apiKey: _, apiVersion, resourceName, useCompletionUrls, ...rest } = input
return {
...withOpenAIOptions(input.id, rest),
auth:
"auth" in input && input.auth
? input.auth
: Auth.remove("authorization").andThen(
Auth.optional("apiKey" in input ? input.apiKey : undefined, "apiKey")
.orElse(Auth.config("AZURE_OPENAI_API_KEY"))
.pipe(Auth.header("api-key")),
),
// AtLeastOne guarantees at least one is set; baseURL wins if both are.
baseURL: rest.baseURL ?? resourceBaseURL(resourceName!),
queryParams: {
...rest.queryParams,
"api-version": apiVersion ?? rest.queryParams?.["api-version"] ?? "v1",
},
}
}
const chatModel = Route.model<AzureModelInput>(chatRoute, {}, { mapInput })
const responsesModel = Route.model<AzureModelInput>(responsesRoute, {}, { mapInput })
export const responses = (modelID: string | ModelID, options: ModelOptions) =>
responsesModel({ ...options, id: modelID })
export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID })
export const model = (modelID: string | ModelID, options: ModelOptions) => {
if (options.useCompletionUrls === true) return chat(modelID, options)
return responses(modelID, options)
}
export const provider = Provider.make({
id,
model,
apis: { responses, chat },
})
export const apis = provider.apis
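// Example (hypothetical usage): either AzureURL form works, and `apiVersion`
// defaults to "v1" as a query parameter.
//
//   model("gpt-5", { resourceName: "my-resource" })
//   // baseURL => https://my-resource.openai.azure.com/openai/v1
//   chat("gpt-4o", { baseURL: "https://example.azure-api.net/openai/v1", apiKey: "..." })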

View File

@@ -0,0 +1,139 @@
import type { Config, Redacted } from "effect"
import { type ModelInput } from "../llm"
import { Provider } from "../provider"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import { Auth } from "../route/auth"
import { AuthOptions, type AtLeastOne, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import { ProviderID, type ModelID } from "../schema"
export const aiGatewayID = ProviderID.make("cloudflare-ai-gateway")
export const workersAIID = ProviderID.make("cloudflare-workers-ai")
export const id = aiGatewayID
export const aiGatewayAuthEnvVars = ["CLOUDFLARE_API_TOKEN", "CF_AIG_TOKEN"] as const
export const workersAIAuthEnvVars = ["CLOUDFLARE_API_KEY", "CLOUDFLARE_WORKERS_AI_TOKEN"] as const
type CloudflareSecret = string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
type GatewayURL = AtLeastOne<{
readonly accountId: string
readonly baseURL: string
}> & {
readonly gatewayId?: string
}
export type AIGatewayOptions = GatewayURL &
Omit<ModelInput, "id" | "provider" | "route" | "baseURL" | "apiKey" | "auth"> &
ProviderAuthOption<"optional"> & {
/** Cloudflare AI Gateway authentication token. Sent as `cf-aig-authorization`. */
readonly gatewayApiKey?: CloudflareSecret
}
type AIGatewayInput = AIGatewayOptions & Pick<ModelInput, "id">
type WorkersAIURL = AtLeastOne<{
readonly accountId: string
readonly baseURL: string
}>
export type WorkersAIOptions = WorkersAIURL &
Omit<ModelInput, "id" | "provider" | "route" | "baseURL" | "apiKey" | "auth"> &
ProviderAuthOption<"optional">
type WorkersAIInput = WorkersAIOptions & Pick<ModelInput, "id">
export const aiGatewayBaseURL = (input: GatewayURL) => {
if (input.baseURL) return input.baseURL
if (!input.accountId) throw new Error("Cloudflare.aiGateway requires accountId unless baseURL is supplied")
return `https://gateway.ai.cloudflare.com/v1/${encodeURIComponent(input.accountId)}/${encodeURIComponent(input.gatewayId?.trim() || "default")}/compat`
}
const aiGatewayAuth = (input: AIGatewayInput) => {
if ("auth" in input && input.auth) return input.auth
const gateway = Auth.optional(input.gatewayApiKey, "gatewayApiKey")
.orElse(Auth.config("CLOUDFLARE_API_TOKEN"))
.orElse(Auth.config("CF_AIG_TOKEN"))
.pipe(Auth.bearerHeader("cf-aig-authorization"))
if (!("apiKey" in input) || input.apiKey === undefined) return gateway
if (input.gatewayApiKey === undefined) return Auth.bearer(input.apiKey)
return Auth.bearerHeader("cf-aig-authorization", input.gatewayApiKey).andThen(Auth.bearer(input.apiKey))
}
export const workersAIBaseURL = (input: WorkersAIURL) => {
if (input.baseURL) return input.baseURL
if (!input.accountId) throw new Error("Cloudflare.workersAI requires accountId unless baseURL is supplied")
return `https://api.cloudflare.com/client/v4/accounts/${encodeURIComponent(input.accountId)}/ai/v1`
}
const workersAIAuth = (input: WorkersAIInput) => {
return AuthOptions.bearer(input, workersAIAuthEnvVars)
}
export const aiGatewayRoute = OpenAICompatibleChat.route.with({
id: "cloudflare-ai-gateway",
provider: aiGatewayID,
})
export const workersAIRoute = OpenAICompatibleChat.route.with({
id: "cloudflare-workers-ai",
provider: workersAIID,
})
export const routes = [aiGatewayRoute, workersAIRoute]
const aiGatewayModel = Route.model<AIGatewayInput>(
aiGatewayRoute,
{
provider: id,
},
{
mapInput: (input) => {
const {
accountId: _accountId,
gatewayId: _gatewayId,
apiKey: _apiKey,
gatewayApiKey: _gatewayApiKey,
auth: _auth,
...rest
} = input
return {
...rest,
auth: aiGatewayAuth(input),
baseURL: aiGatewayBaseURL(input),
}
},
},
)
const workersAIModel = Route.model<WorkersAIInput>(
workersAIRoute,
{
provider: workersAIID,
},
{
mapInput: (input) => {
const { accountId: _accountId, apiKey: _apiKey, auth: _auth, ...rest } = input
return {
...rest,
auth: workersAIAuth(input),
baseURL: workersAIBaseURL(input),
}
},
},
)
export const aiGateway = (modelID: string | ModelID, options: AIGatewayOptions) =>
aiGatewayModel({ ...options, id: modelID })
export const workersAI = (modelID: string | ModelID, options: WorkersAIOptions) =>
workersAIModel({ ...options, id: modelID })
export const model = aiGateway
export const provider = Provider.make({
id,
model,
apis: { aiGateway, workersAI },
})
export const apis = provider.apis
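// Example (hypothetical account/gateway ids): URL resolution for both APIs.
//
//   aiGateway("some-model", { accountId: "acct123", gatewayId: "prod" })
//   // baseURL => https://gateway.ai.cloudflare.com/v1/acct123/prod/compat
//   workersAI("@cf/meta/llama-3-8b-instruct", { accountId: "acct123" })
//   // baseURL => https://api.cloudflare.com/client/v4/accounts/acct123/ai/v1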

View File

@@ -0,0 +1,48 @@
import { Route } from "../route/client"
import type { ModelInput } from "../llm"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"
export const id = ProviderID.make("github-copilot")
// GitHub Copilot has no canonical public URL — callers (opencode, etc.) must
// supply `baseURL` explicitly.
export type ModelOptions = Omit<ModelInput, "id" | "provider" | "route"> & {
readonly providerOptions?: OpenAIProviderOptionsInput
}
type CopilotModelInput = ModelOptions & Pick<ModelInput, "id">
export const shouldUseResponsesApi = (modelID: string | ModelID) => {
const model = String(modelID)
const match = /^gpt-(\d+)/.exec(model)
if (!match) return false
return Number(match[1]) >= 5 && !model.startsWith("gpt-5-mini")
}
export const routes = [OpenAIResponses.route, OpenAIChat.route]
const mapInput = (input: CopilotModelInput) => withOpenAIOptions(input.id, input)
const chatModel = Route.model<CopilotModelInput>(OpenAIChat.route, { provider: id }, { mapInput })
const responsesModel = Route.model<CopilotModelInput>(OpenAIResponses.route, { provider: id }, { mapInput })
export const responses = (modelID: string | ModelID, options: ModelOptions) =>
responsesModel({ ...options, id: modelID })
export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID })
export const model = (modelID: string | ModelID, options: ModelOptions) => {
const create = shouldUseResponsesApi(modelID) ? responsesModel : chatModel
return create({ ...options, id: modelID })
}
export const provider = Provider.make({
id,
model,
apis: { responses, chat },
})
export const apis = provider.apis
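// Example: how model ids route between the two APIs (derived from
// shouldUseResponsesApi above):
//
//   shouldUseResponsesApi("gpt-5")      // true  -> Responses API
//   shouldUseResponsesApi("gpt-5-mini") // false -> Chat Completions
//   shouldUseResponsesApi("gpt-4o")     // false (4 < 5)
//   shouldUseResponsesApi("claude-x")   // false (no gpt-<digit> prefix)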

View File

@@ -0,0 +1,16 @@
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as Gemini from "../protocols/gemini"
export const id = ProviderID.make("google")
export const routes = [Gemini.route]
export const model = (
  id: string | ModelID,
  options: Omit<RouteModelInput, "id" | "baseURL"> & { readonly baseURL?: string } = {},
) => Gemini.model({ ...options, id })
export const provider = Provider.make({
id,
model,
})

View File

@@ -0,0 +1,10 @@
export * as Anthropic from "./anthropic"
export * as AmazonBedrock from "./amazon-bedrock"
export * as Azure from "./azure"
export * as Cloudflare from "./cloudflare"
export * as GitHubCopilot from "./github-copilot"
export * as Google from "./google"
export * as OpenAI from "./openai"
export * as OpenAICompatible from "./openai-compatible"
export * as OpenRouter from "./openrouter"
export * as XAI from "./xai"

View File

@@ -0,0 +1,20 @@
export interface OpenAICompatibleProfile {
readonly provider: string
readonly baseURL: string
}
export const profiles = {
baseten: { provider: "baseten", baseURL: "https://inference.baseten.co/v1" },
cerebras: { provider: "cerebras", baseURL: "https://api.cerebras.ai/v1" },
deepinfra: { provider: "deepinfra", baseURL: "https://api.deepinfra.com/v1/openai" },
deepseek: { provider: "deepseek", baseURL: "https://api.deepseek.com/v1" },
fireworks: { provider: "fireworks", baseURL: "https://api.fireworks.ai/inference/v1" },
groq: { provider: "groq", baseURL: "https://api.groq.com/openai/v1" },
openrouter: { provider: "openrouter", baseURL: "https://openrouter.ai/api/v1" },
togetherai: { provider: "togetherai", baseURL: "https://api.together.xyz/v1" },
xai: { provider: "xai", baseURL: "https://api.x.ai/v1" },
} as const satisfies Record<string, OpenAICompatibleProfile>
export const byProvider: Record<string, OpenAICompatibleProfile> = Object.fromEntries(
Object.values(profiles).map((profile) => [profile.provider, profile]),
)

View File

@@ -0,0 +1,61 @@
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import type { OpenAICompatibleChatModelInput } from "../protocols/openai-compatible-chat"
import { profiles, type OpenAICompatibleProfile } from "./openai-compatible-profile"
export const id = ProviderID.make("openai-compatible")
export type ModelOptions = Omit<OpenAICompatibleChatModelInput, "id" | "provider"> & {
readonly provider: string
}
type GenericModelOptions = Omit<ModelOptions, "provider"> & {
readonly provider?: string
}
export type FamilyModelOptions = Omit<OpenAICompatibleChatModelInput, "id" | "provider" | "baseURL"> & {
readonly baseURL?: string
}
export const routes = [OpenAICompatibleChat.route]
export const model = (id: string | ModelID, options: ModelOptions) => {
return OpenAICompatibleChat.model({
...options,
id,
provider: ProviderID.make(options.provider),
})
}
export const profileModel = (
profile: OpenAICompatibleProfile,
id: string | ModelID,
options: FamilyModelOptions = {},
) =>
OpenAICompatibleChat.model({
...options,
id,
provider: profile.provider,
baseURL: options.baseURL ?? profile.baseURL,
})
const define = (profile: OpenAICompatibleProfile) =>
Provider.make({
id: ProviderID.make(profile.provider),
model: (id: string | ModelID, options: FamilyModelOptions = {}) => profileModel(profile, id, options),
})
export const provider = Provider.make({
id,
model: (id: string | ModelID, options: GenericModelOptions) =>
model(id, { ...options, provider: options.provider ?? "openai-compatible" }),
})
export const baseten = define(profiles.baseten)
export const cerebras = define(profiles.cerebras)
export const deepinfra = define(profiles.deepinfra)
export const deepseek = define(profiles.deepseek)
export const fireworks = define(profiles.fireworks)
export const groq = define(profiles.groq)
export const togetherai = define(profiles.togetherai)
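// Example (hypothetical model ids): profile providers bake in a base URL,
// while the generic entry point requires one.
//
//   groq.model("llama-3.1-70b")
//   // provider "groq", baseURL https://api.groq.com/openai/v1
//   model("my-model", { provider: "local", baseURL: "http://localhost:8080/v1" })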

View File

@@ -0,0 +1,70 @@
import type { ProviderOptions, ReasoningEffort, TextVerbosity } from "../schema"
import { mergeProviderOptions } from "../schema"
export interface OpenAIOptionsInput {
readonly [key: string]: unknown
readonly store?: boolean
readonly promptCacheKey?: string
readonly reasoningEffort?: ReasoningEffort
readonly reasoningSummary?: "auto"
readonly includeEncryptedReasoning?: boolean
readonly textVerbosity?: TextVerbosity
}
export type OpenAIProviderOptionsInput = ProviderOptions & {
readonly openai?: OpenAIOptionsInput
}
const definedEntries = (input: Record<string, unknown>) =>
Object.entries(input).filter((entry) => entry[1] !== undefined)
const openAIProviderOptions = (options: OpenAIOptionsInput | undefined): ProviderOptions | undefined => {
const openai = Object.fromEntries(
definedEntries({
store: options?.store,
promptCacheKey: options?.promptCacheKey,
reasoningEffort: options?.reasoningEffort,
reasoningSummary: options?.reasoningSummary,
includeEncryptedReasoning: options?.includeEncryptedReasoning,
textVerbosity: options?.textVerbosity,
}),
)
if (Object.keys(openai).length === 0) return undefined
return { openai }
}
export const gpt5DefaultOptions = (
modelID: string,
options: { readonly textVerbosity?: boolean } = {},
): ProviderOptions | undefined => {
const id = modelID.toLowerCase()
if (!id.includes("gpt-5") || id.includes("gpt-5-chat") || id.includes("gpt-5-pro")) return undefined
return openAIProviderOptions({
reasoningEffort: "medium",
reasoningSummary: "auto",
textVerbosity:
options.textVerbosity === true && id.includes("gpt-5.") && !id.includes("codex") && !id.includes("-chat")
? "low"
: undefined,
})
}
export const openAIDefaultOptions = (
modelID: string,
options: { readonly textVerbosity?: boolean } = {},
): ProviderOptions | undefined =>
mergeProviderOptions(openAIProviderOptions({ store: false }), gpt5DefaultOptions(modelID, options))
export const withOpenAIOptions = <Options extends { readonly providerOptions?: OpenAIProviderOptionsInput }>(
modelID: string,
options: Options,
defaults: { readonly textVerbosity?: boolean } = {},
): Options & { readonly id: string; readonly providerOptions?: ProviderOptions } => {
return {
...options,
id: modelID,
providerOptions: mergeProviderOptions(openAIDefaultOptions(modelID, defaults), options.providerOptions),
}
}
export * as OpenAIProviderOptions from "./openai-options"
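// Example (derived from the defaults above, assuming mergeProviderOptions
// merges per-provider records):
//
//   openAIDefaultOptions("gpt-5")
//   // => { openai: { store: false, reasoningEffort: "medium", reasoningSummary: "auto" } }
//   openAIDefaultOptions("gpt-4o")
//   // => { openai: { store: false } } (no GPT-5 reasoning defaults)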

View File

@@ -0,0 +1,53 @@
import { AuthOptions, type ProviderAuthOption } from "../route/auth-options"
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"
export type { OpenAIOptionsInput } from "./openai-options"
export const id = ProviderID.make("openai")
export const routes = [OpenAIResponses.route, OpenAIResponses.webSocketRoute, OpenAIChat.route]
// This provider facade wraps the lower-level Responses and Chat model factories
// with OpenAI-specific conveniences: typed options, API-key sugar, env fallback,
// and default option normalization.
type OpenAIModelInput<ModelInput> = Omit<ModelInput, "apiKey" | "auth" | "baseURL"> &
ProviderAuthOption<"optional"> & {
readonly baseURL?: string
readonly providerOptions?: OpenAIProviderOptionsInput
}
const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "OPENAI_API_KEY")
export const responses = (id: string | ModelID, options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {}) => {
const { apiKey: _, ...rest } = options
return OpenAIResponses.model(withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true }))
}
export const responsesWebSocket = (
id: string | ModelID,
options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {},
) => {
const { apiKey: _, ...rest } = options
return OpenAIResponses.webSocketModel(
withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true }),
)
}
export const chat = (id: string | ModelID, options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {}) => {
const { apiKey: _, ...rest } = options
return OpenAIChat.model(withOpenAIOptions(id, { ...rest, auth: auth(options) }))
}
export const provider = Provider.make({
id,
model: responses,
apis: { responses, responsesWebSocket, chat },
})
export const model = provider.model
export const apis = provider.apis
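// Example (hypothetical usage): the API key resolves from the option first,
// then from OPENAI_API_KEY.
//
//   model("gpt-5")                       // Responses API; key from OPENAI_API_KEY
//   chat("gpt-4o", { apiKey: "sk-..." }) // Chat Completions with an explicit key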

View File

@@ -0,0 +1,88 @@
import { Effect, Schema } from "effect"
import { Route, type RouteModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Provider } from "../provider"
import { Protocol } from "../route/protocol"
import { ProviderID, type ModelID, type ProviderOptions } from "../schema"
import * as OpenAICompatibleProfiles from "./openai-compatible-profile"
import * as OpenAIChat from "../protocols/openai-chat"
import { isRecord } from "../protocols/shared"
export const profile = OpenAICompatibleProfiles.profiles.openrouter
export const id = ProviderID.make(profile.provider)
const ADAPTER = "openrouter"
export interface OpenRouterOptions {
readonly [key: string]: unknown
readonly usage?: boolean | Record<string, unknown>
readonly reasoning?: Record<string, unknown>
readonly promptCacheKey?: string
}
export type OpenRouterProviderOptionsInput = ProviderOptions & {
readonly openrouter?: OpenRouterOptions
}
export type ModelOptions = Omit<RouteModelInput, "id" | "baseURL" | "providerOptions"> & {
readonly baseURL?: string
readonly providerOptions?: OpenRouterProviderOptionsInput
}
type ModelInput = ModelOptions & Pick<RouteModelInput, "id">
const OpenRouterBody = Schema.StructWithRest(Schema.Struct(OpenAIChat.bodyFields), [
Schema.Record(Schema.String, Schema.Any),
])
export type OpenRouterBody = Schema.Schema.Type<typeof OpenRouterBody>
export const protocol = Protocol.make({
id: "openrouter-chat",
body: {
schema: OpenRouterBody,
from: (request) =>
OpenAIChat.protocol.body.from(request).pipe(
Effect.map(
(body) =>
({
...body,
...bodyOptions(request.providerOptions?.openrouter),
}) as OpenRouterBody,
),
),
},
stream: OpenAIChat.protocol.stream,
})
const bodyOptions = (input: unknown) => {
const openrouter = isRecord(input) ? input : {}
return {
...(openrouter.usage === true
? { usage: { include: true } }
: isRecord(openrouter.usage)
? { usage: openrouter.usage }
: {}),
...(isRecord(openrouter.reasoning) ? { reasoning: openrouter.reasoning } : {}),
...(typeof openrouter.promptCacheKey === "string" ? { prompt_cache_key: openrouter.promptCacheKey } : {}),
}
}
export const route = Route.make({
id: ADAPTER,
protocol,
endpoint: Endpoint.path("/chat/completions"),
framing: Framing.sse,
})
export const routes = [route]
const modelRef = Route.model<ModelInput>(route, {
provider: profile.provider,
baseURL: profile.baseURL,
})
export const model = (id: string | ModelID, options: ModelOptions = {}) => modelRef({ ...options, id })
export const provider = Provider.make({
id,
model,
})
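// Example: provider options map onto OpenRouter body fields (per bodyOptions):
//
//   providerOptions: { openrouter: { usage: true, promptCacheKey: "session-1" } }
//   // adds { usage: { include: true }, prompt_cache_key: "session-1" } to the chat body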

View File

@@ -0,0 +1,52 @@
import { AuthOptions, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAICompatibleProfiles from "./openai-compatible-profile"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
export const id = ProviderID.make("xai")
export type ModelOptions = Omit<RouteModelInput, "id" | "apiKey" | "auth" | "baseURL"> &
ProviderAuthOption<"optional"> & {
readonly baseURL?: string
}
export const routes = [OpenAIResponses.route, OpenAICompatibleChat.route]
const responsesModel = Route.model(OpenAIResponses.route, { provider: id })
const chatModel = OpenAICompatibleChat.model
const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "XAI_API_KEY")
export const responses = (modelID: string | ModelID, options: ModelOptions = {}) => {
const { apiKey: _, ...rest } = options
return responsesModel({
...rest,
auth: auth(options),
id: modelID,
baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL,
})
}
export const chat = (modelID: string | ModelID, options: ModelOptions = {}) => {
const { apiKey: _, ...rest } = options
return chatModel({
...rest,
auth: auth(options),
id: modelID,
provider: id,
baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL,
})
}
export const provider = Provider.make({
id,
model: responses,
apis: { responses, chat },
})
export const model = provider.model
export const apis = provider.apis

View File

@@ -0,0 +1,57 @@
import type { Config, Redacted } from "effect"
import { Auth } from "./auth"
export type ApiKeyMode = "optional" | "required"
export type AuthOverride = {
readonly auth: Auth
readonly apiKey?: never
}
export type OptionalApiKeyAuth = {
readonly apiKey?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
readonly auth?: never
}
export type RequiredApiKeyAuth = {
readonly apiKey: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
readonly auth?: never
}
export type ProviderAuthOption<Mode extends ApiKeyMode> =
| AuthOverride
| (Mode extends "optional" ? OptionalApiKeyAuth : RequiredApiKeyAuth)
export type ModelOptions<Base, Mode extends ApiKeyMode> = Omit<Base, "apiKey" | "auth"> & ProviderAuthOption<Mode>
export type ModelArgs<Base, Mode extends ApiKeyMode> = Mode extends "optional"
? readonly [options?: ModelOptions<Base, Mode>]
: readonly [options: ModelOptions<Base, Mode>]
export type ModelFactory<Base, Mode extends ApiKeyMode, Model> = (id: string, ...args: ModelArgs<Base, Mode>) => Model
/**
* Require at least one of the keys in `T`. Use for option shapes where any
* subset of fields is acceptable but at least one must be present (e.g. Azure
* accepts `resourceName` or `baseURL`).
*/
export type AtLeastOne<T> = {
[K in keyof T]: Required<Pick<T, K>> & Partial<Omit<T, K>>
}[keyof T]
/**
* Standard bearer-auth resolution for providers: honor an explicit `auth`
* override, otherwise resolve `apiKey` (option > config var) and apply it as
* a bearer token.
*/
export const bearer = (options: ProviderAuthOption<"optional">, envVar: string | ReadonlyArray<string>): Auth => {
if ("auth" in options && options.auth) return options.auth
return (Array.isArray(envVar) ? envVar : [envVar])
.reduce(
(auth, name) => auth.orElse(Auth.config(name)),
Auth.optional("apiKey" in options ? options.apiKey : undefined, "apiKey"),
)
.bearer()
}
export * as AuthOptions from "./auth-options"
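// Example: AtLeastOne accepts any non-empty subset of its keys.
//
//   type AzureURL = AtLeastOne<{ resourceName: string; baseURL: string }>
//   const a: AzureURL = { resourceName: "r" }    // ok
//   const b: AzureURL = { baseURL: "https://x" } // ok
//   // const c: AzureURL = {}                    // type error
//
// And bearer resolution order: explicit `auth` > `apiKey` option > config vars.
//
//   bearer({ apiKey: "sk-..." }, "OPENAI_API_KEY")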

View File

@@ -0,0 +1,196 @@
import { Config, Effect, Redacted } from "effect"
import { Headers } from "effect/unstable/http"
import { AuthenticationReason, InvalidRequestReason, LLMError, type LLMRequest } from "../schema"
export class MissingCredentialError extends Error {
readonly _tag = "MissingCredentialError"
constructor(readonly source: string) {
super(`Missing auth credential: ${source}`)
}
}
export type CredentialError = MissingCredentialError | Config.ConfigError
export type AuthError = CredentialError | LLMError
export interface AuthInput {
readonly request: LLMRequest
readonly method: "POST" | "GET"
readonly url: string
readonly body: string
readonly headers: Headers.Headers
}
export interface Credential {
readonly load: Effect.Effect<Redacted.Redacted<string>, CredentialError>
readonly orElse: (that: Credential) => Credential
readonly bearer: () => Auth
readonly header: (name: string) => Auth
readonly pipe: <A>(f: (self: Credential) => A) => A
}
export interface Auth {
readonly apply: (input: AuthInput) => Effect.Effect<Headers.Headers, AuthError>
readonly andThen: (that: Auth) => Auth
readonly orElse: (that: Auth) => Auth
readonly pipe: <A>(f: (self: Auth) => A) => A
}
export const isAuth = (input: unknown): input is Auth =>
typeof input === "object" && input !== null && "apply" in input && typeof input.apply === "function"
const credential = (load: Effect.Effect<Redacted.Redacted<string>, CredentialError>): Credential => {
const self: Credential = {
load,
orElse: (that) => credential(load.pipe(Effect.catch(() => that.load))),
bearer: () => fromCredential(self, (secret) => ({ authorization: `Bearer ${secret}` })),
header: (name) => fromCredential(self, (secret) => ({ [name]: secret })),
pipe: (f) => f(self),
}
return self
}
const auth = (apply: Auth["apply"]): Auth => {
const self: Auth = {
apply,
andThen: (that) =>
auth((input) => apply(input).pipe(Effect.flatMap((headers) => that.apply({ ...input, headers })))),
orElse: (that) => auth((input) => apply(input).pipe(Effect.catch(() => that.apply(input)))),
pipe: (f) => f(self),
}
return self
}
const fromCredential = (source: Credential, render: (secret: string) => Headers.Input) =>
auth((input) =>
source.load.pipe(Effect.map((secret) => Headers.setAll(input.headers, render(Redacted.value(secret))))),
)
const secretEffect = (secret: string | Redacted.Redacted<string>, source: string) => {
const redacted = typeof secret === "string" ? Redacted.make(secret) : secret
if (Redacted.value(redacted) === "") return Effect.fail(new MissingCredentialError(source))
return Effect.succeed(redacted)
}
const credentialFromSecret = (
secret: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>,
source: string,
) => {
if (typeof secret === "string" || Redacted.isRedacted(secret)) return credential(secretEffect(secret, source))
return credential(
Effect.gen(function* () {
return yield* secretEffect(yield* secret, source)
}),
)
}
export const value = (secret: string, source = "value") => credentialFromSecret(secret, source)
export const optional = (
secret: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | undefined,
source = "optional value",
) =>
secret === undefined
? credential(Effect.fail(new MissingCredentialError(source)))
: credentialFromSecret(secret, source)
export const config = (name: string) => credentialFromSecret(Config.redacted(name), name)
export const effect = (load: Effect.Effect<Redacted.Redacted<string>, CredentialError>) => credential(load)
export const none = auth((input) => Effect.succeed(input.headers))
export const headers = (input: Headers.Input) =>
auth((inputAuth) => Effect.succeed(Headers.setAll(inputAuth.headers, input)))
export const remove = (name: string) => auth((input) => Effect.succeed(Headers.remove(input.headers, name)))
export const custom = (apply: (input: AuthInput) => Effect.Effect<Headers.Headers, LLMError>) => auth(apply)
export const passthrough = none
const fromModelApiKey = (from: (apiKey: string) => Headers.Input) =>
auth(({ request, headers }) => {
const key = request.model.apiKey
if (!key) return Effect.succeed(headers)
return Effect.succeed(Headers.setAll(headers, from(key)))
})
const credentialInput = (
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) =>
typeof source === "string" || Redacted.isRedacted(source) || Config.isConfig(source)
? credentialFromSecret(source, "value")
: source
export function bearer(): Auth
export function bearer(
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function bearer(
source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
if (source === undefined) return fromModelApiKey((key) => ({ authorization: `Bearer ${key}` }))
return credentialInput(source).bearer()
}
export const apiKey = bearer
export const apiKeyHeader = (name: string) => fromModelApiKey((key) => ({ [name]: key }))
export function header(
name: string,
): (source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) => Auth
export function header(
name: string,
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function header(
name: string,
source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
if (source === undefined) {
return (
next: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) => credentialInput(next).header(name)
}
return credentialInput(source).header(name)
}
export function bearerHeader(
name: string,
): (source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) => Auth
export function bearerHeader(
name: string,
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function bearerHeader(
name: string,
source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
const render = (input: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) =>
fromCredential(credentialInput(input), (secret) => ({ [name]: `Bearer ${secret}` }))
if (source === undefined) return render
return render(source)
}
const toLLMError = (error: AuthError): LLMError => {
if (error instanceof MissingCredentialError || error instanceof Config.ConfigError) {
return new LLMError({
module: "Auth",
method: "apply",
reason:
error instanceof MissingCredentialError
? new AuthenticationReason({ message: error.message, kind: "missing" })
: new InvalidRequestReason({ message: `Failed to resolve auth config: ${error.message}` }),
})
}
return error
}
export const toEffect =
(input: Auth) =>
(authInput: AuthInput): Effect.Effect<Headers.Headers, LLMError> =>
input.apply(authInput).pipe(Effect.mapError(toLLMError))
export * as Auth from "./auth"
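// Example (patterns taken from the providers above): credentials compose with
// orElse before being rendered into headers, and auths chain with andThen.
//
//   optional(input.apiKey, "apiKey")
//     .orElse(config("MY_API_KEY")) // "MY_API_KEY" is a hypothetical config var
//     .bearer()                     // => authorization: Bearer <secret>
//
//   remove("authorization").andThen(apiKeyHeader("api-key")) // Azure-style swap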

View File

@@ -0,0 +1,528 @@
import { Cause, Context, Effect, Layer, Schema, Stream } from "effect"
import type { Auth as AuthDef } from "./auth"
import type { Endpoint } from "./endpoint"
import { RequestExecutor } from "./executor"
import type { Framing } from "./framing"
import { HttpTransport } from "./transport"
import type { Transport, TransportRuntime } from "./transport"
import { WebSocketExecutor } from "./transport"
import type { Service as WebSocketExecutorService } from "./transport/websocket"
import type { Protocol } from "./protocol"
import * as ProviderShared from "../protocols/shared"
import * as ToolRuntime from "../tool-runtime"
import type { Tools } from "../tool"
import type { LLMError, LLMEvent, PreparedRequestOf, ProtocolID } from "../schema"
import {
GenerationOptions,
HttpOptions,
LLMRequest,
LLMResponse,
ModelID,
ModelLimits,
ModelRef,
LLMError as LLMErrorClass,
NoRouteReason,
PreparedRequest,
ProviderID,
RouteID,
mergeGenerationOptions,
mergeHttpOptions,
mergeProviderOptions,
} from "../schema"
export interface RouteBody<Body> {
/** Schema for the validated provider-native body sent as the JSON request. */
readonly schema: Schema.Codec<Body, unknown>
/** Build the provider-native body from a common `LLMRequest`. */
readonly from: (request: LLMRequest) => Effect.Effect<Body, LLMError>
}
export interface Route<Body, Prepared = unknown> {
readonly id: string
readonly provider?: ProviderID
readonly protocol: ProtocolID
readonly transport: Transport<Body, Prepared, unknown>
readonly defaults: RouteDefaults
readonly body: RouteBody<Body>
readonly with: (patch: RoutePatch<Body, Prepared>) => Route<Body, Prepared>
readonly model: <Input extends RouteModelInput = RouteModelInput>(input: Input) => ModelRef
readonly prepareTransport: (body: Body, request: LLMRequest) => Effect.Effect<Prepared, LLMError>
readonly streamPrepared: (
prepared: Prepared,
request: LLMRequest,
runtime: TransportRuntime,
) => Stream.Stream<LLMEvent, LLMError>
}
// Route registries intentionally erase body generics after construction.
// Normal call sites use `OpenAIChat.route`; callers only need body types
// when preparing a request with a protocol-specific type assertion.
// oxlint-disable-next-line typescript-eslint/no-explicit-any
export type AnyRoute = Route<any, any>
const routeRegistry = new Map<string, AnyRoute>()
// Route lookup is intentionally global: model refs name a route id, and
// importing the provider/protocol/custom-route module registers the runnable
// implementation. Duplicate ids are bugs because model refs cannot disambiguate
// them.
const register = <R extends AnyRoute>(route: R): R => {
const existing = routeRegistry.get(route.id)
if (existing && existing !== route) throw new Error(`Duplicate LLM route id "${route.id}"`)
routeRegistry.set(route.id, route)
return route
}
const registeredRoute = (id: string) => routeRegistry.get(id)
export type HttpOptionsInput = HttpOptions.Input
export type ModelRefInput = Omit<
ConstructorParameters<typeof ModelRef>[0],
"id" | "provider" | "route" | "limits" | "generation" | "http" | "auth"
> & {
readonly id: string | ModelID
readonly provider: string | ProviderID
readonly route: string | RouteID
readonly auth?: AuthDef
readonly limits?: ModelLimits.Input
readonly generation?: GenerationOptions.Input
readonly http?: HttpOptionsInput
}
// `baseURL` is required on `ModelRefInput` (every materialized `ModelRef` has
// a host) but optional at the route-input layers below. The route's `defaults`
// can supply a canonical URL (e.g. OpenAI/Anthropic) so the user's input may
// omit it. Routes without a canonical URL (OpenAI-compatible, GitHub Copilot)
// re-tighten this in their own input type.
export type RouteModelInput = Omit<ModelRefInput, "provider" | "route" | "baseURL"> & {
readonly baseURL?: string
}
export type RouteModelDefaults = Omit<ModelRefInput, "id" | "route" | "baseURL"> & {
readonly baseURL?: string
}
export type RouteRoutedModelInput = Omit<ModelRefInput, "route" | "baseURL"> & {
readonly baseURL?: string
}
export type RouteRoutedModelDefaults = Partial<Omit<ModelRefInput, "id" | "provider" | "route">>
export type RouteDefaults = Partial<Omit<ModelRefInput, "id" | "provider" | "route">>
export interface RoutePatch<Body, Prepared> extends RouteDefaults {
readonly id: string
readonly provider?: string | ProviderID
readonly transport?: Transport<Body, Prepared, unknown>
}
type RouteMappedModelInput = RouteModelInput | RouteRoutedModelInput
export interface RouteModelOptions<
Input extends RouteMappedModelInput,
Output extends RouteMappedModelInput = RouteMappedModelInput,
> {
readonly mapInput?: (input: Input) => Output
}
export interface RouteMappedModelOptions<Input, Output extends RouteMappedModelInput = RouteMappedModelInput> {
readonly mapInput: (input: Input) => Output
}
const modelWithDefaults =
<Input>(
route: AnyRoute,
defaults: Partial<Omit<ModelRefInput, "id" | "route">>,
options: { readonly mapInput?: (input: Input) => RouteMappedModelInput },
) =>
(input: Input) => {
const mapped = options.mapInput === undefined ? (input as RouteMappedModelInput) : options.mapInput(input)
const provider = defaults.provider ?? route.provider ?? ("provider" in mapped ? mapped.provider : undefined)
if (!provider) throw new Error(`Route.model(${route.id}) requires a provider`)
const baseURL = mapped.baseURL ?? defaults.baseURL ?? route.defaults.baseURL
if (!baseURL)
throw new Error(
`Route.model(${route.id}) requires a baseURL — supply it via input, defaults, or route defaults`,
)
const generation = mergeGenerationOptions(route.defaults.generation, defaults.generation)
const providerOptions = mergeProviderOptions(route.defaults.providerOptions, defaults.providerOptions)
const http = mergeHttpOptions(httpOptions(route.defaults.http), httpOptions(defaults.http))
return modelRef({
...route.defaults,
...defaults,
...mapped,
baseURL,
provider,
route: route.id,
limits: mapped.limits ?? defaults.limits ?? route.defaults.limits,
generation: mergeGenerationOptions(generation, mapped.generation),
providerOptions: mergeProviderOptions(providerOptions, mapped.providerOptions),
http: mergeHttpOptions(http, httpOptions(mapped.http)),
})
}
const mergeRouteDefaults = (base: RouteDefaults | undefined, patch: RouteDefaults): RouteDefaults => ({
...base,
...patch,
limits: patch.limits ?? base?.limits,
generation: mergeGenerationOptions(generationOptions(base?.generation), generationOptions(patch.generation)),
providerOptions: mergeProviderOptions(base?.providerOptions, patch.providerOptions),
http: mergeHttpOptions(httpOptions(base?.http), httpOptions(patch.http)),
})
export const modelLimits = ModelLimits.make
export const generationOptions = (input: GenerationOptions.Input | undefined) =>
input === undefined ? undefined : GenerationOptions.make(input)
export const httpOptions = (input: HttpOptionsInput | undefined) => {
if (input === undefined) return input
return HttpOptions.make(input)
}
export const modelRef = (input: ModelRefInput) =>
new ModelRef({
...input,
id: ModelID.make(input.id),
provider: ProviderID.make(input.provider),
route: RouteID.make(input.route),
limits: modelLimits(input.limits),
generation: generationOptions(input.generation),
http: httpOptions(input.http),
})
function model<Input extends RouteModelInput = RouteModelInput>(
route: AnyRoute,
defaults: RouteModelDefaults,
options?: RouteModelOptions<Input, RouteModelInput>,
): (input: Input) => ModelRef
function model<Input extends RouteRoutedModelInput = RouteRoutedModelInput>(
route: AnyRoute,
defaults?: RouteRoutedModelDefaults,
options?: RouteModelOptions<Input, RouteRoutedModelInput>,
): (input: Input) => ModelRef
function model<Input, Output extends RouteMappedModelInput = RouteMappedModelInput>(
route: AnyRoute,
defaults: Partial<Omit<ModelRefInput, "id" | "route">>,
options: RouteMappedModelOptions<Input, Output>,
): (input: Input) => ModelRef
function model<Input>(
route: AnyRoute,
defaults: Partial<Omit<ModelRefInput, "id" | "route">> = {},
options: { readonly mapInput?: (input: Input) => RouteMappedModelInput } = {},
) {
return modelWithDefaults(route, defaults, options)
}
export interface Interface {
/**
* Compile a request through protocol body construction, validation, and HTTP
* preparation without sending it. Returns the prepared request including the
* provider-native body.
*
* Pass a `Body` type argument to statically expose the route's body
* shape (e.g. `prepare<OpenAIChatBody>(...)`) — the runtime body is
* identical, so this is a type-level assertion the caller makes about which
* route the request will resolve to.
*/
readonly prepare: <Body = unknown>(request: LLMRequest) => Effect.Effect<PreparedRequestOf<Body>, LLMError>
readonly stream: StreamMethod
readonly generate: GenerateMethod
}
export interface StreamMethod {
(request: LLMRequest): Stream.Stream<LLMEvent, LLMError>
<T extends Tools>(options: ToolRuntime.RunOptions<T>): Stream.Stream<LLMEvent, LLMError>
}
export interface GenerateMethod {
(request: LLMRequest): Effect.Effect<LLMResponse, LLMError>
<T extends Tools>(options: ToolRuntime.RunOptions<T>): Effect.Effect<LLMResponse, LLMError>
}
export class Service extends Context.Service<Service, Interface>()("@opencode/LLMClient") {}
const noRoute = (model: ModelRef) =>
new LLMErrorClass({
module: "LLMClient",
method: "resolveRoute",
reason: new NoRouteReason({ route: model.route, provider: model.provider, model: model.id }),
})
const resolveRequestOptions = (request: LLMRequest) =>
LLMRequest.update(request, {
generation: mergeGenerationOptions(request.model.generation, request.generation) ?? new GenerationOptions({}),
providerOptions: mergeProviderOptions(request.model.providerOptions, request.providerOptions),
http: mergeHttpOptions(request.model.http, request.http),
})
export interface MakeInput<Body, Frame, Event, State> {
/** Route id used in registry lookup and error messages. */
readonly id: string
/** Provider identity for route-owned model construction. */
readonly provider?: string | ProviderID
/** Semantic API contract — owns body construction, body schema, and parsing. */
readonly protocol: Protocol<Body, Frame, Event, State>
/** Where the request is sent. */
readonly endpoint: Endpoint<Body>
/** Per-request transport auth. Model-level `Auth` overrides this. */
readonly auth?: AuthDef
/** Stream framing — bytes -> frames before `protocol.stream.event` decoding. */
readonly framing: Framing<Frame>
/** Static / per-request headers added before `auth` runs. */
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
/** Model defaults used by the route's `.model(...)` helper. */
readonly defaults?: RouteDefaults
}
export interface MakeTransportInput<Body, Prepared, Frame, Event, State> {
/** Route id used in registry lookup and error messages. */
readonly id: string
/** Provider identity for route-owned model construction. */
readonly provider?: string | ProviderID
/** Semantic API contract — owns body construction, body schema, and parsing. */
readonly protocol: Protocol<Body, Frame, Event, State>
/** Runnable transport route. */
readonly transport: Transport<Body, Prepared, Frame>
/** Provider/model defaults used by the route's `.model(...)` helper. */
readonly defaults?: RouteDefaults
}
const streamError = (route: string, message: string, cause: Cause.Cause<unknown>) => {
const failed = cause.reasons.find(Cause.isFailReason)?.error
if (failed instanceof LLMErrorClass) return failed
return ProviderShared.eventError(route, message, Cause.pretty(cause))
}
function makeFromTransport<Body, Prepared, Frame, Event, State>(
input: MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared> {
const protocol = input.protocol
const decodeEventEffect = Schema.decodeUnknownEffect(protocol.stream.event)
const decodeEvent = (route: string) => (frame: Frame) =>
decodeEventEffect(frame).pipe(
Effect.mapError(() =>
ProviderShared.eventError(
input.id,
`Invalid ${route} stream event`,
typeof frame === "string" ? frame : ProviderShared.encodeJson(frame),
),
),
)
const build = (routeInput: MakeTransportInput<Body, Prepared, Frame, Event, State>): Route<Body, Prepared> => {
const route: Route<Body, Prepared> = {
id: routeInput.id,
provider: routeInput.provider === undefined ? undefined : ProviderID.make(routeInput.provider),
protocol: protocol.id,
transport: routeInput.transport,
defaults: routeInput.defaults ?? {},
body: protocol.body,
with: (patch: RoutePatch<Body, Prepared>) => {
const { id, provider, transport, ...defaults } = patch
if (!id || id === routeInput.id) throw new Error(`Route.with(${routeInput.id}) requires a new route id`)
return build({
...routeInput,
id,
provider: provider ?? routeInput.provider,
transport: (transport as Transport<Body, Prepared, Frame> | undefined) ?? routeInput.transport,
defaults: mergeRouteDefaults(routeInput.defaults, defaults),
})
},
model: (input: RouteModelInput): ModelRef => modelWithDefaults<RouteModelInput>(route, {}, {})(input),
prepareTransport: routeInput.transport.prepare,
streamPrepared: (prepared: Prepared, request: LLMRequest, runtime: TransportRuntime) => {
const route = `${request.model.provider}/${request.model.route}`
const events = routeInput.transport
.frames(prepared, request, runtime)
.pipe(
Stream.mapEffect(decodeEvent(route)),
protocol.stream.terminal ? Stream.takeUntil(protocol.stream.terminal) : (stream) => stream,
)
return events.pipe(
Stream.mapAccumEffect(
protocol.stream.initial,
protocol.stream.step,
protocol.stream.onHalt ? { onHalt: protocol.stream.onHalt } : undefined,
),
Stream.catchCause((cause) => Stream.fail(streamError(route, `Failed to read ${route} stream`, cause))),
)
},
} satisfies Route<Body, Prepared>
return register(route)
}
return build(input)
}
export function make<Body, Prepared, Frame, Event, State>(
input: MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared>
/**
* Build a `Route` by composing the four orthogonal pieces of a deployment:
*
* - `Protocol` — what is the API I'm speaking?
* - `Endpoint` — where do I send the request?
* - `Auth` — how do I authenticate it?
* - `Framing` — how do I cut the response stream into protocol frames?
*
* Plus optional `headers` for cross-cutting deployment concerns (provider
* version pins, per-deployment quirks).
*
* This is the canonical route constructor. If a new route does not fit
* this four-axis model, add a purpose-built constructor rather than widening
* the public surface preemptively.
*/
export function make<Body, Frame, Event, State>(
input: MakeInput<Body, Frame, Event, State>,
): Route<Body, HttpTransport.HttpPrepared<Frame>>
export function make<Body, Prepared, Frame, Event, State>(
input: MakeInput<Body, Frame, Event, State> | MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared> | Route<Body, HttpTransport.HttpPrepared<Frame>> {
if ("transport" in input) return makeFromTransport(input)
const protocol = input.protocol
const encodeBody = Schema.encodeSync(Schema.fromJsonString(protocol.body.schema))
return makeFromTransport({
id: input.id,
provider: input.provider,
protocol,
transport: HttpTransport.httpJson({
endpoint: input.endpoint,
auth: input.auth,
framing: input.framing,
encodeBody,
headers: input.headers,
}),
defaults: input.defaults,
})
}
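// Example (hypothetical sketch): composing a route from the four axes. The
// protocol value is a stand-in; Endpoint.path and Framing.sse are the real
// helpers used by routes elsewhere in this package.
//
//   const acmeRoute = make({
//     id: "acme-chat",
//     protocol: AcmeChat.protocol, // hypothetical protocol module
//     endpoint: Endpoint.path("/v1/chat"),
//     framing: Framing.sse,
//     headers: () => ({ "acme-version": "2026-01-01" }),
//   })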
// `compile` is the important boundary: it turns a common `LLMRequest` into a
// validated provider body plus transport-private prepared data, but does not
// execute the transport.
const compile = Effect.fn("LLM.compile")(function* (request: LLMRequest) {
const resolved = resolveRequestOptions(request)
const route = registeredRoute(resolved.model.route)
if (!route) return yield* noRoute(resolved.model)
const body = yield* route.body
.from(resolved)
.pipe(Effect.flatMap(ProviderShared.validateWith(Schema.decodeUnknownEffect(route.body.schema))))
const prepared = yield* route.prepareTransport(body, resolved)
return {
request: resolved,
route,
body,
prepared,
}
})
const prepareWith = Effect.fn("LLMClient.prepare")(function* (request: LLMRequest) {
const compiled = yield* compile(request)
return new PreparedRequest({
id: compiled.request.id ?? "request",
route: compiled.route.id,
protocol: compiled.route.protocol,
model: compiled.request.model,
body: compiled.body,
metadata: { transport: compiled.route.transport.id },
})
})
const streamRequestWith = (runtime: TransportRuntime) => (request: LLMRequest) =>
Stream.unwrap(
Effect.gen(function* () {
const compiled = yield* compile(request)
return compiled.route.streamPrepared(compiled.prepared, compiled.request, runtime)
}),
)
const isToolRunOptions = (input: LLMRequest | ToolRuntime.RunOptions<Tools>): input is ToolRuntime.RunOptions<Tools> =>
"request" in input && "tools" in input
const streamWith = (streamRequest: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError>): StreamMethod =>
((input: LLMRequest | ToolRuntime.RunOptions<Tools>) => {
if (isToolRunOptions(input)) return ToolRuntime.stream({ ...input, stream: streamRequest })
return streamRequest(input)
}) as StreamMethod
const generateWith = (stream: Interface["stream"]) =>
Effect.fn("LLM.generate")(function* (input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
return new LLMResponse(
yield* stream(input as never).pipe(
Stream.runFold(
() => ({ events: [] as LLMEvent[], usage: undefined as LLMResponse["usage"] }),
(acc, event) => {
acc.events.push(event)
if ("usage" in event && event.usage !== undefined) acc.usage = event.usage
return acc
},
),
),
)
})
export const prepare = <Body = unknown>(request: LLMRequest) =>
prepareWith(request) as Effect.Effect<PreparedRequestOf<Body>, LLMError>
export function stream(request: LLMRequest): Stream.Stream<LLMEvent, LLMError>
export function stream<T extends Tools>(options: ToolRuntime.RunOptions<T>): Stream.Stream<LLMEvent, LLMError>
export function stream(input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
return Stream.unwrap(
Effect.gen(function* () {
return (yield* Service).stream(input as never)
}),
)
}
export function generate(request: LLMRequest): Effect.Effect<LLMResponse, LLMError>
export function generate<T extends Tools>(options: ToolRuntime.RunOptions<T>): Effect.Effect<LLMResponse, LLMError>
export function generate(input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
return Effect.gen(function* () {
return yield* (yield* Service).generate(input as never)
})
}
export const streamRequest = (request: LLMRequest) =>
Stream.unwrap(
Effect.gen(function* () {
return (yield* Service).stream(request)
}),
)
export const layer: Layer.Layer<Service, never, RequestExecutor.Service> = Layer.effect(
Service,
Effect.gen(function* () {
const stream = streamWith(streamRequestWith({ http: yield* RequestExecutor.Service }))
return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) })
}),
)
export const layerWithWebSocket: Layer.Layer<Service, never, RequestExecutor.Service | WebSocketExecutorService> =
Layer.effect(
Service,
Effect.gen(function* () {
const stream = streamWith(
streamRequestWith({
http: yield* RequestExecutor.Service,
webSocket: yield* WebSocketExecutor.Service,
}),
)
return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) })
}),
)
export const Route = { make, model } as const
export const LLMClient = {
Service,
layer,
layerWithWebSocket,
prepare,
stream,
generate,
stepCountIs: ToolRuntime.stepCountIs,
} as const
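// Usage sketch (illustrative): `model` stands for some `ModelRef`, and the
// request fields shown are assumptions about the `LLMRequest` shape rather
// than a contract this file defines.
//
//   const program = LLMClient.generate({ model, messages: [Message.user("hi")] }).pipe(
//     Effect.provide(LLMClient.layer),
//     Effect.provide(RequestExecutor.defaultLayer),
//   )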

View File

@@ -0,0 +1,41 @@
import type { LLMRequest } from "../schema"
import * as ProviderShared from "../protocols/shared"
export interface EndpointInput<Body> {
readonly request: LLMRequest
readonly body: Body
}
export type EndpointPart<Body> = string | ((input: EndpointInput<Body>) => string)
/**
* Declarative URL construction for one route.
*
* `Endpoint` carries only the path. The host always lives on `model.baseURL`,
* supplied by the provider helper that constructs the model. `render(...)`
* just appends the path (and any `model.queryParams`) to that host.
*
* `path` may be a string or a function of `EndpointInput`, for routes whose
* URL embeds the model id, region, or another body field (e.g. Bedrock,
* Gemini).
*/
export interface Endpoint<Body> {
readonly path: EndpointPart<Body>
}
/** Construct an `Endpoint` from a path string or path function. */
export const path = <Body>(value: EndpointPart<Body>): Endpoint<Body> => ({ path: value })
const renderPart = <Body>(part: EndpointPart<Body>, input: EndpointInput<Body>) =>
typeof part === "function" ? part(input) : part
export const render = <Body>(endpoint: Endpoint<Body>, input: EndpointInput<Body>) => {
const url = new URL(
`${ProviderShared.trimBaseUrl(input.request.model.baseURL)}${renderPart(endpoint.path, input)}`,
)
const params = input.request.model.queryParams
if (params) for (const [key, value] of Object.entries(params)) url.searchParams.set(key, value)
return url
}
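// Illustrative: a path computed from the body, in the Bedrock/Gemini style the
// doc comment above mentions. The `modelId` body field is a hypothetical shape
// assumed for the example:
//
//   const bedrockPath = path<{ readonly modelId: string }>(
//     ({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`,
//   )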
export * as Endpoint from "./endpoint"

View File

@@ -0,0 +1,374 @@
import { Cause, Context, Effect, Layer, Random } from "effect"
import {
FetchHttpClient,
Headers,
HttpClient,
HttpClientError,
HttpClientRequest,
HttpClientResponse,
} from "effect/unstable/http"
import {
AuthenticationReason,
ContentPolicyReason,
HttpContext,
HttpRateLimitDetails,
HttpRequestDetails,
HttpResponseDetails,
InvalidRequestReason,
LLMError,
ProviderInternalReason,
QuotaExceededReason,
RateLimitReason,
TransportReason,
UnknownProviderReason,
} from "../schema"
export interface Interface {
readonly execute: (
request: HttpClientRequest.HttpClientRequest,
) => Effect.Effect<HttpClientResponse.HttpClientResponse, LLMError>
}
export class Service extends Context.Service<Service, Interface>()("@opencode/LLM/RequestExecutor") {}
const BODY_LIMIT = 16_384
const MAX_RETRIES = 2
const BASE_DELAY_MS = 500
const MAX_DELAY_MS = 10_000
const REDACTED = "<redacted>"
// One source of truth for what counts as a sensitive name across headers,
// URL query keys, and field names embedded inside request/response bodies.
//
// `SENSITIVE_NAME` is used as both a substring matcher (for free-form header
// names like `Authorization` / `X-API-Key`) and as the body-field alternation
// list. `SHORT_QUERY_NAME` covers anchored short keys like `?key=…` / `?sig=…`
// that are too generic to redact substring-style without false positives.
const SENSITIVE_NAME_SOURCE =
"authorization|api[-_]?key|access[-_]?token|refresh[-_]?token|id[-_]?token|token|secret|credential|signature|x-amz-signature"
const SENSITIVE_NAME = new RegExp(SENSITIVE_NAME_SOURCE, "i")
const SHORT_QUERY_NAME = /^(key|sig)$/i
const SENSITIVE_BODY_FIELD = new RegExp(`(?:${SENSITIVE_NAME_SOURCE}|key)`, "i")
const REDACT_JSON_FIELD = new RegExp(`("(?:${SENSITIVE_BODY_FIELD.source})"\\s*:\\s*)"[^"]*"`, "gi")
const REDACT_QUERY_FIELD = new RegExp(`((?:${SENSITIVE_BODY_FIELD.source})=)[^&\\s"]+`, "gi")
const isSensitiveHeaderName = (name: string) => SENSITIVE_NAME.test(name)
const isSensitiveQueryName = (name: string) => isSensitiveHeaderName(name) || SHORT_QUERY_NAME.test(name)
const redactHeaders = (headers: Headers.Headers, redactedNames: ReadonlyArray<string | RegExp>) =>
Object.fromEntries(
Object.entries(Headers.redact(headers, [...redactedNames, SENSITIVE_NAME])).map(([name, value]) => [
name,
String(value),
]),
)
const redactUrl = (value: string) => {
if (!URL.canParse(value)) return REDACTED
const url = new URL(value)
url.searchParams.forEach((_, key) => {
if (isSensitiveQueryName(key)) url.searchParams.set(key, REDACTED)
})
return url.toString()
}
const normalizedHeaders = (headers: Headers.Headers) =>
Object.fromEntries(Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value]))
const requestId = (headers: Record<string, string>) => {
return (
headers["x-request-id"] ??
headers["request-id"] ??
headers["x-amzn-requestid"] ??
headers["x-amz-request-id"] ??
headers["x-goog-request-id"] ??
headers["cf-ray"]
)
}
const retryableStatus = (status: number) => status === 429 || status === 503 || status === 504 || status === 529
const retryAfterMs = (headers: Record<string, string>) => {
const millis = Number(headers["retry-after-ms"])
if (Number.isFinite(millis)) return Math.max(0, millis)
const value = headers["retry-after"]
if (!value) return undefined
const seconds = Number(value)
if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000)
const date = Date.parse(value)
if (!Number.isNaN(date)) return Math.max(0, date - Date.now())
return undefined
}
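// Worked examples of the parse order above:
//   { "retry-after-ms": "1500" }      -> 1500
//   { "retry-after": "2" }            -> 2000 (seconds scaled to milliseconds)
//   { "retry-after": "<HTTP-date>" }  -> max(0, date - now)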
const addRateLimitValue = (target: Record<string, string>, key: string, value: string) => {
if (key.length > 0) target[key] = value
}
const rateLimitDetails = (headers: Record<string, string>, retryAfter: number | undefined) => {
const limit: Record<string, string> = {}
const remaining: Record<string, string> = {}
const reset: Record<string, string> = {}
Object.entries(headers).forEach(([name, value]) => {
const openaiLimit = /^x-ratelimit-limit-(.+)$/.exec(name)?.[1]
if (openaiLimit) return addRateLimitValue(limit, openaiLimit, value)
const openaiRemaining = /^x-ratelimit-remaining-(.+)$/.exec(name)?.[1]
if (openaiRemaining) return addRateLimitValue(remaining, openaiRemaining, value)
const openaiReset = /^x-ratelimit-reset-(.+)$/.exec(name)?.[1]
if (openaiReset) return addRateLimitValue(reset, openaiReset, value)
const anthropic = /^anthropic-ratelimit-(.+)-(limit|remaining|reset)$/.exec(name)
if (!anthropic) return
if (anthropic[2] === "limit") return addRateLimitValue(limit, anthropic[1], value)
if (anthropic[2] === "remaining") return addRateLimitValue(remaining, anthropic[1], value)
return addRateLimitValue(reset, anthropic[1], value)
})
if (
retryAfter === undefined &&
Object.keys(limit).length === 0 &&
Object.keys(remaining).length === 0 &&
Object.keys(reset).length === 0
)
return undefined
return new HttpRateLimitDetails({
retryAfterMs: retryAfter,
limit: Object.keys(limit).length === 0 ? undefined : limit,
remaining: Object.keys(remaining).length === 0 ? undefined : remaining,
reset: Object.keys(reset).length === 0 ? undefined : reset,
})
}
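// For example, { "x-ratelimit-remaining-tokens": "99" } lands in
// `remaining.tokens`, and { "anthropic-ratelimit-requests-reset": "..." }
// lands in `reset.requests`; with no rate-limit signal at all the result is
// undefined so callers can omit the detail entirely.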
const requestDetails = (request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray<string | RegExp>) =>
new HttpRequestDetails({
method: request.method,
url: redactUrl(request.url),
headers: redactHeaders(request.headers, redactedNames),
})
const responseDetails = (
response: HttpClientResponse.HttpClientResponse,
redactedNames: ReadonlyArray<string | RegExp>,
) =>
new HttpResponseDetails({
status: response.status,
headers: redactHeaders(response.headers, redactedNames),
})
const secretValues = (request: HttpClientRequest.HttpClientRequest) => {
const values = new Set<string>()
const add = (value: string) => {
if (value.length < 4) return
values.add(value)
values.add(encodeURIComponent(value))
}
Object.entries(request.headers).forEach(([name, value]) => {
if (!isSensitiveHeaderName(name)) return
add(value)
const bearer = /^Bearer\s+(.+)$/i.exec(value)?.[1]
if (bearer) add(bearer)
})
if (!URL.canParse(request.url)) return values
new URL(request.url).searchParams.forEach((value, key) => {
if (isSensitiveQueryName(key)) add(value)
})
return values
}
// Two passes: structural (redact `"name": "value"` and `name=value` patterns
// for any field name that looks sensitive) plus literal (replace any actual
// secret values we sent in the request, in case the response echoes one back).
const redactBody = (body: string, request: HttpClientRequest.HttpClientRequest) =>
Array.from(secretValues(request)).reduce(
(text, secret) => text.split(secret).join(REDACTED),
body.replace(REDACT_JSON_FIELD, `$1"${REDACTED}"`).replace(REDACT_QUERY_FIELD, `$1${REDACTED}`),
)
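// e.g. a body of '{"api_key":"sk-abc","q":"x"}' on a request that sent
// `Authorization: Bearer sk-abc` becomes '{"api_key":"<redacted>","q":"x"}',
// and any literal "sk-abc" echoed elsewhere in the body is replaced too.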
const responseBody = (body: string | void, request: HttpClientRequest.HttpClientRequest) => {
if (body === undefined) return {}
const redacted = redactBody(body, request)
if (redacted.length <= BODY_LIMIT) return { body: redacted }
return { body: redacted.slice(0, BODY_LIMIT), bodyTruncated: true }
}
const providerMessage = (status: number, body: { readonly body?: string }) => {
if (body.body && body.body.length <= 500) return `Provider request failed with HTTP ${status}: ${body.body}`
return `Provider request failed with HTTP ${status}`
}
const responseHttp = (input: {
readonly request: HttpClientRequest.HttpClientRequest
readonly response: HttpClientResponse.HttpClientResponse
readonly redactedNames: ReadonlyArray<string | RegExp>
readonly body: ReturnType<typeof responseBody>
readonly requestId?: string | undefined
readonly rateLimit?: HttpRateLimitDetails | undefined
}) =>
new HttpContext({
request: requestDetails(input.request, input.redactedNames),
response: responseDetails(input.response, input.redactedNames),
...input.body,
requestId: input.requestId,
rateLimit: input.rateLimit,
})
const statusReason = (input: {
readonly status: number
readonly message: string
readonly retryAfterMs?: number | undefined
readonly rateLimit?: HttpRateLimitDetails | undefined
readonly http: HttpContext
}) => {
const body = input.http.body ?? ""
if (/content[-_\s]?policy|content_filter|safety/i.test(body)) {
return new ContentPolicyReason({ message: input.message, http: input.http })
}
if (input.status === 401) {
return new AuthenticationReason({ message: input.message, kind: "invalid", http: input.http })
}
if (input.status === 403) {
return new AuthenticationReason({ message: input.message, kind: "insufficient-permissions", http: input.http })
}
if (input.status === 429) {
if (/insufficient[-_\s]?quota|quota[-_\s]?exceeded/i.test(body)) {
return new QuotaExceededReason({ message: input.message, http: input.http })
}
return new RateLimitReason({
message: input.message,
retryAfterMs: input.retryAfterMs,
rateLimit: input.rateLimit,
http: input.http,
})
}
if (input.status === 400 || input.status === 404 || input.status === 409 || input.status === 422) {
return new InvalidRequestReason({ message: input.message, http: input.http })
}
if (input.status >= 500 || retryableStatus(input.status)) {
return new ProviderInternalReason({
message: input.message,
status: input.status,
retryAfterMs: input.retryAfterMs,
http: input.http,
})
}
return new UnknownProviderReason({ message: input.message, status: input.status, http: input.http })
}
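// e.g. a 429 whose body mentions "insufficient_quota" maps to QuotaExceeded
// (not retryable), a plain 429 maps to RateLimit (retryable), and 5xx plus the
// retryable statuses (503/504/529) map to ProviderInternal (retryable).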
const statusError =
(request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray<string | RegExp>) =>
(response: HttpClientResponse.HttpClientResponse) =>
Effect.gen(function* () {
if (response.status < 400) return response
const body = yield* response.text.pipe(Effect.catch(() => Effect.void))
const headers = normalizedHeaders(response.headers)
const retryAfter = retryAfterMs(headers)
const rateLimit = rateLimitDetails(headers, retryAfter)
const details = responseBody(body, request)
return yield* new LLMError({
module: "RequestExecutor",
method: "execute",
reason: statusReason({
status: response.status,
message: providerMessage(response.status, details),
retryAfterMs: retryAfter,
rateLimit,
http: responseHttp({
request,
response,
redactedNames,
body: details,
requestId: requestId(headers),
rateLimit,
}),
}),
})
})
const toHttpError = (redactedNames: ReadonlyArray<string | RegExp>) => (error: unknown) => {
const transportError = (input: {
readonly message: string
readonly kind?: string | undefined
readonly request?: HttpClientRequest.HttpClientRequest | undefined
}) =>
new LLMError({
module: "RequestExecutor",
method: "execute",
reason: new TransportReason({
message: input.message,
kind: input.kind,
url: input.request ? redactUrl(input.request.url) : undefined,
http: input.request ? new HttpContext({ request: requestDetails(input.request, redactedNames) }) : undefined,
}),
})
if (Cause.isTimeoutError(error)) {
return transportError({ message: error.message, kind: "Timeout" })
}
if (!HttpClientError.isHttpClientError(error)) {
return transportError({ message: "HTTP transport failed" })
}
const request = "request" in error ? error.request : undefined
if (error.reason._tag === "TransportError") {
return transportError({
message: error.reason.description ?? "HTTP transport failed",
kind: error.reason._tag,
request,
})
}
return transportError({
message: `HTTP transport failed: ${error.reason._tag}`,
kind: error.reason._tag,
request,
})
}
const retryDelay = (error: LLMError, attempt: number) => {
if (error.retryAfterMs !== undefined) return Effect.succeed(Math.min(error.retryAfterMs, MAX_DELAY_MS))
return Random.nextBetween(
Math.min(BASE_DELAY_MS * 2 ** attempt * 0.8, MAX_DELAY_MS),
Math.min(BASE_DELAY_MS * 2 ** attempt * 1.2, MAX_DELAY_MS),
).pipe(Effect.map((delay) => Math.round(delay)))
}
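// Worked example of the jittered backoff: attempt 0 draws uniformly from
// [400, 600] ms, attempt 1 from [800, 1200] ms, attempt 2 from [1600, 2400] ms,
// with each bound capped at MAX_DELAY_MS; a server-provided retryAfterMs wins
// (also capped).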
const retryStatusFailures = <A, R>(
effect: Effect.Effect<A, LLMError, R>,
retries = MAX_RETRIES,
attempt = 0,
): Effect.Effect<A, LLMError, R> =>
Effect.catchTag(effect, "LLM.Error", (error): Effect.Effect<A, LLMError, R> => {
if (!error.retryable || retries <= 0) return Effect.fail(error)
return retryDelay(error, attempt).pipe(
Effect.flatMap((delay) => Effect.sleep(delay)),
Effect.flatMap(() => retryStatusFailures(effect, retries - 1, attempt + 1)),
)
})
export const layer: Layer.Layer<Service, never, HttpClient.HttpClient> = Layer.effect(
Service,
Effect.gen(function* () {
const http = yield* HttpClient.HttpClient
const executeOnce = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const redactedNames = yield* Headers.CurrentRedactedNames
return yield* http
.execute(request)
.pipe(Effect.mapError(toHttpError(redactedNames)), Effect.flatMap(statusError(request, redactedNames)))
})
return Service.of({
execute: (request) => retryStatusFailures(executeOnce(request)),
})
}),
)
export const defaultLayer = layer.pipe(Layer.provide(FetchHttpClient.layer))
export * as RequestExecutor from "./executor"

View File

@@ -0,0 +1,27 @@
import type { Stream } from "effect"
import * as ProviderShared from "../protocols/shared"
import type { LLMError } from "../schema"
/**
* Decode a streaming HTTP response body into provider-protocol frames.
*
* `Framing` is the byte-stream-shaped seam between transport and protocol:
*
* - SSE (`Framing.sse`) — UTF-8 decode the body, run the SSE channel decoder,
* drop empty / `[DONE]` keep-alives. Each emitted frame is the JSON `data:`
* payload of one event.
* - AWS event stream — length-prefixed binary frames with CRC checksums.
* Each emitted frame is one parsed binary event record.
*
* The frame type is opaque to this layer; the protocol's `decode` step turns
* a frame into a typed chunk.
*/
export interface Framing<Frame> {
readonly id: string
readonly frame: (bytes: Stream.Stream<Uint8Array, LLMError>) => Stream.Stream<Frame, LLMError>
}
/** Server-Sent Events framing. Used by every JSON-streaming HTTP provider. */
export const sse: Framing<string> = { id: "sse", frame: ProviderShared.sseFraming }
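// A custom framing only needs an id and a byte-stream decoder. Illustrative
// sketch for newline-delimited JSON (assumes value-level `Stream.decodeText`
// and `Stream.splitLines` helpers, which would need a runtime import here):
//
//   const ndjson: Framing<string> = {
//     id: "ndjson",
//     frame: (bytes) => bytes.pipe(Stream.decodeText(), Stream.splitLines),
//   }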
export * as Framing from "./framing"

View File

@@ -0,0 +1,26 @@
export { Route, LLMClient, modelLimits, modelRef } from "./client"
export type {
Route as RouteShape,
RouteModelDefaults,
RouteModelInput,
RouteRoutedModelDefaults,
RouteRoutedModelInput,
AnyRoute,
Interface as LLMClientShape,
Service as LLMClientService,
ModelRefInput,
} from "./client"
export * from "./executor"
export { Auth } from "./auth"
export { AuthOptions } from "./auth-options"
export { Endpoint } from "./endpoint"
export { Framing } from "./framing"
export { Protocol } from "./protocol"
export { HttpTransport, WebSocketExecutor, WebSocketTransport } from "./transport"
export * as Transport from "./transport"
export type { Auth as AuthShape, AuthInput, Credential, CredentialError } from "./auth"
export type { ApiKeyMode, AuthOverride, ProviderAuthOption } from "./auth-options"
export type { Endpoint as EndpointFn, EndpointInput } from "./endpoint"
export type { Framing as FramingDef } from "./framing"
export type { Protocol as ProtocolDef } from "./protocol"
export type { Transport as TransportDef, TransportRuntime } from "./transport"

View File

@@ -0,0 +1,84 @@
import { Schema, type Effect } from "effect"
import type { LLMError, LLMEvent, LLMRequest, ProtocolID } from "../schema"
/**
* The semantic API contract of one model server family.
*
* A `Protocol` owns the parts of a route that are intrinsic to "what does
* this API look like": how a common `LLMRequest` becomes a provider-native
* body, what schema that body must satisfy before it is JSON-encoded, and
* how the streaming response decodes back into common `LLMEvent`s.
*
* Examples:
*
* - `OpenAIChat.protocol` — chat completions style
* - `OpenAIResponses.protocol` — responses API
* - `AnthropicMessages.protocol` — messages API with content blocks
* - `Gemini.protocol` — generateContent
* - `BedrockConverse.protocol` — Converse with binary event-stream framing
*
* A `Protocol` is **not** a deployment. It does not know which URL, which
* headers, or which auth scheme to use. Those are deployment concerns owned
* by `Route.make(...)` along with the chosen `Endpoint`, `Auth`,
* and `Framing`. This separation is what lets DeepSeek, TogetherAI, Cerebras,
* etc. all reuse `OpenAIChat.protocol` without forking 300 lines per provider.
*
* The four type parameters reflect the pipeline:
*
* - `Body` — provider-native request body candidate. `Route.make(...)`
* validates and JSON-encodes it with `body.schema`.
* - `Frame` — one unit of the framed response stream. SSE: a JSON data
* string. AWS event stream: a parsed binary frame.
* - `Event` — schema-decoded provider event produced from one frame.
* - `State` — accumulator threaded through `stream.step` to translate event
* sequences into `LLMEvent` sequences.
*/
export interface Protocol<Body, Frame, Event, State> {
/** Stable id for the wire protocol implementation. */
readonly id: ProtocolID
/** Request side: schema for the provider-native body and how to build it. */
readonly body: ProtocolBody<Body>
/** Response side: streaming state machine. */
readonly stream: ProtocolStream<Frame, Event, State>
}
export interface ProtocolBody<Body> {
/** Schema for the validated provider-native body sent as the JSON request. */
readonly schema: Schema.Codec<Body, unknown>
/** Build the provider-native body from a common `LLMRequest`. */
readonly from: (request: LLMRequest) => Effect.Effect<Body, LLMError>
}
export interface ProtocolStream<Frame, Event, State> {
/** Schema for one decoded streaming event, decoded from a transport frame. */
readonly event: Schema.Codec<Event, Frame>
/** Initial parser state. Called once per response. */
readonly initial: () => State
/** Translate one event into emitted `LLMEvent`s plus the next state. */
readonly step: (state: State, event: Event) => Effect.Effect<readonly [State, ReadonlyArray<LLMEvent>], LLMError>
/** Optional request-completion signal for transports that do not end naturally. */
readonly terminal?: (event: Event) => boolean
/** Optional flush emitted when the framed stream ends. */
readonly onHalt?: (state: State) => ReadonlyArray<LLMEvent>
}
/**
* Construct a `Protocol` from its body and stream pieces:
*
* - `body.schema` infers the provider-native request body shape.
* - `body.from` ties the common `LLMRequest` to the provider body.
* - `stream.event` infers the decoded streaming event and the wire frame.
* - `stream.initial`, `stream.step`, and `stream.onHalt` infer the parser state.
*
* Provider implementations should usually call `Protocol.make({ ... })`
* without explicit type arguments; the schemas and parser functions are the
* source of truth. The constructor remains as the public seam for future
* cross-cutting concerns such as tracing or instrumentation.
*/
export const make = <Body, Frame, Event, State>(
input: Protocol<Body, Frame, Event, State>,
): Protocol<Body, Frame, Event, State> => input
export const jsonEvent = <const S extends Schema.Top>(schema: S) => Schema.fromJsonString(schema)
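// Illustrative skeleton of a protocol definition. `BodySchema`, `EventSchema`,
// `buildBody`, and `translate` are placeholders, and a real definition would
// import `Effect` at the value level (it is type-only above):
//
//   const exampleProtocol = make({
//     id: "example-chat",
//     body: { schema: BodySchema, from: (request) => buildBody(request) },
//     stream: {
//       event: jsonEvent(EventSchema),
//       initial: () => ({ openTextId: undefined as string | undefined }),
//       step: (state, event) => Effect.succeed([state, translate(event)] as const),
//     },
//   })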
export * as Protocol from "./protocol"

View File

@@ -0,0 +1,122 @@
import { Effect, Stream } from "effect"
import { Headers, HttpClientRequest } from "effect/unstable/http"
import { Auth, type Auth as AuthDef } from "../auth"
import { type Endpoint, render as renderEndpoint } from "../endpoint"
import type { Framing } from "../framing"
import type { Transport } from "./index"
import * as ProviderShared from "../../protocols/shared"
import { mergeJsonRecords, type LLMRequest } from "../../schema"
export interface JsonRequestInput<Body> {
readonly body: Body
readonly request: LLMRequest
readonly endpoint: Endpoint<Body>
readonly auth: AuthDef
readonly encodeBody: (body: Body) => string
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
}
export interface JsonRequestParts<Body = unknown> {
readonly url: string
readonly jsonBody: Body | Record<string, unknown>
readonly bodyText: string
readonly headers: Headers.Headers
}
export interface HttpPrepared<Frame> {
readonly request: HttpClientRequest.HttpClientRequest
readonly framing: Framing<Frame>
}
const applyQuery = (url: string, query: Record<string, string> | undefined) => {
if (!query) return url
const next = new URL(url)
Object.entries(query).forEach(([key, value]) => next.searchParams.set(key, value))
return next.toString()
}
const bodyWithOverlay = <Body>(body: Body, request: LLMRequest, encodeBody: (body: Body) => string) =>
Effect.gen(function* () {
if (request.http?.body === undefined) return { jsonBody: body, bodyText: encodeBody(body) }
if (ProviderShared.isRecord(body)) {
const overlaid = mergeJsonRecords(body, request.http.body) ?? {}
return { jsonBody: overlaid, bodyText: ProviderShared.encodeJson(overlaid) }
}
return yield* ProviderShared.invalidRequest("http.body can only overlay JSON object request bodies")
})
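// e.g. with `http.body = { temperature: 0 }`, an object provider body
// { model, messages } is overlaid to { model, messages, temperature: 0 };
// a non-object body (string, array) rejects the overlay with an
// invalid-request error.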
export const jsonRequestParts = <Body>(input: JsonRequestInput<Body>) =>
Effect.gen(function* () {
const url = applyQuery(
renderEndpoint(input.endpoint, { request: input.request, body: input.body }).toString(),
input.request.http?.query,
)
const body = yield* bodyWithOverlay(input.body, input.request, input.encodeBody)
const headers = yield* Auth.toEffect(Auth.isAuth(input.request.model.auth) ? input.request.model.auth : input.auth)(
{
request: input.request,
method: "POST",
url,
body: body.bodyText,
headers: Headers.fromInput({
...(input.headers?.({ request: input.request }) ?? {}),
...input.request.model.headers,
...input.request.http?.headers,
}),
},
)
return { url, jsonBody: body.jsonBody, bodyText: body.bodyText, headers }
})
export interface HttpJsonInput<Body, Frame> {
readonly endpoint: Endpoint<Body>
readonly auth?: AuthDef
readonly framing: Framing<Frame>
readonly encodeBody: (body: Body) => string
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
}
export type HttpJsonPatch<Body, Frame> = Partial<HttpJsonInput<Body, Frame>>
export interface HttpJsonTransport<Body, Frame> extends Transport<Body, HttpPrepared<Frame>, Frame> {
readonly with: (patch: HttpJsonPatch<Body, Frame>) => HttpJsonTransport<Body, Frame>
}
export const httpJson = <Body, Frame>(input: HttpJsonInput<Body, Frame>): HttpJsonTransport<Body, Frame> => ({
id: "http-json",
with: (patch) => httpJson({ ...input, ...patch }),
prepare: (body, request) =>
jsonRequestParts({
body,
request,
endpoint: input.endpoint,
auth: input.auth ?? Auth.bearer(),
encodeBody: input.encodeBody,
headers: input.headers,
}).pipe(
Effect.map((parts) => ({
request: ProviderShared.jsonPost({ url: parts.url, body: parts.bodyText, headers: parts.headers }),
framing: input.framing,
})),
),
frames: (prepared, request, runtime) =>
Stream.unwrap(
runtime.http
.execute(prepared.request)
.pipe(
Effect.map((response) =>
prepared.framing.frame(
response.stream.pipe(
Stream.mapError((error) =>
ProviderShared.eventError(
`${request.model.provider}/${request.model.route}`,
`Failed to read ${request.model.provider}/${request.model.route} stream`,
ProviderShared.errorText(error),
),
),
),
),
),
),
),
})
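// Illustrative wiring (the endpoint path and `BodySchema` are assumptions;
// `auth` defaults to `Auth.bearer()` when omitted, as above):
//
//   const transport = httpJson({
//     endpoint: Endpoint.path("/v1/chat/completions"),
//     framing: Framing.sse,
//     encodeBody: Schema.encodeSync(Schema.fromJsonString(BodySchema)),
//   })
//   const pinned = transport.with({ headers: () => ({ "x-api-version": "2" }) })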

View File

@@ -0,0 +1,22 @@
import type { Effect, Stream } from "effect"
import type { Interface as RequestExecutorInterface } from "../executor"
import type { Interface as WebSocketExecutorInterface } from "./websocket"
import type { LLMError, LLMRequest } from "../../schema"
export interface TransportRuntime {
readonly http: RequestExecutorInterface
readonly webSocket?: WebSocketExecutorInterface
}
export interface Transport<Body, Prepared, Frame> {
readonly id: string
readonly prepare: (body: Body, request: LLMRequest) => Effect.Effect<Prepared, LLMError>
readonly frames: (
prepared: Prepared,
request: LLMRequest,
runtime: TransportRuntime,
) => Stream.Stream<Frame, LLMError>
}
export * as HttpTransport from "./http"
export { WebSocketExecutor, WebSocketTransport } from "./websocket"

View File

@@ -0,0 +1,282 @@
import { Cause, Context, Effect, Queue, Stream } from "effect"
import { Headers } from "effect/unstable/http"
import { Auth, type Auth as AuthDef } from "../auth"
import type { Endpoint } from "../endpoint"
import { LLMError, TransportReason, type LLMRequest } from "../../schema"
import * as HttpTransport from "./http"
import type { Transport } from "./index"
export interface WebSocketRequest {
readonly url: string
readonly headers: Headers.Headers
}
export interface WebSocketConnection {
readonly sendText: (message: string) => Effect.Effect<void, LLMError>
readonly messages: Stream.Stream<string | Uint8Array, LLMError>
readonly close: Effect.Effect<void, never>
}
export interface Interface {
readonly open: (input: WebSocketRequest) => Effect.Effect<WebSocketConnection, LLMError>
}
type WebSocketConstructorWithHeaders = new (
url: string,
options?: { readonly headers?: Headers.Headers },
) => globalThis.WebSocket
export class Service extends Context.Service<Service, Interface>()("@opencode/LLM/WebSocketExecutor") {}
const transportError = (
method: string,
message: string,
input: { readonly url?: string; readonly kind?: string } = {},
) =>
new LLMError({
module: "WebSocketExecutor",
method,
reason: new TransportReason({ message, url: input.url, kind: input.kind }),
})
const eventMessage = (event: Event) => {
if ("message" in event && typeof event.message === "string") return event.message
return event.type
}
const binaryMessage = (data: unknown) => {
if (data instanceof Uint8Array) return data
if (data instanceof ArrayBuffer) return new Uint8Array(data)
if (ArrayBuffer.isView(data)) return new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
return undefined
}
const waitOpen = (ws: globalThis.WebSocket, input: WebSocketRequest) => {
if (ws.readyState === globalThis.WebSocket.OPEN) return Effect.void
if (ws.readyState === globalThis.WebSocket.CLOSING || ws.readyState === globalThis.WebSocket.CLOSED) {
return Effect.fail(
transportError("open", `WebSocket closed before opening (state ${ws.readyState})`, {
url: input.url,
kind: "open",
}),
)
}
return Effect.callback<void, LLMError>((resume, signal) => {
const cleanup = () => {
ws.removeEventListener("open", onOpen)
ws.removeEventListener("error", onError)
ws.removeEventListener("close", onClose)
signal.removeEventListener("abort", onAbort)
}
const onAbort = () => {
cleanup()
if (ws.readyState !== globalThis.WebSocket.CLOSED && ws.readyState !== globalThis.WebSocket.CLOSING)
ws.close(1000)
}
const onOpen = () => {
cleanup()
resume(Effect.void)
}
const onError = (event: Event) => {
cleanup()
resume(
Effect.fail(
transportError("open", `Failed to open WebSocket: ${eventMessage(event)}`, { url: input.url, kind: "open" }),
),
)
}
const onClose = (event: CloseEvent) => {
cleanup()
resume(
Effect.fail(
transportError("open", `WebSocket closed before opening with code ${event.code}`, {
url: input.url,
kind: "open",
}),
),
)
}
ws.addEventListener("open", onOpen, { once: true })
ws.addEventListener("error", onError, { once: true })
ws.addEventListener("close", onClose, { once: true })
signal.addEventListener("abort", onAbort, { once: true })
})
}
const webSocketUrl = (value: string) =>
Effect.try({
try: () => {
const url = new URL(value)
if (url.protocol === "https:") {
url.protocol = "wss:"
return url.toString()
}
if (url.protocol === "http:") {
url.protocol = "ws:"
return url.toString()
}
throw new Error(`Unsupported WebSocket URL protocol ${url.protocol}`)
},
catch: (error) =>
transportError("prepare", error instanceof Error ? error.message : "Invalid WebSocket URL", {
url: value,
kind: "websocket",
}),
})
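// e.g. "https://api.example.com/v1/realtime" -> "wss://api.example.com/v1/realtime",
// "http://localhost:8080/ws" -> "ws://localhost:8080/ws"; any other scheme
// fails with a transport error.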
export const open = (input: WebSocketRequest) =>
Effect.try({
try: () =>
new (globalThis.WebSocket as unknown as WebSocketConstructorWithHeaders)(input.url, { headers: input.headers }),
catch: (error) =>
transportError("open", error instanceof Error ? error.message : "Failed to construct WebSocket", {
url: input.url,
kind: "open",
}),
}).pipe(Effect.flatMap((ws) => fromWebSocket(ws, input)))
export const fromWebSocket = (
ws: globalThis.WebSocket,
input: WebSocketRequest,
): Effect.Effect<WebSocketConnection, LLMError> =>
Effect.gen(function* () {
yield* waitOpen(ws, input)
const messages = yield* Queue.bounded<string | Uint8Array, LLMError | Cause.Done<void>>(128)
const onMessage = (event: MessageEvent) => {
if (typeof event.data === "string") return Queue.offerUnsafe(messages, event.data)
const binary = binaryMessage(event.data)
if (binary) return Queue.offerUnsafe(messages, binary)
Queue.failCauseUnsafe(
messages,
Cause.fail(
transportError("message", "Unsupported WebSocket message payload", { url: input.url, kind: "message" }),
),
)
}
const onError = (event: Event) => {
Queue.failCauseUnsafe(
messages,
Cause.fail(
transportError("message", `WebSocket error: ${eventMessage(event)}`, { url: input.url, kind: "message" }),
),
)
}
const onClose = (event: CloseEvent) => {
if (event.code === 1000 || event.code === 1005) return Queue.endUnsafe(messages)
Queue.failCauseUnsafe(
messages,
Cause.fail(
transportError("message", `WebSocket closed with code ${event.code}`, { url: input.url, kind: "close" }),
),
)
}
const cleanup = Effect.sync(() => {
ws.removeEventListener("message", onMessage)
ws.removeEventListener("error", onError)
ws.removeEventListener("close", onClose)
}).pipe(Effect.andThen(Queue.shutdown(messages)))
ws.addEventListener("message", onMessage)
ws.addEventListener("error", onError)
ws.addEventListener("close", onClose)
return {
sendText: (message) =>
Effect.try({
try: () => ws.send(message),
catch: (error) =>
transportError("sendText", error instanceof Error ? error.message : "Failed to send WebSocket message", {
url: input.url,
kind: "write",
}),
}),
messages: Stream.fromQueue(messages),
close: cleanup.pipe(
Effect.andThen(
Effect.sync(() => {
if (ws.readyState === globalThis.WebSocket.CLOSED || ws.readyState === globalThis.WebSocket.CLOSING) return
ws.close(1000)
}),
),
),
}
})
export const messageText = (message: string | Uint8Array, decoder: TextDecoder) =>
typeof message === "string" ? message : decoder.decode(message)
export interface JsonPrepared {
readonly url: string
readonly headers: Headers.Headers
readonly message: string
}
export interface JsonInput<Body, Message> {
readonly endpoint: Endpoint<Body>
readonly auth?: AuthDef
readonly encodeBody: (body: Body) => string
readonly toMessage: (body: Body | Record<string, unknown>) => Effect.Effect<Message, LLMError>
readonly encodeMessage: (message: Message) => string
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
}
export type JsonPatch<Body, Message> = Partial<JsonInput<Body, Message>>
export interface JsonTransport<Body, Message> extends Transport<Body, JsonPrepared, string> {
readonly with: (patch: JsonPatch<Body, Message>) => JsonTransport<Body, Message>
}
export const json = <Body, Message>(input: JsonInput<Body, Message>): JsonTransport<Body, Message> => ({
id: "websocket-json",
with: (patch) => json({ ...input, ...patch }),
prepare: (body, request) =>
Effect.gen(function* () {
const parts = yield* HttpTransport.jsonRequestParts({
body,
request,
endpoint: input.endpoint,
auth: input.auth ?? Auth.bearer(),
encodeBody: input.encodeBody,
headers: input.headers,
})
return {
url: yield* webSocketUrl(parts.url),
headers: parts.headers,
message: input.encodeMessage(yield* input.toMessage(parts.jsonBody)),
}
}),
frames: (prepared, _request, runtime) => {
const webSocket = runtime.webSocket
if (!webSocket) {
return Stream.fail(
transportError("json", "WebSocket JSON transport requires WebSocketExecutor.Service", {
url: prepared.url,
kind: "websocket",
}),
)
}
const decoder = new TextDecoder()
return Stream.unwrap(
Effect.gen(function* () {
const connection = yield* Effect.acquireRelease(
webSocket.open({ url: prepared.url, headers: prepared.headers }),
(connection) => connection.close,
)
yield* connection.sendText(prepared.message)
return connection.messages.pipe(Stream.map((message) => messageText(message, decoder)))
}),
)
},
})
export const WebSocketExecutor = {
Service,
open,
fromWebSocket,
messageText,
} as const
export const WebSocketTransport = {
json,
} as const
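// Illustrative wiring of the JSON WebSocket transport (the endpoint, schema,
// and message envelope are assumptions for the sketch):
//
//   const transport = WebSocketTransport.json({
//     endpoint: Endpoint.path("/v1/realtime"),
//     encodeBody: Schema.encodeSync(Schema.fromJsonString(BodySchema)),
//     toMessage: (body) => Effect.succeed({ type: "session.create", body }),
//     encodeMessage: JSON.stringify,
//   })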

View File

@@ -0,0 +1,202 @@
import { Schema } from "effect"
import { ModelID, ProviderID, ProviderMetadata, RouteID } from "./ids"
export class HttpRequestDetails extends Schema.Class<HttpRequestDetails>("LLM.HttpRequestDetails")({
method: Schema.String,
url: Schema.String,
headers: Schema.Record(Schema.String, Schema.String),
}) {}
export class HttpResponseDetails extends Schema.Class<HttpResponseDetails>("LLM.HttpResponseDetails")({
status: Schema.Number,
headers: Schema.Record(Schema.String, Schema.String),
}) {}
export class HttpRateLimitDetails extends Schema.Class<HttpRateLimitDetails>("LLM.HttpRateLimitDetails")({
retryAfterMs: Schema.optional(Schema.Number),
limit: Schema.optional(Schema.Record(Schema.String, Schema.String)),
remaining: Schema.optional(Schema.Record(Schema.String, Schema.String)),
reset: Schema.optional(Schema.Record(Schema.String, Schema.String)),
}) {}
export class HttpContext extends Schema.Class<HttpContext>("LLM.HttpContext")({
request: HttpRequestDetails,
response: Schema.optional(HttpResponseDetails),
body: Schema.optional(Schema.String),
bodyTruncated: Schema.optional(Schema.Boolean),
requestId: Schema.optional(Schema.String),
rateLimit: Schema.optional(HttpRateLimitDetails),
}) {}
export class InvalidRequestReason extends Schema.Class<InvalidRequestReason>("LLM.Error.InvalidRequest")({
_tag: Schema.tag("InvalidRequest"),
message: Schema.String,
parameter: Schema.optional(Schema.String),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class NoRouteReason extends Schema.Class<NoRouteReason>("LLM.Error.NoRoute")({
_tag: Schema.tag("NoRoute"),
route: RouteID,
provider: ProviderID,
model: ModelID,
}) {
get retryable() {
return false
}
get message() {
return `No LLM route for ${this.provider}/${this.model} using ${this.route}`
}
}
export class AuthenticationReason extends Schema.Class<AuthenticationReason>("LLM.Error.Authentication")({
_tag: Schema.tag("Authentication"),
message: Schema.String,
kind: Schema.Literals(["missing", "invalid", "expired", "insufficient-permissions", "unknown"]),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class RateLimitReason extends Schema.Class<RateLimitReason>("LLM.Error.RateLimit")({
_tag: Schema.tag("RateLimit"),
message: Schema.String,
retryAfterMs: Schema.optional(Schema.Number),
rateLimit: Schema.optional(HttpRateLimitDetails),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return true
}
}
export class QuotaExceededReason extends Schema.Class<QuotaExceededReason>("LLM.Error.QuotaExceeded")({
_tag: Schema.tag("QuotaExceeded"),
message: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class ContentPolicyReason extends Schema.Class<ContentPolicyReason>("LLM.Error.ContentPolicy")({
_tag: Schema.tag("ContentPolicy"),
message: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class ProviderInternalReason extends Schema.Class<ProviderInternalReason>("LLM.Error.ProviderInternal")({
_tag: Schema.tag("ProviderInternal"),
message: Schema.String,
status: Schema.Number,
retryAfterMs: Schema.optional(Schema.Number),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return true
}
}
export class TransportReason extends Schema.Class<TransportReason>("LLM.Error.Transport")({
_tag: Schema.tag("Transport"),
message: Schema.String,
kind: Schema.optional(Schema.String),
url: Schema.optional(Schema.String),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class InvalidProviderOutputReason extends Schema.Class<InvalidProviderOutputReason>(
"LLM.Error.InvalidProviderOutput",
)({
_tag: Schema.tag("InvalidProviderOutput"),
message: Schema.String,
route: Schema.optional(Schema.String),
raw: Schema.optional(Schema.String),
providerMetadata: Schema.optional(ProviderMetadata),
}) {
get retryable() {
return false
}
}
export class UnknownProviderReason extends Schema.Class<UnknownProviderReason>("LLM.Error.UnknownProvider")({
_tag: Schema.tag("UnknownProvider"),
message: Schema.String,
status: Schema.optional(Schema.Number),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export const LLMErrorReason = Schema.Union([
InvalidRequestReason,
NoRouteReason,
AuthenticationReason,
RateLimitReason,
QuotaExceededReason,
ContentPolicyReason,
ProviderInternalReason,
TransportReason,
InvalidProviderOutputReason,
UnknownProviderReason,
]).pipe(Schema.toTaggedUnion("_tag"))
export type LLMErrorReason = Schema.Schema.Type<typeof LLMErrorReason>
export class LLMError extends Schema.TaggedErrorClass<LLMError>()("LLM.Error", {
module: Schema.String,
method: Schema.String,
reason: LLMErrorReason,
}) {
override readonly cause = this.reason
get retryable() {
return this.reason.retryable
}
get retryAfterMs() {
return "retryAfterMs" in this.reason ? this.reason.retryAfterMs : undefined
}
override get message() {
return `${this.module}.${this.method}: ${this.reason.message}`
}
}
/**
* Failure type for tool execute handlers. Handlers must map their internal
* errors to this shape; the runtime catches `ToolFailure`s and surfaces them
* as `tool-error` events plus a `tool-result` of `type: "error"` so the model
* can self-correct.
*
* Anything thrown or yielded by a handler that is not a `ToolFailure` is
* treated as a defect and fails the stream.
*/
export class ToolFailure extends Schema.TaggedErrorClass<ToolFailure>()("LLM.ToolFailure", {
message: Schema.String,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
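// Illustrative failure from inside an Effect.gen tool handler (the handler
// shape is assumed):
//
//   return yield* new ToolFailure({
//     message: "file not found",
//     metadata: { path: "/tmp/missing.txt" },
//   })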

View File

@@ -0,0 +1,237 @@
import { Schema } from "effect"
import { FinishReason, ProtocolID, ProviderMetadata, RouteID } from "./ids"
import { ModelRef } from "./options"
import { ToolResultValue } from "./messages"
export class Usage extends Schema.Class<Usage>("LLM.Usage")({
inputTokens: Schema.optional(Schema.Number),
outputTokens: Schema.optional(Schema.Number),
reasoningTokens: Schema.optional(Schema.Number),
cacheReadInputTokens: Schema.optional(Schema.Number),
cacheWriteInputTokens: Schema.optional(Schema.Number),
totalTokens: Schema.optional(Schema.Number),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export const RequestStart = Schema.Struct({
type: Schema.Literal("request-start"),
id: Schema.String,
model: ModelRef,
}).annotate({ identifier: "LLM.Event.RequestStart" })
export type RequestStart = Schema.Schema.Type<typeof RequestStart>
export const StepStart = Schema.Struct({
type: Schema.Literal("step-start"),
index: Schema.Number,
}).annotate({ identifier: "LLM.Event.StepStart" })
export type StepStart = Schema.Schema.Type<typeof StepStart>
export const TextStart = Schema.Struct({
type: Schema.Literal("text-start"),
id: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.TextStart" })
export type TextStart = Schema.Schema.Type<typeof TextStart>
export const TextDelta = Schema.Struct({
type: Schema.Literal("text-delta"),
id: Schema.optional(Schema.String),
text: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.TextDelta" })
export type TextDelta = Schema.Schema.Type<typeof TextDelta>
export const TextEnd = Schema.Struct({
type: Schema.Literal("text-end"),
id: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.TextEnd" })
export type TextEnd = Schema.Schema.Type<typeof TextEnd>
export const ReasoningDelta = Schema.Struct({
type: Schema.Literal("reasoning-delta"),
id: Schema.optional(Schema.String),
text: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ReasoningDelta" })
export type ReasoningDelta = Schema.Schema.Type<typeof ReasoningDelta>
export const ToolInputDelta = Schema.Struct({
type: Schema.Literal("tool-input-delta"),
id: Schema.String,
name: Schema.String,
text: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolInputDelta" })
export type ToolInputDelta = Schema.Schema.Type<typeof ToolInputDelta>
export const ToolCall = Schema.Struct({
type: Schema.Literal("tool-call"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
providerExecuted: Schema.optional(Schema.Boolean),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolCall" })
export type ToolCall = Schema.Schema.Type<typeof ToolCall>
export const ToolResult = Schema.Struct({
type: Schema.Literal("tool-result"),
id: Schema.String,
name: Schema.String,
result: ToolResultValue,
providerExecuted: Schema.optional(Schema.Boolean),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolResult" })
export type ToolResult = Schema.Schema.Type<typeof ToolResult>
export const ToolError = Schema.Struct({
type: Schema.Literal("tool-error"),
id: Schema.String,
name: Schema.String,
message: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolError" })
export type ToolError = Schema.Schema.Type<typeof ToolError>
export const StepFinish = Schema.Struct({
type: Schema.Literal("step-finish"),
index: Schema.Number,
reason: FinishReason,
usage: Schema.optional(Usage),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.StepFinish" })
export type StepFinish = Schema.Schema.Type<typeof StepFinish>
export const RequestFinish = Schema.Struct({
type: Schema.Literal("request-finish"),
reason: FinishReason,
usage: Schema.optional(Usage),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.RequestFinish" })
export type RequestFinish = Schema.Schema.Type<typeof RequestFinish>
export const ProviderErrorEvent = Schema.Struct({
type: Schema.Literal("provider-error"),
message: Schema.String,
retryable: Schema.optional(Schema.Boolean),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ProviderError" })
export type ProviderErrorEvent = Schema.Schema.Type<typeof ProviderErrorEvent>
const llmEventTagged = Schema.Union([
RequestStart,
StepStart,
TextStart,
TextDelta,
TextEnd,
ReasoningDelta,
ToolInputDelta,
ToolCall,
ToolResult,
ToolError,
StepFinish,
RequestFinish,
ProviderErrorEvent,
]).pipe(Schema.toTaggedUnion("type"))
/**
* camelCase aliases for `LLMEvent.guards` (provided by `Schema.toTaggedUnion`).
* Lets consumers write `events.filter(LLMEvent.is.toolCall)` instead of
* `events.filter(LLMEvent.guards["tool-call"])`.
*/
export const LLMEvent = Object.assign(llmEventTagged, {
is: {
requestStart: llmEventTagged.guards["request-start"],
stepStart: llmEventTagged.guards["step-start"],
textStart: llmEventTagged.guards["text-start"],
textDelta: llmEventTagged.guards["text-delta"],
textEnd: llmEventTagged.guards["text-end"],
reasoningDelta: llmEventTagged.guards["reasoning-delta"],
toolInputDelta: llmEventTagged.guards["tool-input-delta"],
toolCall: llmEventTagged.guards["tool-call"],
toolResult: llmEventTagged.guards["tool-result"],
toolError: llmEventTagged.guards["tool-error"],
stepFinish: llmEventTagged.guards["step-finish"],
requestFinish: llmEventTagged.guards["request-finish"],
providerError: llmEventTagged.guards["provider-error"],
},
})
export type LLMEvent = Schema.Schema.Type<typeof llmEventTagged>
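// e.g. `events.filter(LLMEvent.is.toolCall)` narrows to tool-call events, and
// `LLMEvent.is.textDelta(event)` works as an ordinary type guard in an `if`.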
export class PreparedRequest extends Schema.Class<PreparedRequest>("LLM.PreparedRequest")({
id: Schema.String,
route: RouteID,
protocol: ProtocolID,
model: ModelRef,
body: Schema.Unknown,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
/**
* A `PreparedRequest` whose `body` is typed as `Body`. Use with the generic
* on `LLMClient.prepare<Body>(...)` when the caller knows which route their
* request will resolve to and wants its native shape statically exposed
* (debug UIs, request previews, plan rendering).
*
* The runtime body is identical — the route still emits `body: unknown` — so
* this is a type-level assertion the caller makes about what they expect to
* find. The prepare runtime does not validate the assertion.
*/
export type PreparedRequestOf<Body> = Omit<PreparedRequest, "body"> & {
readonly body: Body
}
const responseText = (events: ReadonlyArray<LLMEvent>) =>
events
.filter(LLMEvent.is.textDelta)
.map((event) => event.text)
.join("")
const responseReasoning = (events: ReadonlyArray<LLMEvent>) =>
events
.filter(LLMEvent.is.reasoningDelta)
.map((event) => event.text)
.join("")
const responseUsage = (events: ReadonlyArray<LLMEvent>) =>
events.reduce<Usage | undefined>(
(usage, event) => ("usage" in event && event.usage !== undefined ? event.usage : usage),
undefined,
)
export class LLMResponse extends Schema.Class<LLMResponse>("LLM.Response")({
events: Schema.Array(LLMEvent),
usage: Schema.optional(Usage),
}) {
/** Concatenated assistant text assembled from streamed `text-delta` events. */
get text() {
return responseText(this.events)
}
/** Concatenated reasoning text assembled from streamed `reasoning-delta` events. */
get reasoning() {
return responseReasoning(this.events)
}
/** Completed tool calls emitted by the provider. */
get toolCalls() {
return this.events.filter(LLMEvent.is.toolCall)
}
}
export namespace LLMResponse {
export type Output = LLMResponse | { readonly events: ReadonlyArray<LLMEvent>; readonly usage?: Usage }
/** Concatenate assistant text from a response or collected event list. */
export const text = (response: Output) => responseText(response.events)
/** Return response usage, falling back to the latest usage-bearing event. */
export const usage = (response: Output) => response.usage ?? responseUsage(response.events)
/** Return completed tool calls from a response or collected event list. */
export const toolCalls = (response: Output) => response.events.filter(LLMEvent.is.toolCall)
/** Concatenate reasoning text from a response or collected event list. */
export const reasoning = (response: Output) => responseReasoning(response.events)
}
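// The namespace helpers accept either an `LLMResponse` or a plain collected
// event list, e.g. (illustrative):
//
//   const text = LLMResponse.text({ events })
//   const usage = LLMResponse.usage({ events })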

View File

@@ -0,0 +1,34 @@
import { Schema } from "effect"
/** Stable string identifier for a protocol implementation. */
export const ProtocolID = Schema.String
export type ProtocolID = Schema.Schema.Type<typeof ProtocolID>
/** Stable string identifier for the runnable route. */
export const RouteID = Schema.String
export type RouteID = Schema.Schema.Type<typeof RouteID>
export const ModelID = Schema.String.pipe(Schema.brand("LLM.ModelID"))
export type ModelID = typeof ModelID.Type
export const ProviderID = Schema.String.pipe(Schema.brand("LLM.ProviderID"))
export type ProviderID = typeof ProviderID.Type
export const ReasoningEfforts = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
export const ReasoningEffort = Schema.Literals(ReasoningEfforts)
export type ReasoningEffort = Schema.Schema.Type<typeof ReasoningEffort>
export const TextVerbosity = Schema.Literals(["low", "medium", "high"])
export type TextVerbosity = Schema.Schema.Type<typeof TextVerbosity>
export const MessageRole = Schema.Literals(["user", "assistant", "tool"])
export type MessageRole = Schema.Schema.Type<typeof MessageRole>
export const FinishReason = Schema.Literals(["stop", "length", "tool-calls", "content-filter", "error", "unknown"])
export type FinishReason = Schema.Schema.Type<typeof FinishReason>
export const JsonSchema = Schema.Record(Schema.String, Schema.Unknown)
export type JsonSchema = Schema.Schema.Type<typeof JsonSchema>
export const ProviderMetadata = Schema.Record(Schema.String, Schema.Record(Schema.String, Schema.Unknown))
export type ProviderMetadata = Schema.Schema.Type<typeof ProviderMetadata>

View File

@@ -0,0 +1,5 @@
export * from "./ids"
export * from "./options"
export * from "./messages"
export * from "./events"
export * from "./errors"

View File

@@ -0,0 +1,234 @@
import { Schema } from "effect"
import { JsonSchema, MessageRole, ProviderMetadata } from "./ids"
import { CacheHint, GenerationOptions, HttpOptions, ModelRef, ProviderOptions } from "./options"
const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
const systemPartSchema = Schema.Struct({
type: Schema.Literal("text"),
text: Schema.String,
cache: Schema.optional(CacheHint),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.SystemPart" })
export type SystemPart = Schema.Schema.Type<typeof systemPartSchema>
const makeSystemPart = (text: string): SystemPart => ({ type: "text", text })
export const SystemPart = Object.assign(systemPartSchema, {
make: makeSystemPart,
content: (input?: string | SystemPart | ReadonlyArray<SystemPart>) => {
if (input === undefined) return []
return typeof input === "string" ? [makeSystemPart(input)] : Array.isArray(input) ? [...input] : [input]
},
})
export const TextPart = Schema.Struct({
type: Schema.Literal("text"),
text: Schema.String,
cache: Schema.optional(CacheHint),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.Text" })
export type TextPart = Schema.Schema.Type<typeof TextPart>
export const MediaPart = Schema.Struct({
type: Schema.Literal("media"),
mediaType: Schema.String,
data: Schema.Union([Schema.String, Schema.Uint8Array]),
filename: Schema.optional(Schema.String),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.Content.Media" })
export type MediaPart = Schema.Schema.Type<typeof MediaPart>
const isToolResultValue = (value: unknown): value is ToolResultValue =>
isRecord(value) && (value.type === "text" || value.type === "json" || value.type === "error") && "value" in value
export const ToolResultValue = Object.assign(
Schema.Struct({
type: Schema.Literals(["json", "text", "error"]),
value: Schema.Unknown,
}).annotate({ identifier: "LLM.ToolResult" }),
{
make: (value: unknown, type: ToolResultValue["type"] = "json"): ToolResultValue =>
isToolResultValue(value) ? value : { type, value },
},
)
export type ToolResultValue = Schema.Schema.Type<typeof ToolResultValue>
export const ToolCallPart = Object.assign(
Schema.Struct({
type: Schema.Literal("tool-call"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
providerExecuted: Schema.optional(Schema.Boolean),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.ToolCall" }),
{
make: (input: Omit<ToolCallPart, "type">): ToolCallPart => ({ type: "tool-call", ...input }),
},
)
export type ToolCallPart = Schema.Schema.Type<typeof ToolCallPart>
export const ToolResultPart = Object.assign(
Schema.Struct({
type: Schema.Literal("tool-result"),
id: Schema.String,
name: Schema.String,
result: ToolResultValue,
providerExecuted: Schema.optional(Schema.Boolean),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.ToolResult" }),
{
make: (
input: Omit<ToolResultPart, "type" | "result"> & {
readonly result: unknown
readonly resultType?: ToolResultValue["type"]
},
): ToolResultPart => ({
type: "tool-result",
id: input.id,
name: input.name,
result: ToolResultValue.make(input.result, input.resultType),
providerExecuted: input.providerExecuted,
metadata: input.metadata,
providerMetadata: input.providerMetadata,
}),
},
)
export type ToolResultPart = Schema.Schema.Type<typeof ToolResultPart>
export const ReasoningPart = Schema.Struct({
type: Schema.Literal("reasoning"),
text: Schema.String,
encrypted: Schema.optional(Schema.String),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.Reasoning" })
export type ReasoningPart = Schema.Schema.Type<typeof ReasoningPart>
export const ContentPart = Schema.Union([TextPart, MediaPart, ToolCallPart, ToolResultPart, ReasoningPart]).pipe(
Schema.toTaggedUnion("type"),
)
export type ContentPart = Schema.Schema.Type<typeof ContentPart>
export class Message extends Schema.Class<Message>("LLM.Message")({
id: Schema.optional(Schema.String),
role: MessageRole,
content: Schema.Array(ContentPart),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace Message {
export type ContentInput = string | ContentPart | ReadonlyArray<ContentPart>
export type Input = Omit<ConstructorParameters<typeof Message>[0], "content"> & {
readonly content: ContentInput
}
export const text = (value: string): ContentPart => ({ type: "text", text: value })
export const content = (input: ContentInput) =>
typeof input === "string" ? [text(input)] : Array.isArray(input) ? [...input] : [input]
export const make = (input: Message | Input) => {
if (input instanceof Message) return input
return new Message({ ...input, content: content(input.content) })
}
export const user = (content: ContentInput) => make({ role: "user", content })
export const assistant = (content: ContentInput) => make({ role: "assistant", content })
export const tool = (result: ToolResultPart | Parameters<typeof ToolResultPart.make>[0]) =>
make({ role: "tool", content: ["type" in result ? result : ToolResultPart.make(result)] })
}
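/**
* Example (illustrative): assembling a short tool-use exchange with the
* constructors above. The tool name and call id are made-up fixtures.
*
* ```ts
* const history = [
*   Message.user("What is the weather in Paris?"),
*   Message.assistant([
*     ToolCallPart.make({ id: "call_1", name: "get_weather", input: { city: "Paris" } }),
*   ]),
*   Message.tool({ id: "call_1", name: "get_weather", result: { temperature: 22 } }),
* ]
* ```
*/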
export class ToolDefinition extends Schema.Class<ToolDefinition>("LLM.ToolDefinition")({
name: Schema.String,
description: Schema.String,
inputSchema: JsonSchema,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace ToolDefinition {
export type Input = ToolDefinition | ConstructorParameters<typeof ToolDefinition>[0]
/** Normalize tool definition input into the canonical `ToolDefinition` class. */
export const make = (input: Input) => (input instanceof ToolDefinition ? input : new ToolDefinition(input))
}
export class ToolChoice extends Schema.Class<ToolChoice>("LLM.ToolChoice")({
type: Schema.Literals(["auto", "none", "required", "tool"]),
name: Schema.optional(Schema.String),
}) {}
export namespace ToolChoice {
export type Mode = Exclude<ToolChoice["type"], "tool">
export type Input = ToolChoice | ConstructorParameters<typeof ToolChoice>[0] | ToolDefinition | string
const isMode = (value: string): value is Mode => value === "auto" || value === "none" || value === "required"
/** Select a specific named tool. */
export const named = (value: string) => new ToolChoice({ type: "tool", name: value })
/** Normalize ergonomic tool-choice inputs into the canonical `ToolChoice` class. */
export const make = (input: Input) => {
if (input instanceof ToolChoice) return input
if (input instanceof ToolDefinition) return named(input.name)
if (typeof input === "string") return isMode(input) ? new ToolChoice({ type: input }) : named(input)
return new ToolChoice(input)
}
}
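/**
* Example (illustrative): `ToolChoice.make` reads the three mode strings as
* modes and any other string as a tool name.
*
* ```ts
* ToolChoice.make("auto")        // ToolChoice { type: "auto" }
* ToolChoice.make("get_weather") // ToolChoice { type: "tool", name: "get_weather" }
* ```
*/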
export const ResponseFormat = Schema.Union([
Schema.Struct({ type: Schema.Literal("text") }),
Schema.Struct({ type: Schema.Literal("json"), schema: JsonSchema }),
Schema.Struct({ type: Schema.Literal("tool"), tool: ToolDefinition }),
]).pipe(Schema.toTaggedUnion("type"))
export type ResponseFormat = Schema.Schema.Type<typeof ResponseFormat>
export class LLMRequest extends Schema.Class<LLMRequest>("LLM.Request")({
id: Schema.optional(Schema.String),
model: ModelRef,
system: Schema.Array(SystemPart),
messages: Schema.Array(Message),
tools: Schema.Array(ToolDefinition),
toolChoice: Schema.optional(ToolChoice),
generation: Schema.optional(GenerationOptions),
providerOptions: Schema.optional(ProviderOptions),
http: Schema.optional(HttpOptions),
responseFormat: Schema.optional(ResponseFormat),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace LLMRequest {
export type Input = ConstructorParameters<typeof LLMRequest>[0]
export const input = (request: LLMRequest): Input => ({
id: request.id,
model: request.model,
system: request.system,
messages: request.messages,
tools: request.tools,
toolChoice: request.toolChoice,
generation: request.generation,
providerOptions: request.providerOptions,
http: request.http,
responseFormat: request.responseFormat,
metadata: request.metadata,
})
export const update = (request: LLMRequest, patch: Partial<Input>) => {
if (Object.keys(patch).length === 0) return request
return new LLMRequest({
...input(request),
...patch,
model: patch.model ?? request.model,
})
}
}
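/**
* Example (illustrative): `LLMRequest.update` copies the request and applies a
* partial patch, so appending a message to some existing `request` never
* mutates it.
*
* ```ts
* const next = LLMRequest.update(request, {
*   messages: [...request.messages, Message.user("continue")],
* })
* ```
*/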

View File

@@ -0,0 +1,202 @@
import { Schema } from "effect"
import { JsonSchema, ModelID, ProviderID, RouteID } from "./ids"
const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
export const mergeJsonRecords = (
...items: ReadonlyArray<Record<string, unknown> | undefined>
): Record<string, unknown> | undefined => {
const defined = items.filter((item): item is Record<string, unknown> => item !== undefined)
if (defined.length === 0) return undefined
if (defined.length === 1 && Object.values(defined[0]).every((value) => value !== undefined)) return defined[0]
const result: Record<string, unknown> = {}
for (const item of defined) {
for (const [key, value] of Object.entries(item)) {
if (value === undefined) continue
result[key] = isRecord(result[key]) && isRecord(value) ? mergeJsonRecords(result[key], value) : value
}
}
return Object.keys(result).length === 0 ? undefined : result
}
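/**
* Example (illustrative): later records win per key, nested records merge
* recursively, and `undefined` values are dropped.
*
* ```ts
* mergeJsonRecords({ a: 1, nested: { x: 1 } }, { nested: { y: 2 }, b: undefined })
* // => { a: 1, nested: { x: 1, y: 2 } }
* ```
*/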
const mergeStringRecords = (
...items: ReadonlyArray<Record<string, string> | undefined>
): Record<string, string> | undefined => {
const defined = items.filter((item): item is Record<string, string> => item !== undefined)
if (defined.length === 0) return undefined
if (defined.length === 1) return defined[0]
const result = Object.fromEntries(
defined.flatMap((item) =>
Object.entries(item).filter((entry): entry is [string, string] => entry[1] !== undefined),
),
)
return Object.keys(result).length === 0 ? undefined : result
}
export const ProviderOptions = Schema.Record(Schema.String, Schema.Record(Schema.String, Schema.Unknown))
export type ProviderOptions = Schema.Schema.Type<typeof ProviderOptions>
export const mergeProviderOptions = (
...items: ReadonlyArray<ProviderOptions | undefined>
): ProviderOptions | undefined => {
const result: Record<string, Record<string, unknown>> = {}
for (const item of items) {
if (!item) continue
for (const [provider, options] of Object.entries(item)) {
const merged = mergeJsonRecords(result[provider], options)
if (merged) result[provider] = merged
}
}
return Object.keys(result).length === 0 ? undefined : result
}
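/**
* Example (illustrative): provider options merge per provider namespace.
*
* ```ts
* mergeProviderOptions({ openai: { store: true } }, { openai: { user: "u1" } })
* // => { openai: { store: true, user: "u1" } }
* ```
*/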
export class HttpOptions extends Schema.Class<HttpOptions>("LLM.HttpOptions")({
body: Schema.optional(JsonSchema),
headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
query: Schema.optional(Schema.Record(Schema.String, Schema.String)),
}) {}
export namespace HttpOptions {
export type Input = HttpOptions | ConstructorParameters<typeof HttpOptions>[0]
/** Normalize HTTP option input into the canonical `HttpOptions` class. */
export const make = (input: Input) => (input instanceof HttpOptions ? input : new HttpOptions(input))
}
export const mergeHttpOptions = (...items: ReadonlyArray<HttpOptions | undefined>): HttpOptions | undefined => {
const body = mergeJsonRecords(...items.map((item) => item?.body))
const headers = mergeStringRecords(...items.map((item) => item?.headers))
const query = mergeStringRecords(...items.map((item) => item?.query))
if (!body && !headers && !query) return undefined
return new HttpOptions({ body, headers, query })
}
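/**
* Example (illustrative): `body` records deep-merge while `headers` and
* `query` merge shallowly, with later values winning.
*
* ```ts
* mergeHttpOptions(
*   new HttpOptions({ headers: { "x-a": "1" } }),
*   new HttpOptions({ headers: { "x-a": "2" }, query: { beta: "true" } }),
* )
* // => HttpOptions { headers: { "x-a": "2" }, query: { beta: "true" } }
* ```
*/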
export class GenerationOptions extends Schema.Class<GenerationOptions>("LLM.GenerationOptions")({
maxTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
topK: Schema.optional(Schema.Number),
frequencyPenalty: Schema.optional(Schema.Number),
presencePenalty: Schema.optional(Schema.Number),
seed: Schema.optional(Schema.Number),
stop: Schema.optional(Schema.Array(Schema.String)),
}) {}
export namespace GenerationOptions {
export type Input = GenerationOptions | ConstructorParameters<typeof GenerationOptions>[0]
/** Normalize generation option input into the canonical `GenerationOptions` class. */
export const make = (input: Input = {}) => (input instanceof GenerationOptions ? input : new GenerationOptions(input))
}
export type GenerationOptionsFields = {
readonly maxTokens?: number
readonly temperature?: number
readonly topP?: number
readonly topK?: number
readonly frequencyPenalty?: number
readonly presencePenalty?: number
readonly seed?: number
readonly stop?: ReadonlyArray<string>
}
export type GenerationOptionsInput = GenerationOptions | GenerationOptionsFields
const latestGeneration = <Key extends keyof GenerationOptionsFields>(
items: ReadonlyArray<GenerationOptionsInput | undefined>,
key: Key,
) => items.findLast((item) => item?.[key] !== undefined)?.[key]
export const mergeGenerationOptions = (...items: ReadonlyArray<GenerationOptionsInput | undefined>) => {
const result = new GenerationOptions({
maxTokens: latestGeneration(items, "maxTokens"),
temperature: latestGeneration(items, "temperature"),
topP: latestGeneration(items, "topP"),
topK: latestGeneration(items, "topK"),
frequencyPenalty: latestGeneration(items, "frequencyPenalty"),
presencePenalty: latestGeneration(items, "presencePenalty"),
seed: latestGeneration(items, "seed"),
stop: latestGeneration(items, "stop"),
})
return Object.values(result).some((value) => value !== undefined) ? result : undefined
}
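/**
* Example (illustrative): each field independently takes its last defined
* value, which is how request-level options override model defaults.
*
* ```ts
* mergeGenerationOptions({ temperature: 0, maxTokens: 256 }, { maxTokens: 1024 })
* // => GenerationOptions { temperature: 0, maxTokens: 1024 }
* ```
*/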
export class ModelLimits extends Schema.Class<ModelLimits>("LLM.ModelLimits")({
context: Schema.optional(Schema.Number),
output: Schema.optional(Schema.Number),
}) {}
export namespace ModelLimits {
export type Input = ModelLimits | ConstructorParameters<typeof ModelLimits>[0]
/** Normalize model limit input into the canonical `ModelLimits` class. */
export const make = (input: Input | undefined) =>
input instanceof ModelLimits ? input : new ModelLimits(input ?? {})
}
export class ModelRef extends Schema.Class<ModelRef>("LLM.ModelRef")({
id: ModelID,
provider: ProviderID,
route: RouteID,
baseURL: Schema.String,
/** Provider-specific API key convenience. Provider helpers normalize this into `auth`. */
apiKey: Schema.optional(Schema.String),
/** Optional transport auth policy. Opaque because it may contain functions. */
auth: Schema.optional(Schema.Any),
headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
/**
* Query params appended to the request URL by `Endpoint.baseURL`. Used for
* URL-scoped, deployment-level settings such as Azure's `api-version`, or for
* any provider that requires a per-request key in the URL. This is a generic
* concern, so it lives as a typed first-class field instead of in `native`.
*/
queryParams: Schema.optional(Schema.Record(Schema.String, Schema.String)),
limits: ModelLimits,
/** Provider-neutral generation defaults. Request-level values override them. */
generation: Schema.optional(GenerationOptions),
/** Provider-owned options, typed at the facade, for non-portable knobs. */
providerOptions: Schema.optional(ProviderOptions),
/** Serializable raw HTTP overlays applied to the final outgoing request. */
http: Schema.optional(HttpOptions),
/**
* Provider-specific opaque options. Reach for this only when the value is
* genuinely provider-private and does not fit a typed axis (e.g. Bedrock's
* `aws_credentials` / `aws_region` for SigV4). Anything used by more than
* one route should grow into a typed field instead.
*/
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace ModelRef {
export type Input = ConstructorParameters<typeof ModelRef>[0]
export const input = (model: ModelRef): Input => ({
id: model.id,
provider: model.provider,
route: model.route,
baseURL: model.baseURL,
apiKey: model.apiKey,
auth: model.auth,
headers: model.headers,
queryParams: model.queryParams,
limits: model.limits,
generation: model.generation,
providerOptions: model.providerOptions,
http: model.http,
native: model.native,
})
export const update = (model: ModelRef, patch: Partial<Input>) => {
if (Object.keys(patch).length === 0) return model
return new ModelRef({
...input(model),
...patch,
})
}
}
export class CacheHint extends Schema.Class<CacheHint>("LLM.CacheHint")({
type: Schema.Literals(["ephemeral", "persistent"]),
ttlSeconds: Schema.optional(Schema.Number),
}) {}

View File

@@ -0,0 +1,240 @@
import { Effect, Stream } from "effect"
import type { Concurrency } from "effect/Types"
import {
type ContentPart,
type FinishReason,
type LLMError,
type LLMEvent,
LLMRequest,
Message,
type ProviderMetadata,
ToolCallPart,
ToolFailure,
ToolResultPart,
type ToolResultValue,
} from "./schema"
import { type AnyTool, type ExecutableTools, type Tools, toDefinitions } from "./tool"
export interface RuntimeState {
readonly step: number
readonly request: LLMRequest
}
export type StopCondition = (state: RuntimeState) => boolean
export type ToolExecution = "auto" | "none"
interface RunOptionsBase {
readonly request: LLMRequest
readonly concurrency?: Concurrency
readonly stopWhen?: StopCondition
}
export type RunOptions<T extends Tools> = RunOptionsAuto<T & ExecutableTools> | RunOptionsNone<T>
export interface RunOptionsAuto<T extends ExecutableTools> extends RunOptionsBase {
readonly request: LLMRequest
readonly tools: T
readonly toolExecution?: "auto"
}
export interface RunOptionsNone<T extends Tools> extends RunOptionsBase {
readonly request: LLMRequest
readonly tools: T
/** Advertise tool schemas but leave model-emitted tool calls for the caller. */
readonly toolExecution: "none"
}
export type StreamOptions<T extends Tools> = RunOptions<T> & {
readonly stream: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError>
}
export const stepCountIs =
(count: number): StopCondition =>
(state) =>
state.step + 1 >= count
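/**
* Steps are zero-indexed and the condition is checked after each model round,
* so `stepCountIs(1)` stops after the first round and `stepCountIs(2)` allows
* one tool-driven follow-up round.
*/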
/**
* Run a model with typed tools. This helper owns tool orchestration, while the
* caller supplies the actual model stream function. It can advertise schemas
* only (`toolExecution: "none"`), execute one step, or continue model rounds
* when `stopWhen` is provided.
*/
export const stream = <T extends Tools>(options: StreamOptions<T>): Stream.Stream<LLMEvent, LLMError> => {
const concurrency = options.concurrency ?? 10
const tools = options.tools as Tools
const runtimeTools = toDefinitions(tools)
const runtimeToolNames = new Set(runtimeTools.map((tool) => tool.name))
const initialRequest =
runtimeTools.length === 0
? options.request
: LLMRequest.update(options.request, {
tools: [...options.request.tools.filter((tool) => !runtimeToolNames.has(tool.name)), ...runtimeTools],
})
const loop = (request: LLMRequest, step: number): Stream.Stream<LLMEvent, LLMError> =>
Stream.unwrap(
Effect.gen(function* () {
const state: StepState = { assistantContent: [], toolCalls: [], finishReason: undefined }
const modelStream = options
.stream(request)
.pipe(Stream.tap((event) => Effect.sync(() => accumulate(state, event))))
const continuation = Stream.unwrap(
Effect.gen(function* () {
if (state.finishReason !== "tool-calls" || state.toolCalls.length === 0) return Stream.empty
if (options.toolExecution === "none") return Stream.empty
const dispatched = yield* Effect.forEach(
state.toolCalls,
(call) => dispatch(tools, call).pipe(Effect.map((result) => [call, result] as const)),
{ concurrency },
)
const resultStream = Stream.fromIterable(dispatched.flatMap(([call, result]) => emitEvents(call, result)))
if (!options.stopWhen) return resultStream
if (options.stopWhen({ step, request })) return resultStream
return resultStream.pipe(Stream.concat(loop(followUpRequest(request, state, dispatched), step + 1)))
}),
)
return modelStream.pipe(Stream.concat(continuation))
}),
)
return loop(initialRequest, 0)
}
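/**
* Example (illustrative): wiring the runtime to a model stream. `llm.stream`
* stands in for whatever route-level stream function the caller supplies, and
* `weatherTool` is a made-up tool fixture.
*
* ```ts
* const events = stream({
*   request,
*   tools: { get_weather: weatherTool },
*   stream: (req) => llm.stream(req),
*   stopWhen: stepCountIs(3),
* })
* ```
*/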
interface StepState {
assistantContent: ContentPart[]
toolCalls: ToolCallPart[]
finishReason: FinishReason | undefined
}
const accumulate = (state: StepState, event: LLMEvent) => {
if (event.type === "text-delta") {
appendStreamingText(state, "text", event.text, event.providerMetadata)
return
}
if (event.type === "reasoning-delta") {
appendStreamingText(state, "reasoning", event.text, event.providerMetadata)
return
}
if (event.type === "tool-call") {
const part = ToolCallPart.make({
id: event.id,
name: event.name,
input: event.input,
providerExecuted: event.providerExecuted,
providerMetadata: event.providerMetadata,
})
state.assistantContent.push(part)
if (!event.providerExecuted) state.toolCalls.push(part)
return
}
if (event.type === "tool-result" && event.providerExecuted) {
state.assistantContent.push(
ToolResultPart.make({
id: event.id,
name: event.name,
result: event.result,
providerExecuted: true,
providerMetadata: event.providerMetadata,
}),
)
return
}
if (event.type === "request-finish") {
state.finishReason = event.reason
}
}
const sameProviderMetadata = (left: ProviderMetadata | undefined, right: ProviderMetadata | undefined) =>
left === right || JSON.stringify(left) === JSON.stringify(right)
const mergeProviderMetadata = (left: ProviderMetadata | undefined, right: ProviderMetadata | undefined) => {
if (!left) return right
if (!right) return left
return Object.fromEntries(
Array.from(new Set([...Object.keys(left), ...Object.keys(right)])).map((provider) => [
provider,
{ ...left[provider], ...right[provider] },
]),
)
}
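/**
* Coalesce consecutive deltas of the same part type. Empty deltas only merge
* metadata; a metadata change starts a new part so per-part provider metadata
* stays accurate.
*/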
const appendStreamingText = (
state: StepState,
type: "text" | "reasoning",
text: string,
providerMetadata: ProviderMetadata | undefined,
) => {
const last = state.assistantContent.at(-1)
if (last?.type === type && text.length === 0) {
state.assistantContent[state.assistantContent.length - 1] = {
...last,
providerMetadata: mergeProviderMetadata(last.providerMetadata, providerMetadata),
}
return
}
if (last?.type === type && sameProviderMetadata(last.providerMetadata, providerMetadata)) {
state.assistantContent[state.assistantContent.length - 1] = { ...last, text: `${last.text}${text}` }
return
}
state.assistantContent.push({ type, text, providerMetadata })
}
const dispatch = (tools: Tools, call: ToolCallPart): Effect.Effect<ToolResultValue> => {
const tool = tools[call.name]
if (!tool) return Effect.succeed({ type: "error" as const, value: `Unknown tool: ${call.name}` })
if (!tool.execute)
return Effect.succeed({ type: "error" as const, value: `Tool has no execute handler: ${call.name}` })
return decodeAndExecute(tool, call.input).pipe(
Effect.catchTag("LLM.ToolFailure", (failure) =>
Effect.succeed({ type: "error" as const, value: failure.message } satisfies ToolResultValue),
),
)
}
const decodeAndExecute = (tool: AnyTool, input: unknown): Effect.Effect<ToolResultValue, ToolFailure> =>
tool._decode(input).pipe(
Effect.mapError((error) => new ToolFailure({ message: `Invalid tool input: ${error.message}` })),
Effect.flatMap((decoded) => tool.execute!(decoded)),
Effect.flatMap((value) =>
tool._encode(value).pipe(
Effect.mapError(
(error) =>
new ToolFailure({
message: `Tool returned an invalid value for its success schema: ${error.message}`,
}),
),
),
),
Effect.map((encoded): ToolResultValue => ({ type: "json", value: encoded })),
)
const emitEvents = (call: ToolCallPart, result: ToolResultValue): ReadonlyArray<LLMEvent> =>
result.type === "error"
? [
{ type: "tool-error", id: call.id, name: call.name, message: String(result.value) },
{ type: "tool-result", id: call.id, name: call.name, result },
]
: [{ type: "tool-result", id: call.id, name: call.name, result }]
const followUpRequest = (
request: LLMRequest,
state: StepState,
dispatched: ReadonlyArray<readonly [ToolCallPart, ToolResultValue]>,
) =>
LLMRequest.update(request, {
messages: [
...request.messages,
Message.assistant(state.assistantContent),
...dispatched.map(([call, result]) => Message.tool({ id: call.id, name: call.name, result })),
],
})
export const ToolRuntime = { stream, stepCountIs } as const

185
packages/llm/src/tool.ts Normal file
View File

@@ -0,0 +1,185 @@
import { Effect, JsonSchema, Schema } from "effect"
import type { ToolDefinition as ToolDefinitionClass } from "./schema"
import { ToolDefinition, ToolFailure } from "./schema"
/**
* Schema constraint for tool parameters / success values: no decoding or
* encoding services are allowed. Tools should be self-contained — anything
* beyond pure data conversion belongs in the handler closure.
*/
export type ToolSchema<T> = Schema.Codec<T, any, never, never>
export type ToolExecute<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> = (
params: Schema.Schema.Type<Parameters>,
) => Effect.Effect<Schema.Schema.Type<Success>, ToolFailure>
/**
* A type-safe LLM tool. Each tool bundles its own description, parameter
* Schema and success Schema. The execute handler is optional: omit it when you
* only want to expose a tool schema to the model and handle tool calls outside
* this package.
*
* Errors must be expressed as `ToolFailure`. Unmapped errors and defects fail
* the stream.
*
* Internally each tool also carries memoized codecs and a precomputed
* `ToolDefinition` so the runtime doesn't rebuild them per invocation.
*/
export interface Tool<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute?: ToolExecute<Parameters, Success>
/** @internal */
readonly _decode: (input: unknown) => Effect.Effect<Schema.Schema.Type<Parameters>, Schema.SchemaError>
/** @internal */
readonly _encode: (value: Schema.Schema.Type<Success>) => Effect.Effect<unknown, Schema.SchemaError>
/** @internal */
readonly _definition: ToolDefinitionClass
}
export type AnyTool = Tool<ToolSchema<any>, ToolSchema<any>>
export type ExecutableTool<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> = Tool<
Parameters,
Success
> & {
readonly execute: ToolExecute<Parameters, Success>
}
export type AnyExecutableTool = ExecutableTool<ToolSchema<any>, ToolSchema<any>>
export type ExecutableTools = Record<string, AnyExecutableTool>
type TypedToolConfig = {
readonly description: string
readonly parameters: ToolSchema<any>
readonly success: ToolSchema<any>
readonly execute?: ToolExecute<ToolSchema<any>, ToolSchema<any>>
}
type DynamicToolConfig = {
readonly description: string
readonly jsonSchema: JsonSchema.JsonSchema
readonly execute?: (params: unknown) => Effect.Effect<unknown, ToolFailure>
}
/**
* Constructs a tool. Two input modes:
*
* 1. **Typed** — pass Effect `parameters` and `success` Schemas; inputs and
* outputs are statically typed and decoded/encoded automatically.
*
* ```ts
* Tool.make({
* description: "Get current weather",
* parameters: Schema.Struct({ city: Schema.String }),
* success: Schema.Struct({ temperature: Schema.Number }),
* execute: ({ city }) => Effect.succeed({ temperature: 22 }),
* })
* ```
*
* 2. **Dynamic** — pass raw JSON Schema as `jsonSchema`. Use this when the
* schema comes from an external source (MCP server, plugin manifest,
* dynamic config) and is not known at compile time. Inputs are typed as
* `unknown`; the handler is responsible for any validation it needs.
*
* ```ts
* Tool.make({
* description: "Look something up",
* jsonSchema: { type: "object", properties: { ... } },
* execute: (params) => Effect.succeed(...),
* })
* ```
*
* In both modes the produced tool flows through `toDefinitions(...)` and the
* runtime identically.
*/
export function make<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>>(config: {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute: ToolExecute<Parameters, Success>
}): ExecutableTool<Parameters, Success>
export function make<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>>(config: {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute?: undefined
}): Tool<Parameters, Success>
export function make(config: {
readonly description: string
readonly jsonSchema: JsonSchema.JsonSchema
readonly execute: (params: unknown) => Effect.Effect<unknown, ToolFailure>
}): AnyExecutableTool
export function make(config: {
readonly description: string
readonly jsonSchema: JsonSchema.JsonSchema
readonly execute?: undefined
}): AnyTool
export function make(config: TypedToolConfig | DynamicToolConfig): AnyTool {
if ("jsonSchema" in config) {
return {
description: config.description,
parameters: Schema.Unknown as ToolSchema<unknown>,
success: Schema.Unknown as ToolSchema<unknown>,
execute: config.execute,
_decode: Effect.succeed,
_encode: Effect.succeed,
_definition: new ToolDefinition({
name: "",
description: config.description,
inputSchema: config.jsonSchema,
}),
}
}
return {
description: config.description,
parameters: config.parameters,
success: config.success,
execute: config.execute,
_decode: Schema.decodeUnknownEffect(config.parameters),
_encode: Schema.encodeEffect(config.success),
_definition: new ToolDefinition({
name: "",
description: config.description,
inputSchema: toJsonSchema(config.parameters),
}),
}
}
export const tool = make
/**
* A record of named tools. The record key becomes the tool name on the wire.
*/
export type Tools = Record<string, AnyTool>
/**
* Convert a tools record into the `ToolDefinition[]` shape that
* `LLMRequest.tools` expects. The runtime calls this internally; consumers
* that build `LLMRequest` themselves can use it too.
*
* Tool names come from the record keys, so the per-tool cached
* `_definition` is rebuilt with the correct name here. The JSON Schema body
* is reused.
*/
export const toDefinitions = (tools: Tools): ReadonlyArray<ToolDefinitionClass> =>
Object.entries(tools).map(
([name, item]) =>
new ToolDefinition({
name,
description: item._definition.description,
inputSchema: item._definition.inputSchema,
}),
)
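/**
* Example (illustrative): the record key, not anything stored on the tool,
* becomes the wire name.
*
* ```ts
* toDefinitions({ get_weather: weatherTool })[0].name // "get_weather"
* ```
*/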
const toJsonSchema = (schema: Schema.Top): JsonSchema.JsonSchema => {
const document = Schema.toJsonSchemaDocument(schema)
if (Object.keys(document.definitions).length === 0) return document.schema
return { ...document.schema, $defs: document.definitions }
}
export { ToolFailure }
export * as Tool from "./tool"

View File

@@ -0,0 +1,175 @@
import { describe, expect } from "bun:test"
import { Effect, Schema, Stream } from "effect"
import { LLM } from "../src"
import { Route, Endpoint, LLMClient, Protocol, type RouteModelInput, type FramingDef } from "../src/route"
import { ModelRef } from "../src/schema"
import { testEffect } from "./lib/effect"
import { dynamicResponse } from "./lib/http"
const updateModel = (model: ModelRef, patch: Partial<ModelRef.Input>) => ModelRef.update(model, patch)
const Json = Schema.fromJsonString(Schema.Unknown)
const encodeJson = Schema.encodeSync(Json)
type FakeBody = {
readonly body: string
}
const FakeEvent = Schema.Union([
Schema.Struct({ type: Schema.Literal("text"), text: Schema.String }),
Schema.Struct({ type: Schema.Literal("finish"), reason: Schema.Literal("stop") }),
])
type FakeEvent = Schema.Schema.Type<typeof FakeEvent>
const decodeFakeEvents = Schema.decodeUnknownEffect(Schema.fromJsonString(Schema.Array(FakeEvent)))
const fakeFraming: FramingDef<FakeEvent> = {
id: "fake-json-array",
frame: (bytes) =>
Stream.fromEffect(
bytes.pipe(
Stream.decodeText(),
Stream.runFold(
() => "",
(text, event) => text + event,
),
Effect.flatMap(decodeFakeEvents),
Effect.orDie,
),
).pipe(Stream.flatMap(Stream.fromIterable)),
}
const request = LLM.request({
id: "req_1",
model: LLM.model({
id: "fake-model",
provider: "fake-provider",
route: "fake",
baseURL: "https://fake.local",
}),
prompt: "hello",
})
const raiseEvent = (event: FakeEvent): import("../src/schema").LLMEvent =>
event.type === "finish" ? { type: "request-finish", reason: event.reason } : { type: "text-delta", text: event.text }
const fakeProtocol = Protocol.make<FakeBody, FakeEvent, FakeEvent, void>({
id: "fake",
body: {
schema: Schema.Struct({
body: Schema.String,
}),
from: (request) =>
Effect.succeed({
body: [
...request.messages
.flatMap((message) => message.content)
.filter((part) => part.type === "text")
.map((part) => part.text),
...request.tools.map((tool) => `tool:${tool.name}:${tool.description}`),
].join("\n"),
}),
},
stream: {
event: FakeEvent,
initial: () => undefined,
step: (state, event) => Effect.succeed([state, [raiseEvent(event)]] as const),
},
})
const fake = Route.make({
id: "fake",
protocol: fakeProtocol,
endpoint: Endpoint.path("/chat"),
framing: fakeFraming,
})
const gemini = Route.make({
id: "gemini-fake",
protocol: fakeProtocol,
endpoint: Endpoint.path("/chat"),
framing: fakeFraming,
})
const echoLayer = dynamicResponse(({ text, respond }) =>
Effect.succeed(
respond(
encodeJson([
{ type: "text", text: `echo:${text}` },
{ type: "finish", reason: "stop" },
]),
),
),
)
const it = testEffect(echoLayer)
describe("llm route", () => {
it.effect("stream and generate use the route pipeline", () =>
Effect.gen(function* () {
const llm = yield* LLMClient.Service
const events = Array.from(yield* llm.stream(request).pipe(Stream.runCollect))
const response = yield* llm.generate(request)
expect(events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
expect(response.events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
}),
)
it.effect("selects routes by request route", () =>
Effect.gen(function* () {
const llm = yield* LLMClient.Service
const prepared = yield* llm.prepare(
LLM.updateRequest(request, { model: updateModel(request.model, { route: "gemini-fake" }) }),
)
expect(prepared.route).toBe("gemini-fake")
}),
)
it.effect("maps model input before building refs", () =>
Effect.gen(function* () {
const mapped = Route.model<RouteModelInput & { readonly region?: string }>(
fake,
{ provider: "fake-provider", baseURL: "https://fake.local" },
{
mapInput: (input) => {
const { region, ...rest } = input
return { ...rest, native: { region } }
},
},
)
expect(mapped({ id: "fake-model", region: "us-east-1" }).native).toEqual({ region: "us-east-1" })
}),
)
it.effect("rejects duplicate route ids", () =>
Effect.gen(function* () {
expect(() =>
Route.make({
id: "fake",
protocol: Protocol.make({
...fakeProtocol,
body: {
...fakeProtocol.body,
from: () => Effect.succeed({ body: "late-default" }),
},
}),
endpoint: Endpoint.path("/chat"),
framing: fakeFraming,
}),
).toThrow('Duplicate LLM route id "fake"')
}),
)
it.effect("rejects missing route", () =>
Effect.gen(function* () {
const llm = yield* LLMClient.Service
const error = yield* llm
.prepare(LLM.updateRequest(request, { model: updateModel(request.model, { route: "missing" }) }))
.pipe(Effect.flip)
expect(error.message).toContain("No LLM route")
}),
)
})

View File

@@ -0,0 +1,100 @@
import { Config } from "effect"
import type { Auth } from "../src/route/auth"
import type { ModelFactory } from "../src/route/auth-options"
import { Auth as RuntimeAuth } from "../src/route/auth"
import * as Azure from "../src/providers/azure"
import * as OpenAI from "../src/providers/openai"
type BaseOptions = {
readonly baseURL?: string
readonly headers?: Record<string, string>
}
type Model = {
readonly id: string
}
declare const auth: Auth
declare const optionalAuthModel: ModelFactory<BaseOptions, "optional", Model>
declare const requiredAuthModel: ModelFactory<BaseOptions, "required", Model>
const configApiKey = Config.redacted("OPENAI_API_KEY")
optionalAuthModel("gpt-4.1-mini")
optionalAuthModel("gpt-4.1-mini", {})
optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test" })
optionalAuthModel("gpt-4.1-mini", { apiKey: configApiKey })
optionalAuthModel("gpt-4.1-mini", { auth })
optionalAuthModel("gpt-4.1-mini", { auth, baseURL: "https://gateway.example.com/v1" })
optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test", headers: { "x-source": "test" } })
// @ts-expect-error auth is an override, so apiKey cannot be supplied with it.
optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test", auth })
requiredAuthModel("custom-model", { apiKey: "key" })
requiredAuthModel("custom-model", { apiKey: configApiKey })
requiredAuthModel("custom-model", { auth })
requiredAuthModel("custom-model", { auth, headers: { "x-tenant-id": "tenant" } })
// @ts-expect-error providers without config fallback need apiKey or auth.
requiredAuthModel("custom-model")
// @ts-expect-error providers without config fallback need apiKey or auth.
requiredAuthModel("custom-model", {})
// @ts-expect-error auth is an override, so apiKey cannot be supplied with it.
requiredAuthModel("custom-model", { apiKey: "key", auth })
OpenAI.responses("gpt-4.1-mini")
OpenAI.responses("gpt-4.1-mini", {})
OpenAI.responses("gpt-4.1-mini", { apiKey: "sk-test" })
OpenAI.responses("gpt-4.1-mini", { apiKey: configApiKey })
OpenAI.responses("gpt-4.1-mini", { auth: RuntimeAuth.bearer("oauth-token") })
OpenAI.responses("gpt-4.1-mini", {
auth: RuntimeAuth.headers({ authorization: "Bearer gateway" }),
baseURL: "https://gateway.example.com/v1",
})
OpenAI.responses("gpt-4.1-mini", {
generation: { maxTokens: 100 },
providerOptions: { openai: { store: false } },
})
// @ts-expect-error apiKey only accepts string, Redacted<string>, or Config<string | Redacted<string>>.
OpenAI.responses("gpt-4.1-mini", { apiKey: 123 })
// @ts-expect-error provider helpers reject unknown top-level options.
OpenAI.responses("gpt-4.1-mini", { bogus: true })
// @ts-expect-error common generation options remain typed.
OpenAI.responses("gpt-4.1-mini", { generation: { maxTokens: "many" } })
// @ts-expect-error provider-native options remain typed.
OpenAI.responses("gpt-4.1-mini", { providerOptions: { openai: { store: "false" } } })
// @ts-expect-error auth is an override, so OpenAI rejects apiKey with auth.
OpenAI.responses("gpt-4.1-mini", { apiKey: "sk-test", auth: RuntimeAuth.bearer("oauth-token") })
OpenAI.chat("gpt-4.1-mini")
OpenAI.chat("gpt-4.1-mini", { apiKey: "sk-test" })
OpenAI.chat("gpt-4.1-mini", { apiKey: configApiKey })
OpenAI.chat("gpt-4.1-mini", { auth: RuntimeAuth.bearer("oauth-token") })
// @ts-expect-error auth is an override, so OpenAI Chat rejects apiKey with auth.
OpenAI.chat("gpt-4.1-mini", { apiKey: "sk-test", auth: RuntimeAuth.bearer("oauth-token") })
// @ts-expect-error Azure requires at least one of `resourceName` or `baseURL`.
Azure.responses("deployment")
Azure.responses("deployment", { apiKey: "azure-key", resourceName: "resource" })
Azure.responses("deployment", { apiKey: configApiKey, resourceName: "resource" })
Azure.responses("deployment", { auth: RuntimeAuth.header("api-key", "azure-key"), resourceName: "resource" })
// @ts-expect-error auth is an override, so Azure rejects apiKey with auth.
Azure.responses("deployment", { apiKey: "azure-key", auth: RuntimeAuth.header("api-key", "override") })
// @ts-expect-error Azure requires at least one of `resourceName` or `baseURL`.
Azure.chat("deployment")
Azure.chat("deployment", { apiKey: "azure-key", resourceName: "resource" })
Azure.chat("deployment", { apiKey: configApiKey, resourceName: "resource" })
Azure.chat("deployment", { auth: RuntimeAuth.header("api-key", "azure-key"), resourceName: "resource" })
// @ts-expect-error auth is an override, so Azure Chat rejects apiKey with auth.
Azure.chat("deployment", { apiKey: "azure-key", auth: RuntimeAuth.header("api-key", "override") })

View File

@@ -0,0 +1,101 @@
import { describe, expect } from "bun:test"
import { ConfigProvider, Effect } from "effect"
import { Headers } from "effect/unstable/http"
import { LLM } from "../src"
import { Auth } from "../src/route/auth"
import { it } from "./lib/effect"
const request = LLM.request({
id: "req_auth",
model: LLM.model({ id: "fake-model", provider: "fake", route: "fake", baseURL: "https://fake.local" }),
prompt: "hello",
})
const input = {
request,
method: "POST" as const,
url: "https://example.test/v1/chat",
body: "{}",
headers: Headers.fromInput({ "x-existing": "yes" }),
}
const withEnv = (env: Record<string, string>) => Effect.provide(ConfigProvider.layer(ConfigProvider.fromEnv({ env })))
describe("Auth", () => {
it.effect("renders a config credential as bearer auth", () =>
Effect.gen(function* () {
const headers = yield* Auth.config("OPENAI_API_KEY")
.bearer()
.apply(input)
.pipe(withEnv({ OPENAI_API_KEY: "sk-test" }))
expect(headers.authorization).toBe("Bearer sk-test")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("falls back between credential sources before rendering", () =>
Effect.gen(function* () {
const headers = yield* Auth.config("PRIMARY_KEY")
.orElse(Auth.value("fallback-key"))
.pipe(Auth.header("x-api-key"))
.apply(input)
.pipe(withEnv({}))
expect(headers["x-api-key"]).toBe("fallback-key")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("composes header auth in sequence", () =>
Effect.gen(function* () {
const headers = yield* Auth.headers({ "x-tenant-id": "tenant-1" })
.andThen(Auth.bearer("gateway-token"))
.apply(input)
expect(headers["x-tenant-id"]).toBe("tenant-1")
expect(headers.authorization).toBe("Bearer gateway-token")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("renders a direct secret as a custom header", () =>
Effect.gen(function* () {
const headers = yield* Auth.header("api-key", "direct-key").apply(input)
expect(headers["api-key"]).toBe("direct-key")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("renders bearer auth into a custom header", () =>
Effect.gen(function* () {
const headers = yield* Auth.bearerHeader("cf-aig-authorization", "gateway-token").apply(input)
expect(headers["cf-aig-authorization"]).toBe("Bearer gateway-token")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("falls back between full auth values", () =>
Effect.gen(function* () {
const headers = yield* Auth.config("OPENAI_API_KEY")
.bearer()
.orElse(Auth.headers({ authorization: "Bearer supplied" }))
.apply(input)
.pipe(withEnv({}))
expect(headers.authorization).toBe("Bearer supplied")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("can intentionally leave auth untouched", () =>
Effect.gen(function* () {
const headers = yield* Auth.none.apply(input)
expect(headers.authorization).toBeUndefined()
expect(headers["x-existing"]).toBe("yes")
}),
)
})

View File

@@ -0,0 +1,57 @@
import { describe, expect, test } from "bun:test"
import { LLM } from "../src"
import { Endpoint } from "../src/route"
const request = (
input: {
readonly baseURL: string
readonly queryParams?: Record<string, string>
},
) =>
LLM.request({
model: LLM.model({
id: "model-1",
provider: "test",
route: "test-route",
baseURL: input.baseURL,
queryParams: input.queryParams,
}),
prompt: "hello",
})
describe("Endpoint", () => {
test("appends a static path to the model's baseURL", () => {
const url = Endpoint.render(Endpoint.path("/chat"), {
request: request({ baseURL: "https://api.example.test/v1/" }),
body: {},
})
expect(url.toString()).toBe("https://api.example.test/v1/chat")
})
test("model query params are appended to the rendered URL", () => {
const url = Endpoint.render(Endpoint.path("/chat?alt=sse"), {
request: request({
baseURL: "https://custom.example.test/root/",
queryParams: { "api-version": "2026-01-01", alt: "json" },
}),
body: {},
})
expect(url.toString()).toBe("https://custom.example.test/root/chat?alt=json&api-version=2026-01-01")
})
test("path may be a function of the validated body", () => {
const url = Endpoint.render(
Endpoint.path<{ readonly modelId: string }>(({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`),
{
request: request({ baseURL: "https://bedrock-runtime.us-east-1.amazonaws.com" }),
body: { modelId: "us.amazon.nova-micro-v1:0" },
},
)
expect(url.toString()).toBe(
"https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
)
})
})

View File

@@ -0,0 +1,416 @@
import { describe, expect } from "bun:test"
import { Effect, Fiber, Layer, Random, Ref } from "effect"
import * as TestClock from "effect/testing/TestClock"
import { Headers, HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"
import { LLM, LLMError } from "../src"
import { LLMClient, RequestExecutor } from "../src/route"
import * as OpenAIChat from "../src/protocols/openai-chat"
import { dynamicResponse } from "./lib/http"
import { deltaChunk } from "./lib/openai-chunks"
import { sseRaw } from "./lib/sse"
import { it } from "./lib/effect"
const request = HttpClientRequest.post("https://provider.test/v1/chat?api_key=secret&key=secret&debug=1").pipe(
HttpClientRequest.setHeaders(Headers.fromInput({ authorization: "Bearer secret", "x-safe": "visible" })),
)
const secretRequest = HttpClientRequest.post("https://provider.test/v1/chat?api_key=query-secret-123&debug=1").pipe(
HttpClientRequest.setHeaders(Headers.fromInput({ authorization: "Bearer header-secret-456" })),
)
const responsesLayer = (responses: ReadonlyArray<Response>) =>
RequestExecutor.layer.pipe(
Layer.provide(
Layer.unwrap(
Effect.gen(function* () {
const cursor = yield* Ref.make(0)
return Layer.succeed(
HttpClient.HttpClient,
HttpClient.make((request) =>
Effect.gen(function* () {
const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1)
return HttpClientResponse.fromWeb(request, responses[index] ?? responses[responses.length - 1])
}),
),
)
}),
),
),
)
const countedResponsesLayer = (attempts: Ref.Ref<number>, responses: ReadonlyArray<Response>) =>
RequestExecutor.layer.pipe(
Layer.provide(
Layer.unwrap(
Effect.gen(function* () {
const cursor = yield* Ref.make(0)
return Layer.succeed(
HttpClient.HttpClient,
HttpClient.make((request) =>
Effect.gen(function* () {
yield* Ref.update(attempts, (value) => value + 1)
const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1)
return HttpClientResponse.fromWeb(request, responses[index] ?? responses[responses.length - 1])
}),
),
)
}),
),
),
)
const randomMidpoint = {
nextDoubleUnsafe: () => 0.5,
nextIntUnsafe: () => 0,
}
const expectLLMError = (error: unknown) => {
expect(error).toBeInstanceOf(LLMError)
if (!(error instanceof LLMError)) throw new Error("expected LLMError")
return error
}
const errorHttp = (error: LLMError) => ("http" in error.reason ? error.reason.http : undefined)
describe("RequestExecutor", () => {
it.effect("returns redacted diagnostics for retryable rate limits", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error).toMatchObject({
retryable: true,
retryAfterMs: 0,
reason: {
_tag: "RateLimit",
rateLimit: { retryAfterMs: 0 },
http: {
requestId: "req_123",
request: {
method: "POST",
url: "https://provider.test/v1/chat?api_key=%3Credacted%3E&key=%3Credacted%3E&debug=1",
headers: { authorization: "<redacted>", "x-safe": "visible" },
},
response: {
status: 429,
headers: {
"retry-after-ms": "0",
"x-request-id": "req_123",
"x-api-key": "<redacted>",
},
},
},
},
})
expect(errorHttp(error)?.body).toBe("rate limited")
}).pipe(
Effect.provide(
responsesLayer([
...Array.from(
{ length: 3 },
() =>
new Response("rate limited", {
status: 429,
headers: { "retry-after-ms": "0", "x-request-id": "req_123", "x-api-key": "secret" },
}),
),
]),
),
),
)
it.effect("honors current redacted header names in diagnostics", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(errorHttp(error)?.request.headers["x-safe"]).toBe("<redacted>")
expect(errorHttp(error)?.response?.headers["x-safe"]).toBe("<redacted>")
}).pipe(
Effect.provide(responsesLayer([new Response("bad", { status: 400, headers: { "x-safe": "response-secret" } })])),
Effect.provideService(Headers.CurrentRedactedNames, ["x-safe"]),
),
)
it.effect("extracts OpenAI-style rate-limit diagnostics", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "RateLimit" })
expect(error.reason._tag === "RateLimit" ? error.reason.rateLimit : undefined).toEqual({
retryAfterMs: 0,
limit: { requests: "500", tokens: "30000" },
remaining: { requests: "499", tokens: "29900" },
reset: { requests: "1s", tokens: "10s" },
})
}).pipe(
Effect.provide(
responsesLayer(
Array.from(
{ length: 3 },
() =>
new Response("rate limited", {
status: 429,
headers: {
"retry-after-ms": "0",
"x-ratelimit-limit-requests": "500",
"x-ratelimit-limit-tokens": "30000",
"x-ratelimit-remaining-requests": "499",
"x-ratelimit-remaining-tokens": "29900",
"x-ratelimit-reset-requests": "1s",
"x-ratelimit-reset-tokens": "10s",
},
}),
),
),
),
),
)
it.effect("extracts Anthropic-style rate-limit diagnostics", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "ProviderInternal" })
expect(errorHttp(error)?.rateLimit).toEqual({
retryAfterMs: 0,
limit: { requests: "100", "input-tokens": "10000" },
remaining: { requests: "12", "input-tokens": "9000" },
reset: { requests: "2026-05-06T12:00:00Z", "input-tokens": "2026-05-06T12:00:10Z" },
})
}).pipe(
Effect.provide(
responsesLayer(
Array.from(
{ length: 3 },
() =>
new Response("overloaded", {
status: 529,
headers: {
"retry-after-ms": "0",
"anthropic-ratelimit-requests-limit": "100",
"anthropic-ratelimit-requests-remaining": "12",
"anthropic-ratelimit-requests-reset": "2026-05-06T12:00:00Z",
"anthropic-ratelimit-input-tokens-limit": "10000",
"anthropic-ratelimit-input-tokens-remaining": "9000",
"anthropic-ratelimit-input-tokens-reset": "2026-05-06T12:00:10Z",
},
}),
),
),
),
),
)
it.effect("retries retryable status responses before returning the stream", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const response = yield* executor.execute(request)
expect(response.status).toBe(200)
expect(yield* response.text).toBe("ok")
}).pipe(
Effect.provide(
responsesLayer([
new Response("busy", { status: 503, headers: { "retry-after-ms": "0" } }),
new Response("ok", { status: 200 }),
]),
),
),
)
it.effect("marks 504 and 529 status responses retryable", () =>
Effect.gen(function* () {
const failWith = (status: number) =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "ProviderInternal", status })
expect(error.retryable).toBe(true)
}).pipe(
Effect.provide(
responsesLayer(
Array.from(
{ length: 3 },
() =>
new Response("retry", {
status,
headers: { "retry-after-ms": "0" },
}),
),
),
),
)
yield* failWith(504)
yield* failWith(529)
}),
)
it.effect("does not retry non-retryable status responses and truncates large bodies", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "Authentication" })
expect(error.retryable).toBe(false)
expect(errorHttp(error)?.bodyTruncated).toBe(true)
expect(errorHttp(error)?.body).toHaveLength(16_384)
}).pipe(
Effect.provide(
responsesLayer([
new Response("x".repeat(20_000), { status: 401 }),
new Response("should not retry", { status: 200 }),
]),
),
),
)
it.effect("redacts common secret fields in response bodies", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(errorHttp(error)?.body).toContain('"key":"<redacted>"')
expect(errorHttp(error)?.body).toContain("api_key=<redacted>")
expect(errorHttp(error)?.body).not.toContain("body-secret")
expect(errorHttp(error)?.body).not.toContain("query-secret")
}).pipe(
Effect.provide(
responsesLayer([
new Response('{"error":{"message":"bad","key":"body-secret","detail":"api_key=query-secret"}}', {
status: 400,
}),
]),
),
),
)
it.effect("redacts echoed request secret values in response bodies", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(secretRequest).pipe(Effect.flip)
expectLLMError(error)
expect(errorHttp(error)?.body).toContain("provider echoed <redacted>")
expect(errorHttp(error)?.body).toContain("authorization <redacted>")
expect(errorHttp(error)?.body).not.toContain("query-secret-123")
expect(errorHttp(error)?.body).not.toContain("header-secret-456")
}).pipe(
Effect.provide(
responsesLayer([
new Response("provider echoed query-secret-123 and authorization header-secret-456", { status: 400 }),
]),
),
),
)
it.effect("honors Retry-After delta seconds before retrying", () =>
Effect.gen(function* () {
const attempts = yield* Ref.make(0)
return yield* Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const fiber = yield* executor.execute(request).pipe(Effect.forkChild)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(1_999)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(1)
const response = yield* Fiber.join(fiber)
expect(response.status).toBe(200)
expect(yield* Ref.get(attempts)).toBe(2)
}).pipe(
Effect.provide(
countedResponsesLayer(attempts, [
new Response("busy", { status: 503, headers: { "retry-after": "2" } }),
new Response("ok", { status: 200 }),
]),
),
)
}),
)
it.effect("uses exponential jittered delay when retry-after is absent", () =>
Effect.gen(function* () {
const attempts = yield* Ref.make(0)
return yield* Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const fiber = yield* executor.execute(request).pipe(Effect.flip, Effect.forkChild)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(499)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(1)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(2)
yield* TestClock.adjust(999)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(2)
yield* TestClock.adjust(1)
const error = yield* Fiber.join(fiber)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "ProviderInternal" })
expect(yield* Ref.get(attempts)).toBe(3)
}).pipe(
Effect.provide(
countedResponsesLayer(attempts, [
new Response("busy", { status: 503 }),
new Response("still busy", { status: 503 }),
new Response("done retrying", { status: 503 }),
]),
),
)
}).pipe(Effect.provideService(Random.Random, randomMidpoint)),
)
it.effect("does not retry after a successful response reaches stream parsing", () =>
Effect.gen(function* () {
const attempts = yield* Ref.make(0)
const model = OpenAIChat.model({ id: "gpt-4o-mini", baseURL: "https://api.openai.test/v1" })
const error = yield* LLMClient.generate(LLM.request({ model, prompt: "Say hello." })).pipe(
Effect.provide(
dynamicResponse((input) =>
Ref.update(attempts, (value) => value + 1).pipe(
Effect.as(
input.respond(
sseRaw(
`data: ${JSON.stringify(deltaChunk({ role: "assistant", content: "Hello" }))}`,
"data: not-json",
),
{ headers: { "content-type": "text/event-stream" } },
),
),
),
),
),
Effect.flip,
)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "InvalidProviderOutput" })
expect(yield* Ref.get(attempts)).toBe(1)
}),
)
})

View File

@@ -0,0 +1,56 @@
import { describe, expect, test } from "bun:test"
import { LLM, LLMClient, Provider } from "@opencode-ai/llm"
import { Route, Protocol } from "@opencode-ai/llm/route"
import { Provider as ProviderSubpath } from "@opencode-ai/llm/provider"
import { Cloudflare, OpenAI, OpenAICompatible, OpenRouter, XAI } from "@opencode-ai/llm/providers"
import * as GitHubCopilot from "@opencode-ai/llm/providers/github-copilot"
import { OpenAIChat, OpenAICompatibleChat, OpenAIResponses } from "@opencode-ai/llm/protocols"
import * as AnthropicMessages from "@opencode-ai/llm/protocols/anthropic-messages"
describe("public exports", () => {
test("root exposes app-facing runtime APIs", () => {
expect(LLM.request).toBeFunction()
expect(LLMClient.Service).toBeFunction()
expect(LLMClient.layer).toBeDefined()
expect(Provider.make).toBeFunction()
expect(ProviderSubpath.make).toBe(Provider.make)
})
test("route barrel exposes route-authoring APIs", () => {
expect(Route.make).toBeFunction()
expect(Protocol.make).toBeFunction()
})
test("provider barrels expose user-facing facades", () => {
expect(OpenAI.model).toBeFunction()
expect(OpenAI.provider.model).toBe(OpenAI.model)
expect(OpenAI.apis.responses).toBe(OpenAI.responses)
expect(OpenAI.apis.responsesWebSocket).toBe(OpenAI.responsesWebSocket)
expect(OpenAICompatible.deepseek.model).toBeFunction()
expect(Cloudflare.model).toBeFunction()
expect(Cloudflare.provider.model).toBe(Cloudflare.model)
expect(Cloudflare.aiGateway).toBeFunction()
expect(Cloudflare.workersAI).toBeFunction()
expect(OpenRouter.model).toBeFunction()
expect(OpenRouter.provider.model).toBe(OpenRouter.model)
expect(XAI.model).toBeFunction()
expect(XAI.provider.model).toBe(XAI.model)
expect(XAI.apis.responses).toBe(XAI.responses)
expect(XAI.apis.chat).toBe(XAI.chat)
expect(XAI.responses("grok-4.3", { apiKey: "fixture" })).toMatchObject({
route: "openai-responses",
})
expect(XAI.chat("grok-4.3", { apiKey: "fixture" })).toMatchObject({
route: "openai-compatible-chat",
})
expect(GitHubCopilot.model).toBeFunction()
})
test("protocol barrels expose supported low-level routes", () => {
expect(OpenAIChat.route.id).toBe("openai-chat")
expect(OpenAICompatibleChat.route.id).toBe("openai-compatible-chat")
expect(OpenAIResponses.route.id).toBe("openai-responses")
expect(OpenAIResponses.webSocketRoute.id).toBe("openai-responses-websocket")
expect(AnthropicMessages.route.id).toBe("anthropic-messages")
})
})

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch",
"recordedAt": "2026-05-05T20:09:16.245Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"messages\":[{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"I will check the weather.\"}]},{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"call_1\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"call_1\",\"content\":\"{\\\"temperature\\\":\\\"72F\\\"}\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Use that result to answer briefly.\",\"cache_control\":{\"type\":\"ephemeral\"}}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{}}}],\"stream\":true,\"max_tokens\":4096}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01SikJVFaMR1XLMtavUhvuog\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":638,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":1,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"The\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" weather in Paris is currently 72°F.\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":638,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":14} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}
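
All of the recordings in this diff share the shape below. The field names are taken directly from the JSON above; this is a reading of the data, not the recorder's actual type definitions:

// Sketch of the fixture schema as it appears in these files.
interface RecordedFixture {
  version: number
  metadata: {
    name: string
    recordedAt: string
    tags: string[]
    // Newer recordings (see the cloudflare fixtures further down) also carry
    // provider, route, transport, and model at this level.
    [extra: string]: unknown
  }
  interactions: Array<{
    transport: "http"
    request: { method: string; url: string; headers: Record<string, string>; body?: string }
    response: {
      status: number
      headers: Record<string, string>
      body: string
      bodyEncoding?: "base64" // present when the body is binary, e.g. AWS event streams
    }
  }>
}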

View File

@@ -0,0 +1,56 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/claude-opus-4-7-drives-a-tool-loop",
"recordedAt": "2026-05-03T19:59:44.186Z",
"tags": [
"prefix:anthropic-messages",
"provider:anthropic",
"protocol:anthropic-messages",
"tool",
"tool-loop",
"golden",
"flagship"
]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-opus-4-7\",\"system\":[{\"type\":\"text\",\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is the weather in Paris?\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_tokens\":80}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-opus-4-7\",\"id\":\"msg_01DgAEgLgB1ZhavZon4qGE1t\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":798,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":0,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\": \"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\\\"Pa\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"ris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":798,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":66} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
},
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-opus-4-7\",\"system\":[{\"type\":\"text\",\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is the weather in Paris?\"}]},{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_tokens\":80}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-opus-4-7\",\"id\":\"msg_011KJqj32QjkrUAiBFxhmEoG\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":895,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":5,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Paris is curr\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"ently sunny at 22°C.\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":895,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":19}}\n\nevent: message_stop\ndata: {\"type\":\"message_stop\"}\n\n"
}
}
]
}
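
This recording captures a full tool loop: the first response stops with stop_reason "tool_use", the caller executes the tool, and the second request replays the transcript with the tool_result appended. It also shows why clients must buffer input_json_delta fragments, since the tool input arrives as split JSON text ("{\"city\": ", "\"Pa", "ris\"}") that only parses after content_block_stop. A minimal accumulation sketch over already-parsed events (the event type below is illustrative, not the package's own):

type ToolStreamEvent =
  | { type: "content_block_start"; index: number; content_block: { type: "tool_use"; id: string; name: string } }
  | { type: "content_block_delta"; index: number; delta: { type: "input_json_delta"; partial_json: string } }
  | { type: "content_block_stop"; index: number }

function collectToolCall(events: ToolStreamEvent[]): { id: string; name: string; input: unknown } {
  let id = ""
  let name = ""
  let json = ""
  for (const event of events) {
    if (event.type === "content_block_start") ({ id, name } = event.content_block)
    if (event.type === "content_block_delta") json += event.delta.partial_json
  }
  // First interaction above: id "toolu_01M8nJQQMxqpv1VaPYuJKT4j", name "get_weather", input { city: "Paris" }
  return { id, name, input: JSON.parse(json) }
}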

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/rejects-malformed-assistant-tool-order-without-patch",
"recordedAt": "2026-05-05T20:08:42.597Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool", "sad-path"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"messages\":[{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"call_1\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}},{\"type\":\"text\",\"text\":\"I will check the weather.\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"call_1\",\"content\":\"{\\\"temperature\\\":\\\"72F\\\"}\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Use that result to answer briefly.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{}}}],\"stream\":true,\"max_tokens\":4096}"
},
"response": {
"status": 400,
"headers": {
"content-type": "application/json"
},
"body": "{\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\",\"message\":\"messages.1: `tool_use` ids were found without `tool_result` blocks immediately after: call_1. Each `tool_use` block must have a corresponding `tool_result` block in the next message.\"},\"request_id\":\"req_011Cak2XdJgnzxKCY2BC2Beh\"}"
}
}
]
}
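
Read against the accepts-malformed-assistant-tool-order-with-default-patch recording above: here the raw request keeps a text block after the tool_use inside one assistant message and the API returns 400, while the patched request sends the same content as a text-only assistant message followed by a tool_use-only one, which Anthropic accepts. One plausible reading of that patch as a transform, inferred from the two request bodies rather than taken from the package's code:

type AssistantBlock =
  | { type: "text"; text: string }
  | { type: "tool_use"; id: string; name: string; input: unknown }

// Split a single malformed assistant message into text-first, tool_use-last
// messages, so the next user message can carry the tool_result directly.
function patchAssistantToolOrder(content: AssistantBlock[]): AssistantBlock[][] {
  const text = content.filter((block) => block.type === "text")
  const tools = content.filter((block) => block.type === "tool_use")
  return [text, tools].filter((group) => group.length > 0)
}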

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-text",
"recordedAt": "2026-04-28T21:18:45.535Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"You are concise.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Reply with exactly: Hello!\"}]}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01UodR8c3ezAK8rAfi8HAs8g\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":2,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Hello!\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":5} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}
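
These Anthropic bodies all use two-line SSE framing: "event: <name>", then "data: <json>", then a blank line. A parser covering exactly what these recordings emit, run over the decoded response.body string (real SSE also allows multi-line data, comments, and id/retry fields, which the fixtures don't exercise):

function* parseAnthropicSse(body: string): Generator<{ event: string; data: unknown }> {
  for (const frame of body.split("\n\n")) {
    const event = /^event: (.+)$/m.exec(frame)?.[1]
    const data = /^data: (.+)$/m.exec(frame)?.[1]
    if (event && data) yield { event, data: JSON.parse(data) }
  }
}

// For the streams-text recording this yields message_start, content_block_start,
// ping, one text_delta ("Hello!"), content_block_stop, message_delta, message_stop.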

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-tool-call",
"recordedAt": "2026-04-28T21:18:46.878Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"Call tools exactly as requested.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"tool_choice\":{\"type\":\"tool\",\"name\":\"get_weather\"},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01RYgU7NUPMK4B9v8S7gVpCS\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":16,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_012rmAruviySvUXSjgCPWVRu\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\":\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" \\\"Paris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":33} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-a-tool-call",
"recordedAt": "2026-04-28T21:18:46.929Z",
"tags": ["prefix:bedrock-converse", "provider:amazon-bedrock", "protocol:bedrock-converse", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"system\":[{\"text\":\"Call tools exactly as requested.\"}],\"inferenceConfig\":{\"maxTokens\":80,\"temperature\":0},\"toolConfig\":{\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}}],\"toolChoice\":{\"tool\":{\"name\":\"get_weather\"}}}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAuQAAAFL9kIXUCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2NyIsInJvbGUiOiJhc3Npc3RhbnQifWf51EkAAAEMAAAAV56BJZoLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tTdGFydA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFUiLCJzdGFydCI6eyJ0b29sVXNlIjp7Im5hbWUiOiJnZXRfd2VhdGhlciIsInRvb2xVc2VJZCI6InRvb2x1c2VfNmExcFB2bmM5OUdMS08zS0drVUEyTiJ9fX2LR7PFAAAA4gAAAFfCOY+BCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidG9vbFVzZSI6eyJpbnB1dCI6IntcImNpdHlcIjpcIlBhcmlzXCJ9In19LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTiJ9RkW+2gAAAIcAAABW5OxHKgs6ZXZlbnQtdHlwZQcAEGNvbnRlbnRCbG9ja1N0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwicCI6ImFiYyJ9y6nrtwAAAK4AAABRtlmf/As6ZXZlbnQtdHlwZQcAC21lc3NhZ2VTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSUyIsInN0b3BSZWFzb24iOiJ0b29sX3VzZSJ9MTlQawAAAOIAAABOplInQQs6ZXZlbnQtdHlwZQcACG1ldGFkYXRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsibWV0cmljcyI6eyJsYXRlbmN5TXMiOjM1NX0sInAiOiJhYmNkZWZnaGlqayIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjo0MTksIm91dHB1dFRva2VucyI6MTYsInNlcnZlclRvb2xVc2FnZSI6e30sInRvdGFsVG9rZW5zIjo0MzV9fU1tVJc=",
"bodyEncoding": "base64"
}
}
]
}
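
Unlike the SSE recordings, the Bedrock bodies are base64 because application/vnd.amazon.eventstream is a binary framing: each message starts with a 4-byte big-endian total length, then headers (including :event-type) and a JSON payload, bracketed by CRC32 checks. A decoding sketch using the published @smithy/eventstream-codec and @smithy/util-utf8 packages:

import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"

const codec = new EventStreamCodec(toUtf8, fromUtf8)

function decodeEventStream(base64Body: string): { eventType: string; payload: unknown }[] {
  const bytes = Uint8Array.from(atob(base64Body), (c) => c.charCodeAt(0))
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)
  const events: { eventType: string; payload: unknown }[] = []
  let offset = 0
  while (offset < bytes.byteLength) {
    const length = view.getUint32(offset) // total message length from the big-endian prelude
    const message = codec.decode(bytes.subarray(offset, offset + length))
    events.push({
      eventType: String(message.headers[":event-type"]?.value),
      payload: JSON.parse(toUtf8(message.body)),
    })
    offset += length
  }
  return events
}

// For the recording above: messageStart, contentBlockStart, contentBlockDelta
// (carrying the full {"city":"Paris"} input), contentBlockStop, messageStop, metadata.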

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-text",
"recordedAt": "2026-04-28T21:18:46.553Z",
"tags": ["prefix:bedrock-converse", "provider:amazon-bedrock", "protocol:bedrock-converse"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Say hello.\"}]}],\"system\":[{\"text\":\"Reply with the single word 'Hello'.\"}],\"inferenceConfig\":{\"maxTokens\":16,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAmQAAAFI8UarQCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUIiLCJyb2xlIjoiYXNzaXN0YW50In3SL1jNAAAAvQAAAFd4etebCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IkhlbGxvIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFIn2B0NR6AAAAxgAAAFf2eAZFCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTIn3XaHMvAAAAhwAAAFbk7EcqCzpldmVudC10eXBlBwAQY29udGVudEJsb2NrU3RvcA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjIn3Lqeu3AAAAjwAAAFFK+JlICzpldmVudC10eXBlBwALbWVzc2FnZVN0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJwIjoiYWJjZGVmZ2hpamtsbW4iLCJzdG9wUmVhc29uIjoiZW5kX3R1cm4ifZ+RQqEAAAECAAAATkXaMzsLOmV2ZW50LXR5cGUHAAhtZXRhZGF0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7Im1ldHJpY3MiOnsibGF0ZW5jeU1zIjozMDZ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVCIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjoxMiwib3V0cHV0VG9rZW5zIjoyLCJzZXJ2ZXJUb29sVXNhZ2UiOnt9LCJ0b3RhbFRva2VucyI6MTR9fSnnkUk=",
"bodyEncoding": "base64"
}
}
]
}

View File

@@ -0,0 +1,37 @@
{
"version": 1,
"metadata": {
"name": "cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text",
"recordedAt": "2026-05-08T15:55:48.952Z",
"provider": "cloudflare-ai-gateway",
"route": "cloudflare-ai-gateway",
"transport": "http",
"model": "workers-ai/@cf/meta/llama-3.1-8b-instruct",
"tags": [
"prefix:cloudflare-ai-gateway",
"provider:cloudflare-ai-gateway",
"text",
"golden"
]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://gateway.ai.cloudflare.com/v1/{account}/{gateway}/compat/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"workers-ai/@cf/meta/llama-3.1-8b-instruct\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply exactly with: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":40,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"id\":\"id-1778255748911\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"}}]}\n\ndata: {\"id\":\"id-1778255748911\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"}}]}\n\ndata: {\"id\":\"id-1778255748911\",\"object\":\"chat.completion.chunk\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":2,\"total_tokens\":47}}\n\ndata: {\"id\":\"id-1778255748911\",\"object\":\"chat.completion.chunk\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n"
}
}
]
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,37 @@
{
"version": 1,
"metadata": {
"name": "cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text",
"recordedAt": "2026-05-08T15:56:18.284Z",
"provider": "cloudflare-workers-ai",
"route": "cloudflare-workers-ai",
"transport": "http",
"model": "@cf/meta/llama-3.1-8b-instruct",
"tags": [
"prefix:cloudflare-workers-ai",
"provider:cloudflare-workers-ai",
"text",
"golden"
]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.cloudflare.com/client/v4/accounts/{account}/ai/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply exactly with: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":40,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"id\":\"id-1778255778230\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"}}]}\n\ndata: {\"id\":\"id-1778255778230\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"}}]}\n\ndata: {\"id\":\"id-1778255778230\",\"object\":\"chat.completion.chunk\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":2,\"total_tokens\":47}}\n\ndata: {\"id\":\"id-1778255778230\",\"object\":\"chat.completion.chunk\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n"
}
}
]
}
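
Both Cloudflare recordings exercise the same OpenAI-compatible chat shape against different endpoints: the AI Gateway compat route and the account-scoped Workers AI route. The {account} and {gateway} URL segments read as recorder placeholders standing in for account-specific values, so replays do not depend on real identifiers; that is an inference from the recorded URLs, not something these files state.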

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-text",
"recordedAt": "2026-04-28T21:18:47.483Z",
"tags": ["prefix:gemini", "provider:google", "protocol:gemini"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Reply with exactly: Hello!\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"You are concise.\"}]},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"Hello!\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 11,\"candidatesTokenCount\": 2,\"totalTokenCount\": 29,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 11}],\"thoughtsTokenCount\": 16},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaczMAZ-b_uMP6u--iQg\"}\r\n\r\n"
}
}
]
}

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-tool-call",
"recordedAt": "2026-04-28T21:18:48.285Z",
"tags": ["prefix:gemini", "provider:google", "protocol:gemini", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"Call tools exactly as requested.\"}]},\"tools\":[{\"functionDeclarations\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"required\":[\"city\"],\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}]}],\"toolConfig\":{\"functionCallingConfig\":{\"mode\":\"ANY\",\"allowedFunctionNames\":[\"get_weather\"]}},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"functionCall\": {\"name\": \"get_weather\",\"args\": {\"city\": \"Paris\"}},\"thoughtSignature\": \"CiQBDDnWx5RcSsS1UMbykQ5HWlrMu6wrxXGUhmZ0uRKLaMhDZaEKXwEMOdbHVoJAlfbOQyKB378pDZ/gkjWr3HP+dWw1us1kMG22g4G3oJvuTq/SrWS+7KYtSlvOxCKhW2l/2/TczpyGyGmANmsusDcxF1SKOYA5/8Hg0nI24MAlT3+91V/MCoUBAQw51seClFLy3E71v2H44F1kpmjgz8FeTRZofrjbaazfrT+w8Yxgdr3UgGagLMY4OadZemQTWckq9IAqRum78hrBg6NGtQvn15SbtfTNqI4PcxX/+qPo4/g4/ZT5kVORDhVqO8BVP/RA5GQ3ce3sRK8hSkvQlXSoXIPpHh6x7hBezIGXzw==\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0,\"finishMessage\": \"Model generated function call(s).\"}],\"usageMetadata\": {\"promptTokenCount\": 55,\"candidatesTokenCount\": 15,\"totalTokenCount\": 115,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 55}],\"thoughtsTokenCount\": 45},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaYuTJ_OW_uMPgIPKgAg\"}\r\n\r\n"
}
}
]
}
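
Gemini's stream is bare "data:" frames with CRLF separators and no "event:" line, and tool calls arrive as one complete functionCall part (plus an opaque thoughtSignature) rather than incremental JSON. A sketch pulling the useful parts out of the two Gemini recordings above, with types trimmed to the fields actually present:

interface GeminiChunk {
  candidates?: Array<{
    content?: { parts?: Array<{ text?: string; functionCall?: { name: string; args: unknown } }> }
    finishReason?: string
  }>
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; totalTokenCount?: number }
}

function extractGeminiParts(body: string): { text: string; calls: { name: string; args: unknown }[] } {
  const text: string[] = []
  const calls: { name: string; args: unknown }[] = []
  for (const line of body.split(/\r?\n/)) {
    if (!line.startsWith("data: ")) continue
    const chunk: GeminiChunk = JSON.parse(line.slice("data: ".length))
    for (const part of chunk.candidates?.[0]?.content?.parts ?? []) {
      if (part.text) text.push(part.text)
      if (part.functionCall) calls.push(part.functionCall)
    }
  }
  return { text: text.join(""), calls }
}

// streams-text      -> { text: "Hello!", calls: [] }
// streams-tool-call -> { text: "", calls: [{ name: "get_weather", args: { city: "Paris" } }] }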

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-text",
"recordedAt": "2026-05-06T01:33:30.542Z",
"tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Say hello in one short sentence.\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\",\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"g9SWm2h6J\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"lVzwlh\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"onzhziaLGv\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"LzUj1\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[],\"usage\":{\"prompt_tokens\":22,\"completion_tokens\":2,\"total_tokens\":24,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"emMuPcvvOkI\"}\n\ndata: [DONE]\n\n"
}
}
]
}
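
The OpenAI chunks interleave a role-priming delta, content deltas, a finish chunk, and a final usage-only chunk whose choices array is empty (plus a non-standard obfuscation padding field), terminated by "data: [DONE]". A sketch folding the decoded body back into text plus usage:

interface ChatChunk {
  choices: Array<{ delta: { content?: string }; finish_reason?: string | null }>
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } | null
}

function foldChatStream(body: string): { text: string; usage?: ChatChunk["usage"] } {
  let text = ""
  let usage: ChatChunk["usage"] = undefined
  for (const line of body.split("\n")) {
    if (!line.startsWith("data: ") || line === "data: [DONE]") continue
    const chunk = JSON.parse(line.slice("data: ".length)) as ChatChunk
    text += chunk.choices[0]?.delta.content ?? ""
    usage = chunk.usage ?? usage // usage only appears on the final chunk
  }
  return { text, usage }
}

// For the recording above: { text: "Hello!", usage: { prompt_tokens: 22, completion_tokens: 2, total_tokens: 24, ... } }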

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-tool-call",
"recordedAt": "2026-05-06T01:33:31.127Z",
"tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"index\":0,\"id\":\"call_5wBV98AvGPwOyC6a2HtKh85w\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}],\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"hrw8\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"MzOlaTohF20Sbb\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"QuYBQ5vYEUVxR\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"spyXlsV2hl6l\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"Db1cjFKa6YAI\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"oPu35nrhXcjTL5\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"63TVy\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[],\"usage\":{\"prompt_tokens\":67,\"completion_tokens\":5,\"total_tokens\":72,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"NxJjur40z4H\"}\n\ndata: [DONE]\n\n"
}
}
]
}
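
Tool-call arguments stream the same way text does, as string fragments, but keyed by tool_calls[].index so parallel calls can interleave; only the first fragment carries the call id and function name. A sketch of that accumulation, with field names read off the chunks above:

interface ToolCallDelta {
  index: number
  id?: string
  function?: { name?: string; arguments?: string }
}

function foldToolCalls(body: string): { id: string; name: string; args: string }[] {
  const calls: { id: string; name: string; args: string }[] = []
  for (const line of body.split("\n")) {
    if (!line.startsWith("data: ") || line === "data: [DONE]") continue
    const chunk = JSON.parse(line.slice("data: ".length))
    const deltas: ToolCallDelta[] = chunk.choices?.[0]?.delta?.tool_calls ?? []
    for (const delta of deltas) {
      calls[delta.index] ??= { id: "", name: "", args: "" }
      if (delta.id) calls[delta.index].id = delta.id
      if (delta.function?.name) calls[delta.index].name = delta.function.name
      calls[delta.index].args += delta.function?.arguments ?? ""
    }
  }
  // For the recording above: [{ id: "call_5wBV98AvGPwOyC6a2HtKh85w", name: "get_weather", args: "{\"city\":\"Paris\"}" }]
  return calls
}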

Some files were not shown because too many files have changed in this diff.