Add native LLM core foundation (#24712)

This commit is contained in:
Kit Langton
2026-05-08 16:56:20 -04:00
committed by GitHub
parent dc7d665e94
commit 5bb7b23440
144 changed files with 17052 additions and 2 deletions

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@ node_modules
.worktrees
.sst
.env
.env.local
.idea
.vscode
.codex

5
.gitleaksignore Normal file
View File

@@ -0,0 +1,5 @@
# Fake secret-looking strings used by HTTP recorder redaction tests.
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:69
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:92
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:146
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:gcp-api-key:71

View File

@@ -111,6 +111,7 @@
"zod": "catalog:",
},
"devDependencies": {
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
"@webgpu/types": "0.1.54",
"typescript": "catalog:",
@@ -302,6 +303,7 @@
"devDependencies": {
"@cloudflare/workers-types": "catalog:",
"@tailwindcss/vite": "catalog:",
"@types/bun": "catalog:",
"@types/luxon": "catalog:",
"@typescript/native-preview": "catalog:",
"tailwindcss": "catalog:",
@@ -325,6 +327,37 @@
"typescript": "catalog:",
},
},
"packages/http-recorder": {
"name": "@opencode-ai/http-recorder",
"version": "0.0.0",
"dependencies": {
"@effect/platform-node": "catalog:",
"effect": "catalog:",
},
"devDependencies": {
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
},
},
"packages/llm": {
"name": "@opencode-ai/llm",
"version": "1.14.25",
"dependencies": {
"@smithy/eventstream-codec": "4.2.14",
"@smithy/util-utf8": "4.2.2",
"aws4fetch": "1.0.20",
"effect": "catalog:",
},
"devDependencies": {
"@clack/prompts": "1.0.0-alpha.1",
"@effect/platform-node": "catalog:",
"@opencode-ai/http-recorder": "workspace:*",
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
},
},
"packages/opencode": {
"name": "opencode",
"version": "1.14.41",
@@ -1552,6 +1585,10 @@
"@opencode-ai/function": ["@opencode-ai/function@workspace:packages/function"],
"@opencode-ai/http-recorder": ["@opencode-ai/http-recorder@workspace:packages/http-recorder"],
"@opencode-ai/llm": ["@opencode-ai/llm@workspace:packages/llm"],
"@opencode-ai/plugin": ["@opencode-ai/plugin@workspace:packages/plugin"],
"@opencode-ai/script": ["@opencode-ai/script@workspace:packages/script"],
@@ -5566,6 +5603,10 @@
"@opencode-ai/desktop/typescript": ["typescript@5.6.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw=="],
"@opencode-ai/llm/@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.14", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.14.1", "@smithy/util-hex-encoding": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-erZq0nOIpzfeZdCyzZjdJb4nVSKLUmSkaQUVkRGQTXs30gyUGeKnrYEg+Xe1W5gE3aReS7IgsvANwVPxSzY6Pw=="],
"@opencode-ai/llm/@smithy/util-utf8": ["@smithy/util-utf8@4.2.2", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw=="],
"@opencode-ai/ui/@solid-primitives/resize-observer": ["@solid-primitives/resize-observer@2.1.3", "", { "dependencies": { "@solid-primitives/event-listener": "^2.4.3", "@solid-primitives/rootless": "^1.5.2", "@solid-primitives/static-store": "^0.1.2", "@solid-primitives/utils": "^6.3.2" }, "peerDependencies": { "solid-js": "^1.6.12" } }, "sha512-zBLje5E06TgOg93S7rGPldmhDnouNGhvfZVKOp+oG2XU8snA+GoCSSCz1M+jpNAg5Ek2EakU5UVQqL152WmdXQ=="],
"@opencode-ai/web/@shikijs/transformers": ["@shikijs/transformers@3.20.0", "", { "dependencies": { "@shikijs/core": "3.20.0", "@shikijs/types": "3.20.0" } }, "sha512-PrHHMRr3Q5W1qB/42kJW6laqFyWdhrPF2hNR9qjOm1xcSiAO3hAHo7HaVyHE6pMyevmy3i51O8kuGGXC78uK3g=="],
@@ -6632,6 +6673,8 @@
"@opencode-ai/desktop/@actions/artifact/@actions/http-client": ["@actions/http-client@2.2.3", "", { "dependencies": { "tunnel": "^0.0.6", "undici": "^5.25.4" } }, "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA=="],
"@opencode-ai/llm/@smithy/eventstream-codec/@smithy/types": ["@smithy/types@4.14.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-59b5HtSVrVR/eYNei3BUj3DCPKD/G7EtDDe7OEJE7i7FtQFugYo6MxbotS8mVJkLNVf8gYaAlEBwwtJ9HzhWSg=="],
"@opencode-ai/web/@shikijs/transformers/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="],
"@opencode-ai/web/@shikijs/transformers/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="],

View File

@@ -35,6 +35,7 @@
"zod": "catalog:"
},
"devDependencies": {
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:",
"@webgpu/types": "0.1.54",
"typescript": "catalog:",

View File

@@ -12,7 +12,7 @@
"allowJs": true,
"strict": true,
"noEmit": true,
"types": ["vite/client", "@webgpu/types"],
"types": ["vite/client", "@webgpu/types", "bun"],
"isolatedModules": true,
"paths": {
"~/*": ["./src/*"]

View File

@@ -32,6 +32,7 @@
"@cloudflare/workers-types": "catalog:",
"@tailwindcss/vite": "catalog:",
"@typescript/native-preview": "catalog:",
"@types/bun": "catalog:",
"@types/luxon": "catalog:",
"tailwindcss": "catalog:",
"typescript": "catalog:",

View File

@@ -11,7 +11,7 @@
"allowJs": true,
"noEmit": true,
"strict": true,
"types": ["@cloudflare/workers-types", "vite/client"],
"types": ["@cloudflare/workers-types", "vite/client", "bun"],
"isolatedModules": true,
"paths": {
"~/*": ["./src/*"]

View File

@@ -0,0 +1,26 @@
{
"$schema": "https://json.schemastore.org/package.json",
"version": "0.0.0",
"name": "@opencode-ai/http-recorder",
"type": "module",
"license": "MIT",
"private": true,
"scripts": {
"test": "bun test --timeout 30000",
"test:ci": "mkdir -p .artifacts/unit && bun test --timeout 30000 --reporter=junit --reporter-outfile=.artifacts/unit/junit.xml",
"typecheck": "tsgo --noEmit"
},
"exports": {
".": "./src/index.ts",
"./*": "./src/*.ts"
},
"devDependencies": {
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:"
},
"dependencies": {
"@effect/platform-node": "catalog:",
"effect": "catalog:"
}
}

View File

@@ -0,0 +1,105 @@
import { Context, Effect, FileSystem, Layer, PlatformError, Ref } from "effect"
import * as path from "node:path"
import { cassetteSecretFindings, type SecretFinding } from "./redaction"
import type { Cassette, CassetteMetadata, Interaction } from "./schema"
import { cassetteFor, cassettePath, DEFAULT_RECORDINGS_DIR, formatCassette, parseCassette } from "./storage"
export interface Entry {
readonly name: string
readonly path: string
}
export interface Interface {
readonly path: (name: string) => string
readonly read: (name: string) => Effect.Effect<Cassette, PlatformError.PlatformError>
readonly write: (name: string, cassette: Cassette) => Effect.Effect<void, PlatformError.PlatformError>
readonly append: (
name: string,
interaction: Interaction,
metadata: CassetteMetadata | undefined,
) => Effect.Effect<
{
readonly cassette: Cassette
readonly findings: ReadonlyArray<SecretFinding>
},
PlatformError.PlatformError
>
readonly exists: (name: string) => Effect.Effect<boolean>
readonly list: () => Effect.Effect<ReadonlyArray<Entry>, PlatformError.PlatformError>
readonly scan: (cassette: Cassette) => ReadonlyArray<SecretFinding>
}
export class Service extends Context.Service<Service, Interface>()("@opencode-ai/http-recorder/Cassette") {}
export const layer = (options: { readonly directory?: string } = {}) =>
Layer.effect(
Service,
Effect.gen(function* () {
const fileSystem = yield* FileSystem.FileSystem
const directory = options.directory ?? DEFAULT_RECORDINGS_DIR
const recorded = yield* Ref.make(new Map<string, ReadonlyArray<Interaction>>())
const pathFor = (name: string) => cassettePath(name, directory)
const walk = (directory: string): Effect.Effect<ReadonlyArray<string>, PlatformError.PlatformError> =>
Effect.gen(function* () {
const entries = yield* fileSystem
.readDirectory(directory)
.pipe(Effect.catch(() => Effect.succeed([] as string[])))
const nested = yield* Effect.forEach(entries, (entry) => {
const full = path.join(directory, entry)
return fileSystem.stat(full).pipe(
Effect.flatMap((stat) => (stat.type === "Directory" ? walk(full) : Effect.succeed([full]))),
Effect.catch(() => Effect.succeed([] as string[])),
)
})
return nested.flat()
})
const read = Effect.fn("Cassette.read")(function* (name: string) {
return parseCassette(yield* fileSystem.readFileString(pathFor(name)))
})
const write = Effect.fn("Cassette.write")(function* (name: string, cassette: Cassette) {
yield* fileSystem.makeDirectory(path.dirname(pathFor(name)), { recursive: true })
yield* fileSystem.writeFileString(pathFor(name), formatCassette(cassette))
})
const append = Effect.fn("Cassette.append")(function* (
name: string,
interaction: Interaction,
metadata: CassetteMetadata | undefined,
) {
const interactions = yield* Ref.updateAndGet(recorded, (previous) =>
new Map(previous).set(name, [...(previous.get(name) ?? []), interaction]),
)
const cassette = cassetteFor(name, interactions.get(name) ?? [], metadata)
const findings = cassetteSecretFindings(cassette)
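// Persist only when the scan found no secret-looking values; callers surface findings as errors.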
if (findings.length === 0) yield* write(name, cassette)
return { cassette, findings }
})
const exists = Effect.fn("Cassette.exists")(function* (name: string) {
return yield* fileSystem.access(pathFor(name)).pipe(
Effect.as(true),
Effect.catch(() => Effect.succeed(false)),
)
})
const list = Effect.fn("Cassette.list")(function* () {
return (yield* walk(directory))
.filter((file) => file.endsWith(".json"))
.map((file) => ({
name: path.relative(directory, file).replace(/\\/g, "/").replace(/\.json$/, ""),
path: file,
}))
.toSorted((a, b) => a.name.localeCompare(b.name))
})
return Service.of({ path: pathFor, read, write, append, exists, list, scan: cassetteSecretFindings })
}),
)
export const defaultLayer = layer()
export * as Cassette from "./cassette"

View File

@@ -0,0 +1,95 @@
import { Option } from "effect"
import { Headers, HttpBody, HttpClientRequest, UrlParams } from "effect/unstable/http"
import { decodeJson } from "./matching"
import { REDACTED, redactUrl, secretFindings } from "./redaction"
import { httpInteractions, type Cassette, type RequestSnapshot } from "./schema"
const safeText = (value: unknown) => {
if (value === undefined) return "undefined"
if (secretFindings(value).length > 0) return JSON.stringify(REDACTED)
const text = JSON.stringify(value)
if (!text) return String(value)
return text.length > 300 ? `${text.slice(0, 300)}...` : text
}
const jsonBody = (body: string) => Option.getOrUndefined(decodeJson(body))
const valueDiffs = (expected: unknown, received: unknown, base = "$", limit = 8): ReadonlyArray<string> => {
if (Object.is(expected, received)) return []
if (
expected &&
received &&
typeof expected === "object" &&
typeof received === "object" &&
!Array.isArray(expected) &&
!Array.isArray(received)
) {
return [...new Set([...Object.keys(expected), ...Object.keys(received)])]
.toSorted()
.flatMap((key) =>
valueDiffs(
(expected as Record<string, unknown>)[key],
(received as Record<string, unknown>)[key],
`${base}.${key}`,
limit,
),
)
.slice(0, limit)
}
if (Array.isArray(expected) && Array.isArray(received)) {
return Array.from({ length: Math.max(expected.length, received.length) }, (_, index) => index)
.flatMap((index) => valueDiffs(expected[index], received[index], `${base}[${index}]`, limit))
.slice(0, limit)
}
return [`${base} expected ${safeText(expected)}, received ${safeText(received)}`]
}
const headerDiffs = (expected: Record<string, string>, received: Record<string, string>) =>
[...new Set([...Object.keys(expected), ...Object.keys(received)])].toSorted().flatMap((key) => {
if (expected[key] === received[key]) return []
if (expected[key] === undefined) return [` ${key} unexpected ${safeText(received[key])}`]
if (received[key] === undefined) return [` ${key} missing expected ${safeText(expected[key])}`]
return [` ${key} expected ${safeText(expected[key])}, received ${safeText(received[key])}`]
})
export const requestDiff = (expected: RequestSnapshot, received: RequestSnapshot) => {
const lines: string[] = []
if (expected.method !== received.method) {
lines.push("method:", ` expected ${expected.method}, received ${received.method}`)
}
if (expected.url !== received.url) {
lines.push("url:", ` expected ${expected.url}`, ` received ${received.url}`)
}
const headers = headerDiffs(expected.headers, received.headers)
if (headers.length > 0) lines.push("headers:", ...headers.slice(0, 8))
const expectedBody = jsonBody(expected.body)
const receivedBody = jsonBody(received.body)
const body =
expectedBody !== undefined && receivedBody !== undefined
? valueDiffs(expectedBody, receivedBody).map((line) => ` ${line}`)
: expected.body === received.body
? []
: [` expected ${safeText(expected.body)}, received ${safeText(received.body)}`]
if (body.length > 0) lines.push("body:", ...body)
return lines
}
export const mismatchDetail = (cassette: Cassette, incoming: RequestSnapshot) => {
const interactions = httpInteractions(cassette)
if (interactions.length === 0) return "cassette has no recorded HTTP interactions"
const ranked = interactions
.map((interaction, index) => ({ index, lines: requestDiff(interaction.request, incoming) }))
.toSorted((a, b) => a.lines.length - b.lines.length || a.index - b.index)
const best = ranked[0]
return ["no recorded interaction matched", `closest interaction: #${best.index + 1}`, ...best.lines].join("\n")
}
export const redactedErrorRequest = (request: HttpClientRequest.HttpClientRequest) =>
HttpClientRequest.makeWith(
request.method,
redactUrl(request.url),
UrlParams.empty,
Option.none(),
Headers.empty,
HttpBody.empty,
)

View File

@@ -0,0 +1,211 @@
import { NodeFileSystem } from "@effect/platform-node"
import { Effect, Layer, Option, Ref } from "effect"
import {
FetchHttpClient,
HttpClient,
HttpClientError,
HttpClientRequest,
HttpClientResponse,
} from "effect/unstable/http"
import { redactedErrorRequest, mismatchDetail, requestDiff } from "./diff"
import { defaultMatcher, decodeJson, type RequestMatcher } from "./matching"
import { redactHeaders, redactUrl, type SecretFinding } from "./redaction"
import {
httpInteractions,
type Cassette,
type CassetteMetadata,
type HttpInteraction,
type ResponseSnapshot,
} from "./schema"
import * as CassetteService from "./cassette"
export const DEFAULT_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
const DEFAULT_RESPONSE_HEADERS: ReadonlyArray<string> = ["content-type"]
export type RecordReplayMode = "record" | "replay" | "passthrough"
export interface RecordReplayOptions {
readonly mode?: RecordReplayMode
readonly directory?: string
readonly metadata?: CassetteMetadata
readonly redact?: {
readonly headers?: ReadonlyArray<string>
readonly query?: ReadonlyArray<string>
readonly url?: (url: string) => string
}
readonly requestHeaders?: ReadonlyArray<string>
readonly responseHeaders?: ReadonlyArray<string>
readonly redactBody?: (body: unknown) => unknown
readonly dispatch?: "match" | "sequential"
readonly match?: RequestMatcher
}
const responseHeaders = (
response: HttpClientResponse.HttpClientResponse,
allow: ReadonlyArray<string>,
redact: ReadonlyArray<string> | undefined,
) => {
const merged = redactHeaders(response.headers as Record<string, string>, allow, redact)
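// Default a missing content-type to SSE so streaming fixtures replay with the expected framing.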
if (!merged["content-type"]) merged["content-type"] = "text/event-stream"
return merged
}
const BINARY_CONTENT_TYPES: ReadonlyArray<string> = ["vnd.amazon.eventstream", "octet-stream"]
const isBinaryContentType = (contentType: string | undefined) => {
if (!contentType) return false
const lower = contentType.toLowerCase()
return BINARY_CONTENT_TYPES.some((token) => lower.includes(token))
}
const captureResponseBody = (response: HttpClientResponse.HttpClientResponse, contentType: string | undefined) =>
isBinaryContentType(contentType)
? response.arrayBuffer.pipe(
Effect.map((bytes) => ({ body: Buffer.from(bytes).toString("base64"), bodyEncoding: "base64" as const })),
)
: response.text.pipe(Effect.map((body) => ({ body })))
const decodeResponseBody = (snapshot: ResponseSnapshot) =>
snapshot.bodyEncoding === "base64" ? Buffer.from(snapshot.body, "base64") : snapshot.body
const fixtureMissing = (request: HttpClientRequest.HttpClientRequest, name: string) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Fixture "${name}" not found. Run with RECORD=true to create it.`,
}),
})
const fixtureMismatch = (request: HttpClientRequest.HttpClientRequest, name: string, detail: string) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Fixture "${name}" does not match the current request: ${detail}. Run with RECORD=true to update it.`,
}),
})
const unsafeCassette = (
request: HttpClientRequest.HttpClientRequest,
name: string,
findings: ReadonlyArray<SecretFinding>,
) =>
new HttpClientError.HttpClientError({
reason: new HttpClientError.TransportError({
request: redactedErrorRequest(request),
description: `Refusing to write cassette "${name}" because it contains possible secrets: ${findings
.map((item) => `${item.path} (${item.reason})`)
.join(", ")}`,
}),
})
export const recordingLayer = (
name: string,
options: Omit<RecordReplayOptions, "directory"> = {},
): Layer.Layer<HttpClient.HttpClient, never, HttpClient.HttpClient | CassetteService.Service> =>
Layer.effect(
HttpClient.HttpClient,
Effect.gen(function* () {
const upstream = yield* HttpClient.HttpClient
const cassetteService = yield* CassetteService.Service
const requestHeadersAllow = options.requestHeaders ?? DEFAULT_REQUEST_HEADERS
const responseHeadersAllow = options.responseHeaders ?? DEFAULT_RESPONSE_HEADERS
const match = options.match ?? defaultMatcher
const mode = options.mode ?? "replay"
const sequential = options.dispatch === "sequential"
const replay = yield* Ref.make<Cassette | undefined>(undefined)
const cursor = yield* Ref.make(0)
const snapshotRequest = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
const raw = yield* Effect.promise(() => web.text())
const body = options.redactBody
? Option.match(decodeJson(raw), {
onNone: () => raw,
onSome: (parsed) => JSON.stringify(options.redactBody?.(parsed)),
})
: raw
return {
method: web.method,
url: redactUrl(web.url, options.redact?.query, options.redact?.url),
headers: redactHeaders(
Object.fromEntries(web.headers.entries()),
requestHeadersAllow,
options.redact?.headers,
),
body,
}
})
const selectInteraction = (cassette: Cassette, incoming: HttpInteraction["request"]) =>
Effect.gen(function* () {
const interactions = httpInteractions(cassette)
if (sequential) {
const index = yield* Ref.get(cursor)
const interaction = interactions[index]
if (!interaction)
return { interaction, detail: `interaction ${index + 1} of ${interactions.length} not recorded` }
if (!match(incoming, interaction.request)) {
return { interaction: undefined, detail: requestDiff(interaction.request, incoming).join("\n") }
}
yield* Ref.update(cursor, (n) => n + 1)
return { interaction, detail: "" }
}
const interaction = interactions.find((candidate) => match(incoming, candidate.request))
return { interaction, detail: interaction ? "" : mismatchDetail(cassette, incoming) }
})
const loadReplay = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const cached = yield* Ref.get(replay)
if (cached) return cached
const cassette = yield* cassetteService.read(name).pipe(Effect.mapError(() => fixtureMissing(request, name)))
yield* Ref.set(replay, cassette)
return cassette
})
return HttpClient.make((request) => {
if (mode === "passthrough") return upstream.execute(request)
if (mode === "record") {
return Effect.gen(function* () {
const currentRequest = yield* snapshotRequest(request)
const response = yield* upstream.execute(request)
const headers = responseHeaders(response, responseHeadersAllow, options.redact?.headers)
const captured = yield* captureResponseBody(response, headers["content-type"])
const interaction: HttpInteraction = {
transport: "http",
request: currentRequest,
response: { status: response.status, headers, ...captured },
}
const result = yield* cassetteService.append(name, interaction, options.metadata).pipe(Effect.orDie)
const findings = result.findings
if (findings.length > 0) return yield* unsafeCassette(request, name, findings)
return HttpClientResponse.fromWeb(
request,
new Response(decodeResponseBody(interaction.response), interaction.response),
)
})
}
return Effect.gen(function* () {
const cassette = yield* loadReplay(request)
const incoming = yield* snapshotRequest(request)
const { interaction, detail } = yield* selectInteraction(cassette, incoming)
if (!interaction) return yield* fixtureMismatch(request, name, detail)
return HttpClientResponse.fromWeb(
request,
new Response(decodeResponseBody(interaction.response), interaction.response),
)
})
})
}),
)
export const cassetteLayer = (name: string, options: RecordReplayOptions = {}): Layer.Layer<HttpClient.HttpClient> =>
recordingLayer(name, options).pipe(
Layer.provide(CassetteService.layer({ directory: options.directory })),
Layer.provide(FetchHttpClient.layer),
Layer.provide(NodeFileSystem.layer),
)

View File

@@ -0,0 +1,10 @@
export * from "./schema"
export * from "./redaction"
export * from "./matching"
export * from "./diff"
export * from "./storage"
export * from "./websocket"
export * from "./effect"
export * as Cassette from "./cassette"
export * as HttpRecorder from "."

View File

@@ -0,0 +1,36 @@
import { Option, Schema } from "effect"
import type { RequestSnapshot } from "./schema"
const JsonValue = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownOption(JsonValue)
const isRecord = (value: unknown): value is Record<string, unknown> =>
value !== null && typeof value === "object" && !Array.isArray(value)
export const canonicalizeJson = (value: unknown): unknown => {
if (Array.isArray(value)) return value.map(canonicalizeJson)
if (isRecord(value)) {
return Object.fromEntries(
Object.keys(value)
.toSorted()
.map((key) => [key, canonicalizeJson(value[key])]),
)
}
return value
}
export type RequestMatcher = (incoming: RequestSnapshot, recorded: RequestSnapshot) => boolean
export const canonicalSnapshot = (snapshot: RequestSnapshot): string =>
JSON.stringify({
method: snapshot.method,
url: snapshot.url,
headers: canonicalizeJson(snapshot.headers),
body: Option.match(decodeJson(snapshot.body), {
onNone: () => snapshot.body,
onSome: canonicalizeJson,
}),
})
export const defaultMatcher: RequestMatcher = (incoming, recorded) =>
canonicalSnapshot(incoming) === canonicalSnapshot(recorded)

View File

@@ -0,0 +1,112 @@
import type { Cassette } from "./schema"
export const REDACTED = "[REDACTED]"
const DEFAULT_REDACT_HEADERS = [
"authorization",
"cookie",
"proxy-authorization",
"set-cookie",
"x-api-key",
"x-amz-security-token",
"x-goog-api-key",
]
const DEFAULT_REDACT_QUERY = [
"access_token",
"api-key",
"api_key",
"apikey",
"code",
"key",
"signature",
"sig",
"token",
"x-amz-credential",
"x-amz-security-token",
"x-amz-signature",
]
const SECRET_PATTERNS: ReadonlyArray<{ readonly label: string; readonly pattern: RegExp }> = [
{ label: "bearer token", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{16,}\b/i },
{ label: "API key", pattern: /\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b/ },
{ label: "Anthropic API key", pattern: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/ },
{ label: "Google API key", pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/ },
{ label: "AWS access key", pattern: /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/ },
{ label: "GitHub token", pattern: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/ },
{ label: "private key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/ },
]
const ENV_SECRET_NAMES = /(?:API|AUTH|BEARER|CREDENTIAL|KEY|PASSWORD|SECRET|TOKEN)/i
const SAFE_ENV_VALUES = new Set(["fixture", "test", "test-key"])
const envSecrets = () =>
Object.entries(process.env).flatMap(([name, value]) => {
if (!value) return []
if (!ENV_SECRET_NAMES.test(name)) return []
if (value.length < 12) return []
if (SAFE_ENV_VALUES.has(value.toLowerCase())) return []
return [{ name, value }]
})
const pathFor = (base: string, key: string) => (base ? `${base}.${key}` : key)
const stringEntries = (value: unknown, base = ""): ReadonlyArray<{ readonly path: string; readonly value: string }> => {
if (typeof value === "string") return [{ path: base, value }]
if (Array.isArray(value)) return value.flatMap((item, index) => stringEntries(item, `${base}[${index}]`))
if (value && typeof value === "object") {
return Object.entries(value).flatMap(([key, child]) => stringEntries(child, pathFor(base, key)))
}
return []
}
const redactionSet = (values: ReadonlyArray<string> | undefined, defaults: ReadonlyArray<string>) =>
new Set([...defaults, ...(values ?? [])].map((value) => value.toLowerCase()))
export type UrlRedactor = (url: string) => string
export const redactUrl = (raw: string, query: ReadonlyArray<string> = DEFAULT_REDACT_QUERY, urlRedactor?: UrlRedactor) => {
if (!URL.canParse(raw)) return urlRedactor?.(raw) ?? raw
const url = new URL(raw)
if (url.username) url.username = REDACTED
if (url.password) url.password = REDACTED
const redacted = redactionSet(query, DEFAULT_REDACT_QUERY)
for (const key of [...url.searchParams.keys()]) {
if (redacted.has(key.toLowerCase())) url.searchParams.set(key, REDACTED)
}
return urlRedactor?.(url.toString()) ?? url.toString()
}
export const redactHeaders = (
headers: Record<string, string>,
allow: ReadonlyArray<string>,
redact: ReadonlyArray<string> = DEFAULT_REDACT_HEADERS,
) => {
const allowed = new Set(allow.map((name) => name.toLowerCase()))
const redacted = redactionSet(redact, DEFAULT_REDACT_HEADERS)
return Object.fromEntries(
Object.entries(headers)
.map(([name, value]) => [name.toLowerCase(), value] as const)
.filter(([name]) => allowed.has(name))
.map(([name, value]) => [name, redacted.has(name) ? REDACTED : value] as const)
.toSorted(([a], [b]) => a.localeCompare(b)),
)
}
export type SecretFinding = {
readonly path: string
readonly reason: string
}
export const secretFindings = (value: unknown): ReadonlyArray<SecretFinding> =>
stringEntries(value).flatMap((entry) => [
...SECRET_PATTERNS.filter((item) => item.pattern.test(entry.value)).map((item) => ({
path: entry.path,
reason: item.label,
})),
...envSecrets()
.filter((item) => entry.value.includes(item.value))
.map((item) => ({ path: entry.path, reason: `environment secret ${item.name}` })),
])
export const cassetteSecretFindings = (cassette: Cassette) => secretFindings(cassette)

View File

@@ -0,0 +1,67 @@
import { Schema } from "effect"
export const RequestSnapshotSchema = Schema.Struct({
method: Schema.String,
url: Schema.String,
headers: Schema.Record(Schema.String, Schema.String),
body: Schema.String,
})
export type RequestSnapshot = Schema.Schema.Type<typeof RequestSnapshotSchema>
export const ResponseSnapshotSchema = Schema.Struct({
status: Schema.Number,
headers: Schema.Record(Schema.String, Schema.String),
body: Schema.String,
bodyEncoding: Schema.optional(Schema.Literals(["text", "base64"])),
})
export type ResponseSnapshot = Schema.Schema.Type<typeof ResponseSnapshotSchema>
export const CassetteMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)
export type CassetteMetadata = Schema.Schema.Type<typeof CassetteMetadataSchema>
export const HttpInteractionSchema = Schema.Struct({
transport: Schema.tag("http"),
request: RequestSnapshotSchema,
response: ResponseSnapshotSchema,
})
export type HttpInteraction = Schema.Schema.Type<typeof HttpInteractionSchema>
export const WebSocketFrameSchema = Schema.Union([
Schema.Struct({ kind: Schema.tag("text"), body: Schema.String }),
Schema.Struct({ kind: Schema.tag("binary"), body: Schema.String, bodyEncoding: Schema.Literal("base64") }),
])
export type WebSocketFrame = Schema.Schema.Type<typeof WebSocketFrameSchema>
export const WebSocketInteractionSchema = Schema.Struct({
transport: Schema.tag("websocket"),
open: Schema.Struct({
url: Schema.String,
headers: Schema.Record(Schema.String, Schema.String),
}),
client: Schema.Array(WebSocketFrameSchema),
server: Schema.Array(WebSocketFrameSchema),
})
export type WebSocketInteraction = Schema.Schema.Type<typeof WebSocketInteractionSchema>
export const InteractionSchema = Schema.Union([HttpInteractionSchema, WebSocketInteractionSchema]).pipe(
Schema.toTaggedUnion("transport"),
)
export type Interaction = Schema.Schema.Type<typeof InteractionSchema>
export const isHttpInteraction = InteractionSchema.guards.http
export const isWebSocketInteraction = InteractionSchema.guards.websocket
export const httpInteractions = (cassette: Cassette) => cassette.interactions.filter(isHttpInteraction)
export const webSocketInteractions = (cassette: Cassette) => cassette.interactions.filter(isWebSocketInteraction)
export const CassetteSchema = Schema.Struct({
version: Schema.Literal(1),
metadata: Schema.optional(CassetteMetadataSchema),
interactions: Schema.Array(InteractionSchema),
})
export type Cassette = Schema.Schema.Type<typeof CassetteSchema>
export const decodeCassette = Schema.decodeUnknownSync(CassetteSchema)
export const encodeCassette = Schema.encodeSync(CassetteSchema)

View File

@@ -0,0 +1,34 @@
import { Option } from "effect"
import * as fs from "node:fs"
import * as path from "node:path"
import { encodeCassette, decodeCassette, type Cassette, type CassetteMetadata, type Interaction } from "./schema"
export const DEFAULT_RECORDINGS_DIR = path.resolve(process.cwd(), "test", "fixtures", "recordings")
export const cassettePath = (name: string, directory = DEFAULT_RECORDINGS_DIR) => path.join(directory, `${name}.json`)
export const metadataFor = (name: string, metadata: CassetteMetadata | undefined): CassetteMetadata => ({
name,
recordedAt: new Date().toISOString(),
...(metadata ?? {}),
})
export const cassetteFor = (
name: string,
interactions: ReadonlyArray<Interaction>,
metadata: CassetteMetadata | undefined,
): Cassette => ({
version: 1,
metadata: metadataFor(name, metadata),
interactions,
})
export const formatCassette = (cassette: Cassette) => `${JSON.stringify(encodeCassette(cassette), null, 2)}\n`
export const parseCassette = (raw: string) => decodeCassette(JSON.parse(raw))
export const hasCassetteSync = (name: string, options: { readonly directory?: string } = {}) => {
const file = cassettePath(name, options.directory)
if (!fs.existsSync(file)) return false
return Option.isSome(Option.liftThrowable(parseCassette)(fs.readFileSync(file, "utf8")))
}

View File

@@ -0,0 +1,204 @@
import { Effect, Option, Ref, Scope, Stream } from "effect"
import type { Headers } from "effect/unstable/http"
import * as CassetteService from "./cassette"
import { canonicalizeJson, decodeJson } from "./matching"
import { redactHeaders, redactUrl, type SecretFinding } from "./redaction"
import { webSocketInteractions, type CassetteMetadata, type WebSocketFrame, type WebSocketInteraction } from "./schema"
export const DEFAULT_WEBSOCKET_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
export interface WebSocketRequest {
readonly url: string
readonly headers: Headers.Headers
}
export interface WebSocketConnection<E> {
readonly sendText: (message: string) => Effect.Effect<void, E>
readonly messages: Stream.Stream<string | Uint8Array, E>
readonly close: Effect.Effect<void>
}
export interface WebSocketExecutor<E> {
readonly open: (request: WebSocketRequest) => Effect.Effect<WebSocketConnection<E>, E>
}
export interface WebSocketRecordReplayOptions<E> {
readonly name: string
readonly mode?: "record" | "replay" | "passthrough"
readonly metadata?: CassetteMetadata
readonly cassette: CassetteService.Interface
readonly live: WebSocketExecutor<E>
readonly redact?: {
readonly headers?: ReadonlyArray<string>
readonly query?: ReadonlyArray<string>
readonly url?: (url: string) => string
}
readonly requestHeaders?: ReadonlyArray<string>
readonly compareClientMessagesAsJson?: boolean
}
const headersRecord = (headers: Headers.Headers) =>
Object.fromEntries(
Object.entries(headers as Record<string, unknown>)
.filter((entry): entry is [string, string] => typeof entry[1] === "string")
.toSorted(([a], [b]) => a.localeCompare(b)),
)
const openSnapshot = (
request: WebSocketRequest,
options: Pick<WebSocketRecordReplayOptions<never>, "redact" | "requestHeaders"> = {},
) => ({
url: redactUrl(request.url, options.redact?.query, options.redact?.url),
headers: redactHeaders(
headersRecord(request.headers),
options.requestHeaders ?? DEFAULT_WEBSOCKET_REQUEST_HEADERS,
options.redact?.headers,
),
})
const textFrame = (body: string): WebSocketFrame => ({ kind: "text", body })
const frameText = (frame: WebSocketFrame) => {
if (frame.kind === "text") return frame.body
return new TextDecoder().decode(Buffer.from(frame.body, "base64"))
}
const frameMessage = (frame: WebSocketFrame) =>
frame.kind === "text" ? frame.body : new Uint8Array(Buffer.from(frame.body, "base64"))
const receivedFrame = (message: string | Uint8Array): WebSocketFrame =>
typeof message === "string"
? textFrame(message)
: { kind: "binary", body: Buffer.from(message).toString("base64"), bodyEncoding: "base64" }
const unsafeCassette = (name: string, findings: ReadonlyArray<SecretFinding>) =>
new Error(
`Refusing to write WebSocket cassette "${name}" because it contains possible secrets: ${findings
.map((item) => `${item.path} (${item.reason})`)
.join(", ")}`,
)
const mismatch = (message: string, actual: unknown, expected: unknown) =>
new Error(`${message}: expected ${JSON.stringify(expected)}, received ${JSON.stringify(actual)}`)
const assertEqual = (message: string, actual: unknown, expected: unknown) =>
Effect.sync(() => {
if (JSON.stringify(actual) === JSON.stringify(expected)) return
throw mismatch(message, actual, expected)
})
const jsonOrText = (value: string) => Option.match(decodeJson(value), { onNone: () => value, onSome: canonicalizeJson })
const compareClientMessage = (actual: string, expected: WebSocketFrame | undefined, index: number, asJson: boolean) => {
if (!expected)
return Effect.sync(() => {
throw new Error(`Unexpected WebSocket client frame ${index + 1}: ${actual}`)
})
const expectedText = frameText(expected)
if (!asJson) return assertEqual(`WebSocket client frame ${index + 1}`, actual, expectedText)
return assertEqual(`WebSocket client JSON frame ${index + 1}`, jsonOrText(actual), jsonOrText(expectedText))
}
export const makeWebSocketExecutor = <E>(
options: WebSocketRecordReplayOptions<E>,
): Effect.Effect<WebSocketExecutor<E>, never, Scope.Scope> =>
Effect.gen(function* () {
const mode = options.mode ?? "replay"
if (mode === "passthrough") return options.live
if (mode === "record") {
return {
open: (request) =>
Effect.gen(function* () {
const client: WebSocketFrame[] = []
const server: WebSocketFrame[] = []
const connection = yield* options.live.open(request)
const closed = yield* Ref.make(false)
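// Record mode appends the buffered interaction exactly once, on the first close.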
const closeOnce = Effect.gen(function* () {
if (yield* Ref.getAndSet(closed, true)) return
yield* connection.close
const result = yield* options.cassette
.append(
options.name,
{ transport: "websocket", open: openSnapshot(request, options), client, server },
options.metadata,
)
.pipe(Effect.orDie)
if (result.findings.length > 0) yield* Effect.die(unsafeCassette(options.name, result.findings))
})
return {
sendText: (message: string) =>
connection.sendText(message).pipe(Effect.tap(() => Effect.sync(() => client.push(textFrame(message))))),
messages: connection.messages.pipe(
Stream.map((message) => {
server.push(receivedFrame(message))
return message
}),
),
close: closeOnce,
}
}),
}
}
const replay = yield* Ref.make<{ readonly interactions: ReadonlyArray<WebSocketInteraction> } | undefined>(
undefined,
)
const cursor = yield* Ref.make(0)
yield* Effect.addFinalizer(() =>
Effect.gen(function* () {
const input = yield* Ref.get(replay)
if (!input) return
yield* assertEqual(
`Unused recorded WebSocket interactions in ${options.name}`,
yield* Ref.get(cursor),
input.interactions.length,
)
}),
)
const loadReplay = Effect.fn("WebSocketRecorder.loadReplay")(function* () {
const cached = yield* Ref.get(replay)
if (cached) return cached
const input = {
interactions: webSocketInteractions(yield* options.cassette.read(options.name).pipe(Effect.orDie)),
}
yield* Ref.set(replay, input)
return input
})
return {
open: (request) => {
return Effect.gen(function* () {
const input = yield* loadReplay()
const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1)
const interaction = input.interactions[index]
if (!interaction) return yield* Effect.die(new Error(`No recorded WebSocket interaction for ${request.url}`))
yield* assertEqual(`WebSocket open frame ${index + 1}`, openSnapshot(request, options), interaction.open)
const messageIndex = yield* Ref.make(0)
return {
sendText: (message: string) =>
Effect.gen(function* () {
const current = yield* Ref.getAndUpdate(messageIndex, (value) => value + 1)
yield* compareClientMessage(
message,
interaction.client[current],
current,
options.compareClientMessagesAsJson === true,
)
}),
messages: Stream.fromIterable(interaction.server).pipe(Stream.map(frameMessage)),
close: Effect.gen(function* () {
yield* assertEqual(
`WebSocket client frame count for interaction ${index + 1}`,
yield* Ref.get(messageIndex),
interaction.client.length,
)
}),
}
})
},
}
})

View File

@@ -0,0 +1,41 @@
{
"version": 1,
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/echo",
"headers": {
"content-type": "application/json"
},
"body": "{\"step\":1}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"reply\":\"first\"}"
}
},
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/echo",
"headers": {
"content-type": "application/json"
},
"body": "{\"step\":2}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"reply\":\"second\"}"
}
}
]
}

View File

@@ -0,0 +1,41 @@
{
"version": 1,
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/poll",
"headers": {
"content-type": "application/json"
},
"body": "{\"id\":\"job_1\"}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"status\":\"pending\"}"
}
},
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://example.test/poll",
"headers": {
"content-type": "application/json"
},
"body": "{\"id\":\"job_1\"}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/json"
},
"body": "{\"status\":\"complete\"}"
}
}
]
}

View File

@@ -0,0 +1,322 @@
import { NodeFileSystem } from "@effect/platform-node"
import { describe, expect, test } from "bun:test"
import { Cause, Effect, Exit, Scope, Stream } from "effect"
import { Headers, HttpBody, HttpClient, HttpClientRequest } from "effect/unstable/http"
import * as fs from "node:fs"
import * as os from "node:os"
import * as path from "node:path"
import { HttpRecorder } from "../src"
import { redactedErrorRequest } from "../src/diff"
const post = (url: string, body: object) =>
Effect.gen(function* () {
const http = yield* HttpClient.HttpClient
const request = HttpClientRequest.post(url, {
headers: { "content-type": "application/json" },
body: HttpBody.text(JSON.stringify(body), "application/json"),
})
const response = yield* http.execute(request)
return yield* response.text
})
const run = <A, E>(effect: Effect.Effect<A, E, HttpClient.HttpClient>) =>
Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer("record-replay/multi-step"))))
const runWith = <A, E>(
name: string,
options: HttpRecorder.RecordReplayOptions,
effect: Effect.Effect<A, E, HttpClient.HttpClient>,
) => Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer(name, options))))
const runRecorder = <A, E>(effect: Effect.Effect<A, E, HttpRecorder.Cassette.Service | Scope.Scope>) =>
Effect.runPromise(
Effect.scoped(
effect.pipe(
Effect.provide(
HttpRecorder.Cassette.layer({ directory: fs.mkdtempSync(path.join(os.tmpdir(), "http-recorder-")) }),
),
Effect.provide(NodeFileSystem.layer),
),
),
)
const failureText = (exit: Exit.Exit<unknown, unknown>) => {
if (Exit.isSuccess(exit)) return ""
return Cause.prettyErrors(exit.cause).join("\n")
}
describe("http-recorder", () => {
test("redacts sensitive URL query parameters", () => {
expect(
HttpRecorder.redactUrl(
"https://example.test/path?key=secret-google-key&api_key=secret-openai-key&safe=value&X-Amz-Signature=secret-signature",
),
).toBe(
"https://example.test/path?key=%5BREDACTED%5D&api_key=%5BREDACTED%5D&safe=value&X-Amz-Signature=%5BREDACTED%5D",
)
})
test("redacts URL credentials", () => {
expect(HttpRecorder.redactUrl("https://user:password@example.test/path?safe=value")).toBe(
"https://%5BREDACTED%5D:%5BREDACTED%5D@example.test/path?safe=value",
)
})
test("applies custom URL redaction after built-in redaction", () => {
expect(
HttpRecorder.redactUrl(
"https://example.test/accounts/real-account/path?key=secret-key",
undefined,
(url) => url.replace("/accounts/real-account/", "/accounts/{account}/"),
),
).toBe("https://example.test/accounts/{account}/path?key=%5BREDACTED%5D")
})
test("redacts sensitive headers when allow-listed", () => {
expect(
HttpRecorder.redactHeaders(
{
authorization: "Bearer secret-token",
"content-type": "application/json",
"x-custom-token": "custom-secret",
"x-api-key": "secret-key",
"x-goog-api-key": "secret-google-key",
},
["authorization", "content-type", "x-api-key", "x-goog-api-key", "x-custom-token"],
["x-custom-token"],
),
).toEqual({
authorization: "[REDACTED]",
"content-type": "application/json",
"x-api-key": "[REDACTED]",
"x-custom-token": "[REDACTED]",
"x-goog-api-key": "[REDACTED]",
})
})
test("redacts error requests without retaining headers, params, or body", () => {
const request = HttpClientRequest.post("https://example.test/path", {
headers: { authorization: "Bearer super-secret" },
body: HttpBody.text("super-secret-body", "text/plain"),
}).pipe(HttpClientRequest.setUrlParam("api_key", "super-secret-key"))
expect(redactedErrorRequest(request).toJSON()).toMatchObject({
url: "https://example.test/path",
urlParams: { params: [] },
headers: {},
body: { _tag: "Empty" },
})
})
test("detects secret-looking values without returning the secret", () => {
expect(
HttpRecorder.cassetteSecretFindings({
version: 1,
interactions: [
{
transport: "http",
request: {
method: "POST",
url: "https://example.test/path?key=sk-123456789012345678901234",
headers: {},
body: JSON.stringify({ nested: "AIzaSyDHibiBRvJZLsFnPYPoiTwxY4ztQ55yqCE" }),
},
response: {
status: 200,
headers: {},
body: "Bearer abcdefghijklmnopqrstuvwxyz",
},
},
],
}),
).toEqual([
{ path: "interactions[0].request.url", reason: "API key" },
{ path: "interactions[0].request.body", reason: "Google API key" },
{ path: "interactions[0].response.body", reason: "bearer token" },
])
})
test("detects secret-looking values inside metadata", () => {
expect(
HttpRecorder.cassetteSecretFindings({
version: 1,
metadata: { token: "sk-123456789012345678901234" },
interactions: [],
}),
).toEqual([{ path: "metadata.token", reason: "API key" }])
})
test("formats websocket cassettes with shared metadata", () => {
const cassette = HttpRecorder.cassetteFor(
"websocket/basic",
[
{
transport: "websocket",
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
},
],
{ provider: "openai" },
)
expect(cassette.metadata).toMatchObject({ name: "websocket/basic", provider: "openai" })
expect(HttpRecorder.parseCassette(HttpRecorder.formatCassette(cassette))).toEqual(cassette)
})
test("replays websocket interactions from the shared cassette service", async () => {
await runRecorder(
Effect.gen(function* () {
const cassette = yield* HttpRecorder.Cassette.Service
yield* cassette.write(
"websocket/replay",
HttpRecorder.cassetteFor(
"websocket/replay",
[
{
transport: "websocket",
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
},
],
undefined,
),
)
const executor = yield* HttpRecorder.makeWebSocketExecutor({
name: "websocket/replay",
cassette,
compareClientMessagesAsJson: true,
live: { open: () => Effect.die(new Error("unexpected live WebSocket open")) },
})
const connection = yield* executor.open({
url: "wss://example.test/realtime",
headers: Headers.fromInput({ "content-type": "application/json" }),
})
yield* connection.sendText(JSON.stringify({ type: "response.create" }))
const messages: Array<string | Uint8Array> = []
yield* connection.messages.pipe(Stream.runForEach((message) => Effect.sync(() => messages.push(message))))
yield* connection.close
expect(messages).toEqual([JSON.stringify({ type: "response.completed" })])
}),
)
})
test("records websocket interactions into the shared cassette service", async () => {
await runRecorder(
Effect.gen(function* () {
const cassette = yield* HttpRecorder.Cassette.Service
const executor = yield* HttpRecorder.makeWebSocketExecutor({
name: "websocket/record",
mode: "record",
metadata: { provider: "test" },
cassette,
live: {
open: () =>
Effect.succeed({
sendText: () => Effect.void,
messages: Stream.fromIterable([JSON.stringify({ type: "response.completed" })]),
close: Effect.void,
}),
},
})
const connection = yield* executor.open({
url: "wss://example.test/realtime",
headers: Headers.fromInput({ "content-type": "application/json" }),
})
yield* connection.sendText(JSON.stringify({ type: "response.create" }))
yield* connection.messages.pipe(Stream.runDrain)
yield* connection.close
expect(yield* cassette.read("websocket/record")).toMatchObject({
metadata: { name: "websocket/record", provider: "test" },
interactions: [
{
transport: "websocket",
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
},
],
})
}),
)
})
test("default matcher dispatches multi-interaction cassettes by request shape", async () => {
await run(
Effect.gen(function* () {
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
expect(yield* post("https://example.test/echo", { step: 1 })).toBe('{"reply":"first"}')
}),
)
})
test("sequential dispatch returns recorded responses in order for identical requests", async () => {
await runWith(
"record-replay/retry",
{ dispatch: "sequential" },
Effect.gen(function* () {
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"complete"}')
}),
)
})
test("default matcher returns the first match for identical requests", async () => {
await runWith(
"record-replay/retry",
{},
Effect.gen(function* () {
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
}),
)
})
test("sequential dispatch reports cursor exhaustion when more requests are made than recorded", async () => {
await runWith(
"record-replay/multi-step",
{ dispatch: "sequential" },
Effect.gen(function* () {
yield* post("https://example.test/echo", { step: 1 })
yield* post("https://example.test/echo", { step: 2 })
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
expect(Exit.isFailure(exit)).toBe(true)
}),
)
})
test("sequential dispatch still validates each recorded request", async () => {
await runWith(
"record-replay/multi-step",
{ dispatch: "sequential" },
Effect.gen(function* () {
yield* post("https://example.test/echo", { step: 1 })
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
expect(Exit.isFailure(exit)).toBe(true)
expect(failureText(exit)).toContain("$.step expected 2, received 3")
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
}),
)
})
test("mismatch diagnostics show closest redacted request differences", async () => {
await run(
Effect.gen(function* () {
const exit = yield* Effect.exit(
post("https://example.test/echo?api_key=secret-value", { step: 3, token: "sk-123456789012345678901234" }),
)
const message = failureText(exit)
expect(message).toContain("closest interaction: #1")
expect(message).toContain("url:")
expect(message).toContain("https://example.test/echo?api_key=%5BREDACTED%5D")
expect(message).toContain("body:")
expect(message).toContain("$.step expected 1, received 3")
expect(message).toContain('$.token expected undefined, received "[REDACTED]"')
expect(message).not.toContain("sk-123456789012345678901234")
}),
)
})
})

View File

@@ -0,0 +1,15 @@
{
"$schema": "https://json.schemastore.org/tsconfig",
"extends": "@tsconfig/bun/tsconfig.json",
"compilerOptions": {
"lib": ["ESNext", "DOM", "DOM.Iterable"],
"noUncheckedIndexedAccess": false,
"plugins": [
{
"name": "@effect/language-service",
"transform": "@effect/language-service/transform",
"namespaceImportPackages": ["effect", "@effect/*"]
}
]
}
}

294
packages/llm/AGENTS.md Normal file
View File

@@ -0,0 +1,294 @@
# LLM Package Guide
## Effect
- Prefer `HttpClient.HttpClient` / `HttpClientResponse.HttpClientResponse` over web `fetch` / `Response` at package boundaries.
- Use `Stream.Stream` for streaming data flow. Avoid ad hoc async generators or manual web reader loops unless an Effect `Stream` API cannot model the behavior.
- Use Effect Schema codecs for JSON encode/decode (`Schema.fromJsonString(...)`) instead of direct `JSON.parse` / `JSON.stringify` in implementation code.
- In `Effect.gen`, yield yieldable errors directly (`return yield* new MyError(...)`) instead of `Effect.fail(new MyError(...))` (see the sketch after this list).
- Use `Effect.void` instead of `Effect.succeed(undefined)` when the successful value is intentionally void.
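A minimal sketch of the codec and yieldable-error conventions (the `DecodeError` class and `parse` helper are illustrative, and `Data.TaggedError` is assumed as the error base):
```ts
import { Data, Effect, Option, Schema } from "effect"

// Schema codec instead of bare JSON.parse in implementation code.
const JsonValue = Schema.fromJsonString(Schema.Unknown)
const decodeJson = Schema.decodeUnknownOption(JsonValue)

// Yieldable error: an instance can be yielded directly inside Effect.gen.
class DecodeError extends Data.TaggedError("DecodeError")<{ readonly raw: string }> {}

const parse = (raw: string) =>
  Effect.gen(function* () {
    const parsed = decodeJson(raw)
    // `return yield* new DecodeError(...)`, not Effect.fail(new DecodeError(...)).
    if (Option.isNone(parsed)) return yield* new DecodeError({ raw })
    return parsed.value
  })
```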
## Tests
- Use `testEffect(...)` from `test/lib/effect.ts` for tests requiring Effect layers (see the sketch after this list).
- Keep provider tests fixture-first. Live provider calls must stay behind `RECORD=true` and required API-key checks.
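A hedged sketch of that shape; `testEffect`'s exact signature lives in `test/lib/effect.ts`, and the env-var names here are illustrative:
```ts
import { Effect } from "effect"
// Assumed: a (name, effect) wrapper over bun:test that provides the Effect layers.
import { testEffect } from "./lib/effect"

// Fixture-first: this path replays a recorded cassette and needs no credentials.
testEffect("replies from the recorded fixture", Effect.void) // exercise the client here

// Live calls stay behind RECORD=true plus the required API key.
if (process.env.RECORD === "true" && process.env.OPENAI_API_KEY) {
  testEffect("re-records the fixture against the live provider", Effect.void)
}
```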
## Architecture
This package is an Effect Schema-first LLM core. The Schema classes in `src/schema/` are the canonical runtime data model. Convenience functions in `src/llm.ts` are thin constructors that return those same Schema class instances; they should improve callsites without creating a second model.
### Request Flow
The intended callsite is:
```ts
const request = LLM.request({
model: OpenAI.model("gpt-4o-mini", { apiKey }),
system: "You are concise.",
prompt: "Say hello.",
})
const response = yield* LLMClient.generate(request)
```
`LLM.request(...)` builds an `LLMRequest`. `LLMClient.generate(...)` selects a registered route by `request.model.route`, builds the provider-native body, asks the route's transport for a real `HttpClientRequest.HttpClientRequest`, sends it through `RequestExecutor.Service`, parses the provider stream into common `LLMEvent`s, and finally returns an `LLMResponse`.
Use `LLMClient.stream(request)` when callers want incremental `LLMEvent`s. Use `LLMClient.generate(request)` when callers want those same events collected into an `LLMResponse`. Use `LLMClient.prepare<Body>(request)` to compile a request through the route pipeline without sending it — the optional `Body` type argument narrows `.body` to the route's native shape (e.g. `prepare<OpenAIChatBody>(...)` returns a `PreparedRequestOf<OpenAIChatBody>`). The runtime body is identical; the generic is a type-level assertion.
Filter or narrow `LLMEvent` streams with `LLMEvent.is.*` (camelCase guards, e.g. `events.filter(LLMEvent.is.toolCall)`). The kebab-case `LLMEvent.guards["tool-call"]` form also works, but prefer `is.*` in new code.
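A short sketch tying those together, in the style of the callsite above (`events` is assumed to be a collected array of `LLMEvent`s):
```ts
// Compile through the route pipeline without sending; the generic only narrows types.
const prepared = yield* LLMClient.prepare<OpenAIChatBody>(request)
// prepared: PreparedRequestOf<OpenAIChatBody>; prepared.body is the provider-native shape.

// Narrow collected events with the camelCase guards.
const toolCalls = events.filter(LLMEvent.is.toolCall)
```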
### Routes
A route is the registered, runnable composition of four orthogonal pieces:
- **`Protocol`** (`src/route/protocol.ts`) — semantic API contract. Owns request body construction (`body.from`), the body schema (`body.schema`), the streaming-event schema (`stream.event`), and the event-to-`LLMEvent` state machine (`stream.step`). `Route.make(...)` validates and JSON-encodes the body from `body.schema` and decodes frames with `stream.event`. Examples: `OpenAIChat.protocol`, `OpenAIResponses.protocol`, `AnthropicMessages.protocol`, `Gemini.protocol`, `BedrockConverse.protocol`.
- **`Endpoint`** (`src/route/endpoint.ts`) — path construction. The host always lives on `model.baseURL`; the endpoint just supplies the path. `Endpoint.path("/chat/completions")` is the common case; pass a function for paths that embed the model id or a body field (e.g. ``Endpoint.path(({ body }) => `/model/${body.modelId}/converse-stream`)``).
- **`Auth`** (`src/route/auth.ts`) — per-request transport authentication. Routes read `model.apiKey` at request time via `Auth.bearer` (the default; sets `Authorization: Bearer <apiKey>`) or `Auth.apiKeyHeader(name)` for providers that use a custom header (Anthropic `x-api-key`, Gemini `x-goog-api-key`). Routes that need per-request signing (Bedrock SigV4, future Vertex IAM, Azure AAD) implement `Auth` as a function that signs the body and merges signed headers into the result.
- **`Framing`** (`src/route/framing.ts`) — bytes → frames. SSE (`Framing.sse`) is shared; Bedrock keeps its AWS event-stream framing as a typed `Framing<object>` value alongside its protocol.
Compose them via `Route.make(...)`:
```ts
export const route = Route.make({
id: "openai-chat",
provider: "openai",
protocol: OpenAIChat.protocol,
transport: HttpTransport.httpJson({
endpoint: Endpoint.path("/chat/completions"),
auth: Auth.bearer(),
framing: Framing.sse,
encodeBody,
}),
defaults: {
baseURL: "https://api.openai.com/v1",
capabilities: capabilities({ tools: { calls: true, streamingInput: true } }),
},
})
```
The four-axis decomposition is the reason DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, and DeepInfra all reuse `OpenAIChat.protocol` verbatim — each provider deployment is a 5-15 line `Route.make(...)` call instead of a 300-400 line route clone. Bug fixes in one protocol propagate to every consumer of that protocol in a single commit.
When a provider ships a non-HTTP transport (OpenAI's WebSocket Responses backend, hypothetical bidirectional streaming APIs), the seam is `Transport`: `WebSocketTransport.json(...)` constructs a transport whose `prepare` builds a WebSocket URL and message and whose `frames` yields decoded text from the socket. Same protocol, different transport.
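A hedged sketch of that seam; only the `Route.make` shape and the `WebSocketTransport.json` name come from this guide, so the transport options and ids are assumptions:
```ts
export const route = Route.make({
  id: "openai-responses-ws", // hypothetical id
  provider: "openai",
  protocol: OpenAIResponses.protocol, // same protocol as the HTTP route
  transport: WebSocketTransport.json({
    // assumed options: build the socket URL and first message in prepare,
    // yield decoded text from the socket in frames
  }),
  defaults: { baseURL: "wss://api.openai.com/v1" }, // illustrative host
})
```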
### URL Construction
`model.baseURL` is required; `Endpoint` only carries the path. Each protocol's `Route.make` includes a canonical URL in `defaults.baseURL` (e.g. `https://api.openai.com/v1`); provider helpers can override by passing `baseURL` in their input. Routes that have no canonical URL (OpenAI-compatible Chat, GitHub Copilot) set `baseURL: string` (required) on their input type so TypeScript catches a missing host at the call site.
For providers where the URL is derived from typed inputs (Azure resource name, Bedrock region), the provider helper computes `baseURL` at model construction time. Use `AtLeastOne<T>` from `route/auth-options.ts` for inputs that accept either of two derivation paths (Azure: `resourceName` or `baseURL`).
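For example, a sketch of the Azure input, assuming `AtLeastOne` requires at least one of the listed keys (the endpoint format below is the standard Azure OpenAI one):
```ts
// Either derivation path satisfies the input; an empty object is a type error.
type AzureModelInput = AtLeastOne<{ resourceName: string; baseURL: string }>

// Illustrative: the provider helper computes baseURL at model construction time.
const azureBaseURL = (input: AzureModelInput) =>
  input.baseURL ?? `https://${input.resourceName}.openai.azure.com/openai`
```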
### Provider Definitions
Provider-facing APIs are defined with `Provider.make(...)` from `src/provider.ts`:
```ts
export const provider = Provider.make({
id: ProviderID.make("openai"),
model: responses,
apis: { responses, chat },
})
export const model = provider.model
export const apis = provider.apis
```
Keep provider definitions small and explicit:
- Use only `id`, `model`, and optional `apis` in `Provider.make(...)`.
- Use branded `ProviderID.make(...)` and `ModelID.make(...)` where ids are constructed directly.
- Use `model` for the default API path and `apis` for named provider-native alternatives such as OpenAI `responses` versus `chat`.
- Do not add author-facing `kind`, `version`, or `routes` fields.
- Export lower-level `routes` arrays separately only when advanced internal wiring needs them.
- Prefer `apiKey` as provider-specific sugar and `auth` as the explicit override; keep them mutually exclusive in provider option types with `ProviderAuthOption`.
- Resolve `apiKey` to `Auth` with `AuthOptions.bearer(options, "<PROVIDER>_API_KEY")` (it honors an explicit `auth` override and falls back to `Auth.config(envVar)` so missing keys surface a typed `Authentication` error rather than a runtime crash); a sketch follows this list.
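A sketch of those last two rules together (the option type body is illustrative):
```ts
// `apiKey` and `auth` stay mutually exclusive via ProviderAuthOption; the
// helper resolves whichever is set and falls back to the env var.
export type XaiModelOptions = ProviderAuthOption & {
  readonly baseURL?: string
}

const xaiAuth = (options: XaiModelOptions) => AuthOptions.bearer(options, "XAI_API_KEY")
```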
Built-in providers are namespace modules from `src/providers/index.ts`, so aliases like `OpenAI.model(...)`, `OpenAI.responses(...)`, and `OpenAI.apis.chat(...)` are fine. External provider packages should default-export the `Provider.make(...)` result and may add named aliases if useful.
### Folder layout
```
packages/llm/src/
schema/ canonical Schema model, split by concern
ids.ts branded IDs, literal types, ProviderMetadata
options.ts Generation/Provider/Http options, Capabilities, Limits, ModelRef
messages.ts content parts, Message, ToolDefinition, LLMRequest
events.ts Usage, individual events, LLMEvent, PreparedRequest, LLMResponse
errors.ts error reasons, LLMError, ToolFailure
index.ts barrel
llm.ts request constructors and convenience helpers
route/
index.ts @opencode-ai/llm/route advanced barrel
client.ts Route.make + LLMClient.prepare/stream/generate
executor.ts RequestExecutor service + transport error mapping
protocol.ts Protocol type + Protocol.make
endpoint.ts Endpoint type + Endpoint.path
auth.ts Auth type + Auth.bearer / Auth.apiKeyHeader / Auth.passthrough
auth-options.ts ProviderAuthOption shape, AuthOptions.bearer, AtLeastOne helper
framing.ts Framing type + Framing.sse
transport/ transport implementations
index.ts Transport type + HttpTransport / WebSocketTransport namespaces
http.ts HttpTransport.httpJson — POST + framing
websocket.ts WebSocketTransport.json + WebSocketExecutor service
protocols/
shared.ts ProviderShared toolkit used inside protocol impls
openai-chat.ts protocol + route (compose OpenAIChat.protocol)
openai-responses.ts
anthropic-messages.ts
gemini.ts
bedrock-converse.ts
bedrock-event-stream.ts framing for AWS event-stream binary frames
openai-compatible-chat.ts route that reuses OpenAIChat.protocol, no canonical URL
utils/ per-protocol helpers (auth, cache, media, tool-stream, ...)
providers/
openai-compatible.ts generic compatible helper + family model helpers
openai-compatible-profile.ts family defaults (deepseek, togetherai, ...)
azure.ts / amazon-bedrock.ts / github-copilot.ts / google.ts / xai.ts / openai.ts / anthropic.ts / openrouter.ts
tool.ts typed tool() helper
tool-runtime.ts implementation helpers for LLMClient tool execution
```
The dependency arrow points down: `providers/*.ts` files import `protocols`, `endpoint`, `auth`, and `framing`; protocols do not import provider metadata. Lower-level modules know nothing about specific providers.
### Shared protocol helpers
`ProviderShared` exports a small toolkit used inside protocol implementations to keep them focused on provider-native shapes:
- `joinText(parts)` — joins an array of `TextPart` (or anything with a `.text`) with newlines. Use this anywhere a protocol flattens text content into a single string for a provider field.
- `parseToolInput(route, name, raw)` — Schema-decodes a tool-call argument string with the canonical "Invalid JSON input for `<route>` tool call `<name>`" error message. Treats empty input as `{}`.
- `parseJson(route, raw, message)` — generic JSON-via-Schema decode for non-tool bodies.
- `eventError(route, message, ...)` — typed `InvalidProviderOutput` constructor for stream-time decode failures.
- `validateWith(decoder)` — maps Schema decode errors to `InvalidRequest`. `Route.make(...)` uses this for body validation; lower-level routes can reuse it.
- `matchToolChoice(provider, choice, branches)` — branches over `LLMRequest["toolChoice"]` for provider-specific lowering.
If you find yourself copying a 3-to-5-line snippet between two protocols, lift it into `ProviderShared` next to these helpers rather than duplicating.
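For example, a handler might lean on them like this (a sketch; signatures follow the descriptions above rather than the actual `protocols/shared.ts` exports):
```ts
import { Effect } from "effect"
import { ProviderShared } from "./shared"

// Decode accumulated tool-call arguments: empty input becomes {}, bad JSON
// surfaces as the canonical "Invalid JSON input for ..." error.
const onToolCallDone = (name: string, raw: string) =>
  Effect.gen(function* () {
    const input = yield* ProviderShared.parseToolInput("openai-chat", name, raw)
    return { type: "tool-call" as const, id: name, name, input }
  })
```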
### Tools
Tool loops are represented in common messages and events:
```ts
const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })
const result = LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } })
const followUp = LLM.request({
model,
messages: [LLM.user("Weather?"), LLM.assistant([call]), result],
})
```
Routes lower these into provider-native assistant tool-call messages and tool-result messages. Streaming providers should emit `tool-input-delta` events while arguments arrive, then a final `tool-call` event with parsed input.
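Illustratively, one streamed tool call surfaces as a sequence like this (values invented; the exact `tool-input-delta` field names are an assumption):
```ts
const toolRound = [
  { type: "tool-input-delta", id: "call_1", delta: '{"query":' },
  { type: "tool-input-delta", id: "call_1", delta: '"weather"}' },
  { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
  { type: "request-finish", reason: "tool-calls" },
]
```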
### Tool runtime
`LLM.stream({ request, tools })` executes model-requested tools with full type safety. Plain `LLM.stream(request)` only streams the model; if `request.tools` contains schemas, tool calls are returned for the caller to handle. Use `toolExecution: "none"` to pass executable tool definitions as schemas without invoking handlers. Add `stopWhen` to opt into follow-up model rounds after tool results.
```ts
const get_weather = tool({
description: "Get current weather for a city",
parameters: Schema.Struct({ city: Schema.String }),
success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
execute: ({ city }) =>
Effect.gen(function* () {
// city: string — typed from parameters Schema
const data = yield* WeatherApi.fetch(city)
return { temperature: data.temp, condition: data.cond }
// return type checked against success Schema
}),
})
const events = yield* LLM.stream({
request,
tools: { get_weather, get_time, ... },
stopWhen: LLM.stepCountIs(10),
}).pipe(Stream.runCollect)
```
The runtime:
- Adds tool definitions (derived from each tool's `parameters` Schema via `Schema.toJsonSchemaDocument`) onto `request.tools`.
- Streams the model.
- On `tool-call`: looks up the named tool, decodes input against `parameters` Schema, dispatches to the typed `execute`, encodes the result against `success` Schema, emits `tool-result`.
- Emits local `tool-result` events in the same step by default.
- Loops only when `stopWhen` is provided and the step finishes with `tool-calls`, appending the assistant + tool messages.
Handler dependencies (services, permissions, plugin hooks, abort handling) are closed over by the consumer at tool-construction time. The runtime's only environment requirement is `RequestExecutor.Service`. Build the tools record inside an `Effect.gen` once and reuse it across many runs.
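A minimal sketch of that pattern (`WeatherApi` is the same hypothetical service used above):
```ts
// Resolve handler dependencies once, close over them in execute, and reuse
// the returned record across many LLM.stream runs.
const makeTools = Effect.gen(function* () {
  const weather = yield* WeatherApi
  return {
    get_weather: tool({
      description: "Get current weather for a city",
      parameters: Schema.Struct({ city: Schema.String }),
      success: Schema.Struct({ forecast: Schema.String }),
      execute: ({ city }) => Effect.map(weather.fetch(city), (data) => ({ forecast: data.cond })),
    }),
  }
})
```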
Errors must be expressed as `ToolFailure`. The runtime catches it and emits a `tool-error` event, then a `tool-result` of `type: "error"`, so the model can self-correct on the next step. Anything that is not a `ToolFailure` is treated as a defect and fails the stream. Three recoverable error paths produce `tool-error` events:
- The model called an unknown tool name.
- Input failed the `parameters` Schema.
- The handler returned a `ToolFailure` (sketched below).
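A hedged sketch of the handler path, assuming `ToolFailure` carries a `message` (check `tool.ts` for the exact constructor):
```ts
const get_weather = tool({
  description: "Get current weather for a city",
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ forecast: Schema.String }),
  execute: ({ city }) =>
    WeatherApi.fetch(city).pipe(
      // Recoverable: the runtime emits tool-error, then a tool-result of
      // type "error", and the model can self-correct on the next step.
      Effect.mapError(() => new ToolFailure({ message: `No forecast available for ${city}` })),
      Effect.map((data) => ({ forecast: data.cond })),
    ),
})
```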
Provider-defined / hosted tools (Anthropic `web_search` / `code_execution` / `web_fetch`, OpenAI Responses `web_search_call` / `file_search_call` / `code_interpreter_call` / `mcp_call` / `local_shell_call` / `image_generation_call` / `computer_use_call`) pass through the runtime untouched:
- Routes surface the model's call as a `tool-call` event with `providerExecuted: true`, and the provider's result as a matching `tool-result` event with `providerExecuted: true`.
- The runtime detects `providerExecuted` on `tool-call` and **skips client dispatch** — no handler is invoked and no `tool-error` is raised for "unknown tool". The provider already executed it.
- Both events are appended to the assistant message in `assistantContent` so the next round's history carries the call + result for context. Anthropic encodes them back as `server_tool_use` + `web_search_tool_result` (or `code_execution_tool_result` / `web_fetch_tool_result`) blocks; OpenAI Responses callers typically use `previous_response_id` instead of resending hosted-tool items.
Add provider-defined tools to `request.tools` (no runtime entry needed). The matching route must know how to lower the tool definition into the provider-native shape; right now Anthropic accepts `web_search` / `code_execution` / `web_fetch` and OpenAI Responses accepts the hosted tool names listed above.
## Protocol File Style
Protocol files should look self-similar. Provider quirks belong behind named helpers so a new route can be reviewed by comparing the same sections across files.
### Section order
Use this order for every protocol module:
1. Public model input
2. Request body schema
3. Streaming event schema
4. Parser state
5. Request body construction (`fromRequest`)
6. Stream parsing (`step` and per-event handlers)
7. Protocol and route
8. Model helper
### Rules
- Keep protocol files focused on the protocol. Move provider-specific projection, signing, media normalization, or other bulky transformations into `src/protocols/utils/*`.
- Use `Effect.fn("Provider.fromRequest")` for request body construction entrypoints. Use `Effect.fn(...)` for event handlers that yield effects; keep purely synchronous handlers as plain functions returning a `StepResult` that the dispatcher lifts via `Effect.succeed(...)`.
- Parser state owns terminal information. The state machine records finish reason, usage, and pending tool calls; emit one terminal `request-finish` (or `provider-error`) when a `terminal` event arrives. If a provider splits reason and usage across events, merge them in parser state before flushing.
- Emit exactly one terminal `request-finish` event for a completed response. Use `stream.terminal` to signal the run is over and have `step` emit the final event.
- Use shared helpers for repeated protocol policy such as text joining, usage totals, JSON parsing, and tool-call accumulation. `ToolStream` (`protocols/utils/tool-stream.ts`) accumulates streamed tool-call arguments uniformly.
- Make intentional provider differences explicit in helper names or comments. If two protocol files differ visually, the reason should be obvious from the names.
- Prefer dispatched per-event handlers (`onMessageStart`, `onContentBlockDelta`, ...) called from a small top-level `step` switch over a long if-chain. The dispatcher keeps the event surface visible at a glance; see the sketch after this list.
- Keep tests in the same conceptual order as the protocol: basic prepare, tools prepare, unsupported lowering, text/usage parsing, tool streaming, finish reasons, provider errors.
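The dispatcher shape, sketched with placeholder state and event types:
```ts
// Top-level switch stays small; each provider event gets a named handler.
const step = (state: ParserState, event: ProviderEvent) => {
  switch (event.type) {
    case "message_start":
      return Effect.succeed(onMessageStart(state, event)) // pure handler, lifted
    case "content_block_delta":
      return onContentBlockDelta(state, event) // effectful handler
    default:
      return Effect.succeed([state, []] as const)
  }
}
```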
### Review checklist
- Can the file be skimmed side-by-side with `openai-chat.ts` without hunting for equivalent sections?
- Are provider quirks named, isolated, and covered by focused tests?
- Does request body construction validate unsupported common content at the protocol boundary?
- Does stream parsing emit stable common events without leaking provider event order to callers?
- Does `toolChoice: "none"` behavior read as intentional?
## Recording Tests
Recorded tests use one cassette file per scenario. A cassette holds an ordered array of `{ request, response }` interactions, so multi-step flows (tool loops, retries, polling) record into a single file. Use `recordedTests({ prefix, requires })` and let the helper derive cassette names from test names:
```ts
const recorded = recordedTests({ prefix: "openai-chat", requires: ["OPENAI_API_KEY"] })
recorded.effect("streams text", () =>
Effect.gen(function* () {
// test body
}),
)
```
Replay is the default. `RECORD=true` records fresh cassettes and requires the listed env vars. Cassettes are written as pretty-printed JSON so multi-interaction diffs stay reviewable.
Pass `provider`, `protocol`, and optional `tags` to `recordedTests(...)` / `recorded.effect.with(...)` so cassettes carry searchable metadata. Use recorded-test filters to replay or record a narrow subset without rewriting a whole file:
- `RECORDED_PROVIDER=openai` matches tests tagged with `provider:openai`; comma-separated values are allowed.
- `RECORDED_PREFIX=openai-chat` matches cassette groups by `recordedTests({ prefix })`; comma-separated values are allowed.
- `RECORDED_TAGS=tool` requires all listed tags to be present, e.g. `RECORDED_TAGS=provider:togetherai,tool`.
- `RECORDED_TEST="streams text"` matches by test name, kebab-case test id, or cassette path.
Filters apply in replay and record mode. Combine them with `RECORD=true` when refreshing only one provider or scenario.
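A sketch of a tagged group (the exact `recorded.effect.with(...)` call shape is an assumption):
```ts
const recorded = recordedTests({
  prefix: "togetherai-chat",
  provider: "togetherai",
  protocol: "openai-chat",
  requires: ["TOGETHER_AI_API_KEY"],
})

// Replayable with RECORDED_TAGS=provider:togetherai,tool
recorded.effect.with({ tags: ["tool"] })("streams tool calls", () =>
  Effect.gen(function* () {
    // test body
  }),
)
```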
**Binary response bodies.** Most providers stream text (SSE, JSON). AWS Bedrock streams binary AWS event-stream frames whose CRC32 fields would be mangled by a UTF-8 round-trip — those bodies are stored as base64 with `bodyEncoding: "base64"` on the response snapshot. Detection is by `Content-Type` in `@opencode-ai/http-recorder` (currently `application/vnd.amazon.eventstream` and `application/octet-stream`); cassettes for SSE/JSON routes omit the field and decode as text.
**Matching strategies.** Replay defaults to structural matching, which finds an interaction by comparing method, URL, allow-listed headers, and the canonical JSON body. This is the right choice for tool loops because each round's request differs (the message history grows). For scenarios where successive requests are byte-identical and expect different responses (retries, polling), pass `dispatch: "sequential"` in `RecordReplayOptions` — replay then walks the cassette in record order via an internal cursor. `scriptedResponses` (in `test/lib/http.ts`) is the deterministic counterpart for tests that don't need a live provider; it scripts response bodies in order without reading from disk.
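For instance, assuming `recordedTests` forwards `dispatch` through to `RecordReplayOptions`:
```ts
// Successive byte-identical requests (retries, polling) replay in record order.
const recorded = recordedTests({
  prefix: "openai-retry",
  requires: ["OPENAI_API_KEY"],
  dispatch: "sequential",
})
```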
Do not blanket re-record an entire test file when adding one cassette. `RECORD=true` rewrites every recorded case that runs, and provider streams contain volatile IDs, timestamps, fingerprints, and obfuscation fields. Prefer deleting the one cassette you intend to refresh, or run a focused test pattern that only registers the scenario you want to record. Keep stable existing cassettes unchanged unless their request shape or expected behavior changed.

242
packages/llm/example/tutorial.ts Normal file
View File

@@ -0,0 +1,242 @@
import { Config, Effect, Formatter, Layer, Schema, Stream } from "effect"
import { LLM, LLMClient, Provider, ProviderID, Tool, type ProviderModelOptions } from "@opencode-ai/llm"
import { Route, Auth, Endpoint, Framing, Protocol, RequestExecutor } from "@opencode-ai/llm/route"
import { OpenAI } from "@opencode-ai/llm/providers"
/**
* A runnable walkthrough of the LLM package use-site API.
*
* Run from `packages/llm` with an OpenAI key in the environment:
*
* OPENAI_API_KEY=... bun example/tutorial.ts
*
* The file is intentionally written as a normal TypeScript program. You can
* hover imports and local values to see how the public API is typed.
*/
const apiKey = Config.redacted("OPENAI_API_KEY")
// 1. Pick a model. The provider helper records provider identity, protocol
// choice, capabilities, deployment options, authentication, and defaults.
const model = OpenAI.model("gpt-4o-mini", {
apiKey,
generation: { maxTokens: 160 },
providerOptions: {
openai: { store: false },
},
})
// 2. Build a provider-neutral request. This is useful when reusing one request
// across generate and stream examples.
//
// Options can live on both the model and the request:
//
// - `generation`: common controls such as max tokens, temperature, topP/topK,
// penalties, seed, and stop sequences.
// - `providerOptions`: namespaced provider-native behavior. For example,
// OpenAI cache keys and store behavior, Anthropic thinking, Gemini thinking
// config, or OpenRouter routing/reasoning.
// - `http`: last-resort serializable overlays for final request body, headers,
// and query params. Prefer typed `providerOptions` when a field is stable.
//
// Model options are defaults. Request options override them for this call.
const request = LLM.request({
model,
system: "You are concise and practical.",
prompt: "Tell me a joke",
generation: { maxTokens: 80, temperature: 0.7 },
providerOptions: {
openai: { promptCacheKey: "tutorial-joke" },
},
})
// `http` is intentionally not needed for normal calls. This shows the shape for
// newly released provider fields before they deserve a typed provider option.
const rawOverlayExample = LLM.request({
model,
prompt: "Show the final HTTP overlay shape.",
http: {
body: { metadata: { example: "tutorial" } },
headers: { "x-opencode-tutorial": "1" },
query: { debug: "1" },
},
})
// 3. `generate` sends the request and collects the event stream into one
// response object. `response.text` is the collected text output.
const generateOnce = Effect.gen(function* () {
const response = yield* LLM.generate(request)
console.log("\n== generate ==")
console.log("generated text:", response.text)
console.log("usage", Formatter.formatJson(response.usage, { space: 2 }))
})
// 4. `stream` exposes provider output as common `LLMEvent`s for UIs that want
// incremental text, reasoning, tool input, usage, or finish events.
const streamText = LLM.stream(request).pipe(
Stream.tap((event) =>
Effect.sync(() => {
if (event.type === "text-delta") process.stdout.write(`\ntext: ${event.text}`)
if (event.type === "request-finish") process.stdout.write(`\nfinish: ${event.reason}\n`)
}),
),
Stream.runDrain,
)
// 5. Tools are typed with Effect Schema. Passing tools to `LLMClient.stream`
// adds their definitions to the request and dispatches matching tool calls.
// Add `stopWhen` to opt into follow-up model rounds after tool results.
const tools = {
get_weather: Tool.make({
description: "Get current weather for a city.",
parameters: Schema.Struct({ city: Schema.String }),
success: Schema.Struct({ forecast: Schema.String }),
execute: (input) => Effect.succeed({ forecast: `${input.city}: sunny, 72F` }),
}),
}
const streamWithTools = LLM.stream({
request: LLM.request({
model,
prompt: "Use get_weather for San Francisco, then answer in one sentence.",
generation: { maxTokens: 80, temperature: 0 },
}),
tools,
stopWhen: LLM.stepCountIs(3),
}).pipe(
Stream.tap((event) =>
Effect.sync(() => {
if (event.type === "tool-call") console.log("tool call", event.name, event.input)
if (event.type === "tool-result") console.log("tool result", event.name, event.result)
if (event.type === "text-delta") process.stdout.write(event.text)
}),
),
Stream.runDrain,
)
// 6. `generateObject` is the structured-output helper. It forces a synthetic
// tool call internally, so the same call site works across providers instead of
// depending on provider-specific JSON mode flags.
const WeatherReport = Schema.Struct({
city: Schema.String,
forecast: Schema.String,
highFahrenheit: Schema.Number,
})
const generateStructuredObject = Effect.gen(function* () {
const response = yield* LLM.generateObject({
model,
system: "Return only structured weather data.",
prompt: "Give me today's weather for San Francisco.",
schema: WeatherReport,
generation: { maxTokens: 120, temperature: 0 },
})
console.log("\n== generateObject ==")
console.log(Formatter.formatJson(response.object, { space: 2 }))
})
// If the shape is only known at runtime, pass raw JSON Schema instead. The
// `.object` type is `unknown`; callers that need static types should validate it.
const generateDynamicObject = LLM.generateObject({
model,
prompt: "Extract the city and forecast from: San Francisco is sunny.",
jsonSchema: {
type: "object",
properties: {
city: { type: "string" },
forecast: { type: "string" },
},
required: ["city", "forecast"],
},
})
// -----------------------------------------------------------------------------
// Part 2: provider composition with a fake provider
// -----------------------------------------------------------------------------
// A protocol is the provider-native API shape: common request -> body, response
// frames -> common events. This fake one turns text prompts into a JSON body
// and treats every SSE frame as output text.
const FakeBody = Schema.Struct({
model: Schema.String,
input: Schema.String,
})
type FakeBody = Schema.Schema.Type<typeof FakeBody>
const FakeProtocol = Protocol.make<FakeBody, string, string, void>({
// Protocol ids are open strings, so external packages can define their own
// protocols without changing this package.
id: "fake-echo",
body: {
schema: FakeBody,
from: (request) =>
Effect.succeed({
model: request.model.id,
input: request.messages
.flatMap((message) => message.content)
.filter((part) => part.type === "text")
.map((part) => part.text)
.join("\n"),
}),
},
stream: {
event: Schema.String,
initial: () => undefined,
step: (_, frame) => Effect.succeed([undefined, [{ type: "text-delta", text: frame }]] as const),
onHalt: () => [{ type: "request-finish", reason: "stop" }],
},
})
// A route is the runnable binding for that protocol. It adds the deployment
// axes that the protocol deliberately does not know: URL, auth, and framing.
const FakeAdapter = Route.make({
id: "fake-echo",
protocol: FakeProtocol,
endpoint: Endpoint.path("/v1/echo"),
auth: Auth.passthrough,
framing: Framing.sse,
})
// A provider module exports a Provider definition. The default `model` helper
// sets provider identity, protocol id, and the route id resolved by the registry.
const fakeEchoModel = Route.model(FakeAdapter, { provider: "fake-echo", baseURL: "https://fake.local" })
const FakeEcho = Provider.make({
id: ProviderID.make("fake-echo"),
model: (id: string, options: ProviderModelOptions = {}) => fakeEchoModel({ id, ...options }),
})
// `LLMClient.prepare` is the lower-level inspection hook: it compiles through
// body conversion, validation, endpoint, auth, and HTTP construction without
// sending anything over the network.
const inspectFakeProvider = Effect.gen(function* () {
const prepared = yield* LLMClient.prepare(
LLM.request({
model: FakeEcho.model("tiny-echo"),
prompt: "Show me the provider pipeline.",
}),
)
console.log("\n== fake provider prepare ==")
console.log("route:", prepared.route)
console.log("body:", Formatter.formatJson(prepared.body, { space: 2 }))
})
// Provide the LLM runtime and the HTTP request executor once. Keep one path
// enabled at a time so the tutorial can demonstrate generate, prepare, stream,
// or tool-loop behavior without spending tokens on every example.
const requestExecutorLayer = RequestExecutor.defaultLayer
const llmClientLayer = LLMClient.layer.pipe(Layer.provide(requestExecutorLayer))
const program = Effect.gen(function* () {
// yield* generateOnce
// yield* inspectFakeProvider
// yield* LLMClient.prepare(rawOverlayExample).pipe(Effect.andThen((prepared) => Effect.sync(() => console.log(prepared.body))))
// yield* streamText
// yield* generateStructuredObject
// yield* generateDynamicObject.pipe(Effect.andThen((response) => Effect.sync(() => console.log(response.object))))
yield* streamWithTools
}).pipe(Effect.provide(Layer.mergeAll(requestExecutorLayer, llmClientLayer)))
Effect.runPromise(program)

51
packages/llm/package.json Normal file
View File

@@ -0,0 +1,51 @@
{
"$schema": "https://json.schemastore.org/package.json",
"version": "1.14.25",
"name": "@opencode-ai/llm",
"type": "module",
"license": "MIT",
"private": true,
"scripts": {
"setup:recording-env": "bun run script/setup-recording-env.ts",
"test": "bun test --timeout 30000",
"typecheck": "tsgo --noEmit"
},
"exports": {
".": "./src/index.ts",
"./route": "./src/route/index.ts",
"./provider": "./src/provider.ts",
"./providers": "./src/providers/index.ts",
"./providers/amazon-bedrock": "./src/providers/amazon-bedrock.ts",
"./providers/anthropic": "./src/providers/anthropic.ts",
"./providers/azure": "./src/providers/azure.ts",
"./providers/cloudflare": "./src/providers/cloudflare.ts",
"./providers/github-copilot": "./src/providers/github-copilot.ts",
"./providers/google": "./src/providers/google.ts",
"./providers/openai": "./src/providers/openai.ts",
"./providers/openai-compatible": "./src/providers/openai-compatible.ts",
"./providers/openai-compatible-profile": "./src/providers/openai-compatible-profile.ts",
"./providers/openrouter": "./src/providers/openrouter.ts",
"./providers/xai": "./src/providers/xai.ts",
"./protocols": "./src/protocols/index.ts",
"./protocols/anthropic-messages": "./src/protocols/anthropic-messages.ts",
"./protocols/bedrock-converse": "./src/protocols/bedrock-converse.ts",
"./protocols/gemini": "./src/protocols/gemini.ts",
"./protocols/openai-chat": "./src/protocols/openai-chat.ts",
"./protocols/openai-compatible-chat": "./src/protocols/openai-compatible-chat.ts",
"./protocols/openai-responses": "./src/protocols/openai-responses.ts"
},
"devDependencies": {
"@clack/prompts": "1.0.0-alpha.1",
"@effect/platform-node": "catalog:",
"@opencode-ai/http-recorder": "workspace:*",
"@tsconfig/bun": "catalog:",
"@types/bun": "catalog:",
"@typescript/native-preview": "catalog:"
},
"dependencies": {
"@smithy/eventstream-codec": "4.2.14",
"@smithy/util-utf8": "4.2.2",
"aws4fetch": "1.0.20",
"effect": "catalog:"
}
}

View File

@@ -0,0 +1,250 @@
import * as fs from "node:fs/promises"
import * as path from "node:path"
const RECORDINGS_DIR = path.resolve(import.meta.dir, "..", "test", "fixtures", "recordings")
const MODELS_DEV_URL = "https://models.dev/api.json"
type JsonRecord = Record<string, unknown>
type Pricing = {
readonly input?: number
readonly output?: number
readonly cache_read?: number
readonly cache_write?: number
readonly reasoning?: number
}
type Usage = {
readonly inputTokens: number
readonly outputTokens: number
readonly cacheReadTokens: number
readonly cacheWriteTokens: number
readonly reasoningTokens: number
readonly reportedCost: number
}
type Row = Usage & {
readonly cassette: string
readonly provider: string
readonly model: string
readonly estimatedCost: number
readonly pricingSource: string
}
const isRecord = (value: unknown): value is JsonRecord =>
value !== null && typeof value === "object" && !Array.isArray(value)
const asNumber = (value: unknown) => (typeof value === "number" && Number.isFinite(value) ? value : 0)
const asString = (value: unknown) => (typeof value === "string" ? value : undefined)
const readJson = async (file: string) => JSON.parse(await Bun.file(file).text()) as unknown
const walk = async (dir: string): Promise<ReadonlyArray<string>> =>
(await fs.readdir(dir, { withFileTypes: true }))
.flatMap((entry) => {
const file = path.join(dir, entry.name)
return entry.isDirectory() ? [] : [file]
})
.concat(
...(await Promise.all(
(await fs.readdir(dir, { withFileTypes: true }))
.filter((entry) => entry.isDirectory())
.map((entry) => walk(path.join(dir, entry.name))),
)),
)
const providerFromUrl = (url: string) => {
if (url.includes("api.openai.com")) return "openai"
if (url.includes("api.anthropic.com")) return "anthropic"
if (url.includes("generativelanguage.googleapis.com")) return "google"
if (url.includes("bedrock")) return "amazon-bedrock"
if (url.includes("openrouter.ai")) return "openrouter"
if (url.includes("api.x.ai")) return "xai"
if (url.includes("api.groq.com")) return "groq"
if (url.includes("api.deepseek.com")) return "deepseek"
if (url.includes("api.together.xyz")) return "togetherai"
return "unknown"
}
const providerAliases: Record<string, ReadonlyArray<string>> = {
openai: ["openai"],
anthropic: ["anthropic"],
google: ["google"],
"amazon-bedrock": ["amazon-bedrock"],
openrouter: ["openrouter", "openai", "anthropic", "google"],
xai: ["xai"],
groq: ["groq"],
deepseek: ["deepseek"],
togetherai: ["togetherai"],
}
const modelAliases = (model: string) => [
model,
model.replace(/^models\//, ""),
model.replace(/-\d{8}$/, ""),
model.replace(/-\d{4}-\d{2}-\d{2}$/, ""),
model.replace(/-\d{4}-\d{2}-\d{2}$/, "").replace(/-\d{8}$/, ""),
model.replace(/^openai\//, ""),
model.replace(/^anthropic\//, ""),
model.replace(/^google\//, ""),
]
const pricingFor = (models: JsonRecord, provider: string, model: string) => {
for (const providerID of providerAliases[provider] ?? [provider]) {
const providerEntry = models[providerID]
if (!isRecord(providerEntry) || !isRecord(providerEntry.models)) continue
for (const modelID of modelAliases(model)) {
const modelEntry = providerEntry.models[modelID]
if (isRecord(modelEntry) && isRecord(modelEntry.cost))
return { pricing: modelEntry.cost as Pricing, source: `${providerID}/${modelID}` }
}
}
return { pricing: undefined, source: "missing" }
}
const estimateCost = (usage: Usage, pricing: Pricing | undefined) => {
if (!pricing) return 0
return (
(usage.inputTokens * (pricing.input ?? 0) +
usage.outputTokens * (pricing.output ?? 0) +
usage.cacheReadTokens * (pricing.cache_read ?? 0) +
usage.cacheWriteTokens * (pricing.cache_write ?? 0) +
usage.reasoningTokens * (pricing.reasoning ?? 0)) /
1_000_000
)
}
const emptyUsage = (): Usage => ({
inputTokens: 0,
outputTokens: 0,
cacheReadTokens: 0,
cacheWriteTokens: 0,
reasoningTokens: 0,
reportedCost: 0,
})
const addUsage = (a: Usage, b: Usage): Usage => ({
inputTokens: a.inputTokens + b.inputTokens,
outputTokens: a.outputTokens + b.outputTokens,
cacheReadTokens: a.cacheReadTokens + b.cacheReadTokens,
cacheWriteTokens: a.cacheWriteTokens + b.cacheWriteTokens,
reasoningTokens: a.reasoningTokens + b.reasoningTokens,
reportedCost: a.reportedCost + b.reportedCost,
})
const usageFromObject = (usage: unknown): Usage => {
if (!isRecord(usage)) return emptyUsage()
const promptDetails = isRecord(usage.prompt_tokens_details) ? usage.prompt_tokens_details : {}
const completionDetails = isRecord(usage.completion_tokens_details) ? usage.completion_tokens_details : {}
const inputDetails = isRecord(usage.input_tokens_details) ? usage.input_tokens_details : {}
const outputDetails = isRecord(usage.output_tokens_details) ? usage.output_tokens_details : {}
const cacheWriteTokens = asNumber(promptDetails.cache_write_tokens) + asNumber(inputDetails.cache_write_tokens)
return {
inputTokens: asNumber(usage.prompt_tokens) + asNumber(usage.input_tokens),
outputTokens: asNumber(usage.completion_tokens) + asNumber(usage.output_tokens),
cacheReadTokens: asNumber(promptDetails.cached_tokens) + asNumber(inputDetails.cached_tokens),
cacheWriteTokens,
reasoningTokens: asNumber(completionDetails.reasoning_tokens) + asNumber(outputDetails.reasoning_tokens),
reportedCost: asNumber(usage.cost),
}
}
const jsonPayloads = (body: string) =>
body
.split("\n")
.map((line) => line.trim())
.filter((line) => line.startsWith("data:"))
.map((line) => line.slice("data:".length).trim())
.filter((line) => line !== "" && line !== "[DONE]")
.flatMap((line) => {
try {
return [JSON.parse(line) as unknown]
} catch {
return []
}
})
const usageFromResponseBody = (body: string) =>
jsonPayloads(body).reduce<Usage>((usage, payload) => {
if (!isRecord(payload)) return usage
return addUsage(
usage,
addUsage(
usageFromObject(payload.usage),
usageFromObject(isRecord(payload.response) ? payload.response.usage : undefined),
),
)
}, emptyUsage())
const modelFromRequest = (request: unknown) => {
if (!isRecord(request)) return "unknown"
const requestBody = asString(request.body)
if (!requestBody) return "unknown"
try {
const body = JSON.parse(requestBody) as unknown
if (!isRecord(body)) return "unknown"
return asString(body.model) ?? "unknown"
} catch {
return "unknown"
}
}
const rowFor = (models: JsonRecord, file: string, cassette: unknown): Row | undefined => {
if (!isRecord(cassette) || !Array.isArray(cassette.interactions)) return undefined
const first = cassette.interactions.find(isRecord)
if (!first || !isRecord(first.request)) return undefined
const provider = providerFromUrl(asString(first.request.url) ?? "")
const model = modelFromRequest(first.request)
const usage = cassette.interactions.filter(isRecord).reduce<Usage>((total, interaction) => {
if (!isRecord(interaction.response)) return total
const responseBody = asString(interaction.response.body)
if (!responseBody) return total
return addUsage(total, usageFromResponseBody(responseBody))
}, emptyUsage())
const priced = pricingFor(models, provider, model)
return {
cassette: path.relative(RECORDINGS_DIR, file),
provider,
model,
...usage,
estimatedCost: estimateCost(usage, priced.pricing),
pricingSource: priced.source,
}
}
const money = (value: number) => (value === 0 ? "$0.000000" : `$${value.toFixed(6)}`)
const tokens = (value: number) => value.toLocaleString("en-US")
const models = (await (await fetch(MODELS_DEV_URL)).json()) as JsonRecord
const rows = (
await Promise.all(
(await walk(RECORDINGS_DIR))
.filter((file) => file.endsWith(".json"))
.map(async (file) => rowFor(models, file, await readJson(file))),
)
).filter((row): row is Row => row !== undefined)
const totals = rows.reduce(
(total, row) => ({
...addUsage(total, row),
estimatedCost: total.estimatedCost + row.estimatedCost,
}),
{ ...emptyUsage(), estimatedCost: 0 },
)
console.log("# Recording Cost Report")
console.log("")
console.log(`Pricing: ${MODELS_DEV_URL}`)
console.log(`Cassettes: ${rows.length}`)
console.log(`Reported cost: ${money(totals.reportedCost)}`)
console.log(`Estimated cost: ${money(totals.estimatedCost)}`)
console.log("")
console.log("| Provider | Model | Input | Output | Reasoning | Reported | Estimated | Pricing | Cassette |")
console.log("|---|---:|---:|---:|---:|---:|---:|---|---|")
for (const row of rows.toSorted((a, b) => b.reportedCost + b.estimatedCost - (a.reportedCost + a.estimatedCost))) {
if (row.inputTokens + row.outputTokens + row.reasoningTokens + row.reportedCost + row.estimatedCost === 0) continue
console.log(
`| ${row.provider} | ${row.model} | ${tokens(row.inputTokens)} | ${tokens(row.outputTokens)} | ${tokens(row.reasoningTokens)} | ${money(row.reportedCost)} | ${money(row.estimatedCost)} | ${row.pricingSource} | ${row.cassette} |`,
)
}

537
packages/llm/script/setup-recording-env.ts Normal file
View File

@@ -0,0 +1,537 @@
#!/usr/bin/env bun
import { NodeFileSystem } from "@effect/platform-node"
import * as path from "node:path"
import * as prompts from "@clack/prompts"
import { AwsV4Signer } from "aws4fetch"
import { Config, ConfigProvider, Effect, FileSystem, PlatformError, Redacted } from "effect"
import { FetchHttpClient, HttpClient, HttpClientRequest, type HttpClientResponse } from "effect/unstable/http"
import * as ProviderShared from "../src/protocols/shared"
import * as Cloudflare from "../src/providers/cloudflare"
type Provider = {
readonly id: string
readonly label: string
readonly tier: "core" | "canary" | "compatible" | "optional"
readonly note: string
readonly vars: ReadonlyArray<{
readonly name: string
readonly label?: string
readonly optional?: boolean
readonly secret?: boolean
}>
readonly validate?: (env: Env) => Effect.Effect<string | undefined, unknown, HttpClient.HttpClient>
}
type Env = Record<string, string>
const PROVIDERS: ReadonlyArray<Provider> = [
{
id: "openai",
label: "OpenAI",
tier: "core",
note: "Native OpenAI Chat / Responses recorded tests",
vars: [{ name: "OPENAI_API_KEY" }],
validate: (env) => validateBearer("https://api.openai.com/v1/models", Redacted.make(env.OPENAI_API_KEY)),
},
{
id: "anthropic",
label: "Anthropic",
tier: "core",
note: "Native Anthropic Messages recorded tests",
vars: [{ name: "ANTHROPIC_API_KEY" }],
validate: (env) =>
HttpClientRequest.get("https://api.anthropic.com/v1/models").pipe(
HttpClientRequest.setHeaders({
"anthropic-version": "2023-06-01",
"x-api-key": Redacted.value(Redacted.make(env.ANTHROPIC_API_KEY)),
}),
executeRequest,
),
},
{
id: "google",
label: "Google Gemini",
tier: "core",
note: "Native Gemini recorded tests",
vars: [{ name: "GOOGLE_GENERATIVE_AI_API_KEY" }],
validate: (env) =>
HttpClientRequest.get(
`https://generativelanguage.googleapis.com/v1beta/models?key=${encodeURIComponent(env.GOOGLE_GENERATIVE_AI_API_KEY)}`,
).pipe(executeRequest),
},
{
id: "bedrock",
label: "Amazon Bedrock",
tier: "core",
note: "Native Bedrock Converse recorded tests",
vars: [
{ name: "AWS_ACCESS_KEY_ID" },
{ name: "AWS_SECRET_ACCESS_KEY" },
{ name: "AWS_SESSION_TOKEN", optional: true },
{ name: "BEDROCK_RECORDING_REGION", optional: true },
{ name: "BEDROCK_MODEL_ID", optional: true },
],
validate: (env) => validateBedrock(env),
},
{
id: "groq",
label: "Groq",
tier: "canary",
note: "Fast OpenAI-compatible canary for text/tool streaming",
vars: [{ name: "GROQ_API_KEY" }],
validate: (env) => validateBearer("https://api.groq.com/openai/v1/models", Redacted.make(env.GROQ_API_KEY)),
},
{
id: "openrouter",
label: "OpenRouter",
tier: "canary",
note: "Router canary for OpenAI-compatible text/tool streaming",
vars: [{ name: "OPENROUTER_API_KEY" }],
validate: (env) =>
validateChat({
url: "https://openrouter.ai/api/v1/chat/completions",
token: Redacted.make(env.OPENROUTER_API_KEY),
model: "openai/gpt-4o-mini",
}),
},
{
id: "xai",
label: "xAI",
tier: "canary",
note: "OpenAI-compatible xAI chat endpoint",
vars: [{ name: "XAI_API_KEY" }],
validate: (env) => validateBearer("https://api.x.ai/v1/models", Redacted.make(env.XAI_API_KEY)),
},
{
id: "cloudflare-ai-gateway",
label: "Cloudflare AI Gateway",
tier: "canary",
note: "Cloudflare Unified/OpenAI-compatible gateway; supports provider/model ids like workers-ai/@cf/...",
vars: [
{ name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false },
{ name: "CLOUDFLARE_GATEWAY_ID", label: "Cloudflare AI Gateway ID (defaults to default)", optional: true, secret: false },
{ name: "CLOUDFLARE_API_TOKEN", label: "Cloudflare AI Gateway token" },
],
validate: (env) =>
validateChat({
url: `${Cloudflare.aiGatewayBaseURL({
accountId: env.CLOUDFLARE_ACCOUNT_ID,
gatewayId: env.CLOUDFLARE_GATEWAY_ID || undefined,
})}/chat/completions`,
token: Redacted.make(envValue(env, Cloudflare.aiGatewayAuthEnvVars)),
tokenHeader: "cf-aig-authorization",
model: "workers-ai/@cf/meta/llama-3.1-8b-instruct",
}),
},
{
id: "cloudflare-workers-ai",
label: "Cloudflare Workers AI",
tier: "canary",
note: "Direct Workers AI OpenAI-compatible endpoint; supports model ids like @cf/meta/...",
vars: [
{ name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false },
{ name: "CLOUDFLARE_API_KEY", label: "Cloudflare Workers AI API token" },
],
validate: (env) =>
validateChat({
url: `${Cloudflare.workersAIBaseURL({ accountId: env.CLOUDFLARE_ACCOUNT_ID })}/chat/completions`,
token: Redacted.make(envValue(env, Cloudflare.workersAIAuthEnvVars)),
model: "@cf/meta/llama-3.1-8b-instruct",
}),
},
{
id: "deepseek",
label: "DeepSeek",
tier: "compatible",
note: "Existing OpenAI-compatible recorded tests",
vars: [{ name: "DEEPSEEK_API_KEY" }],
validate: (env) => validateBearer("https://api.deepseek.com/models", Redacted.make(env.DEEPSEEK_API_KEY)),
},
{
id: "togetherai",
label: "TogetherAI",
tier: "compatible",
note: "Existing OpenAI-compatible text/tool recorded tests",
vars: [{ name: "TOGETHER_AI_API_KEY" }],
validate: (env) => validateBearer("https://api.together.xyz/v1/models", Redacted.make(env.TOGETHER_AI_API_KEY)),
},
{
id: "mistral",
label: "Mistral",
tier: "optional",
note: "OpenAI-compatible bridge; native reasoning parity is follow-up work",
vars: [{ name: "MISTRAL_API_KEY" }],
validate: (env) => validateBearer("https://api.mistral.ai/v1/models", Redacted.make(env.MISTRAL_API_KEY)),
},
{
id: "perplexity",
label: "Perplexity",
tier: "optional",
note: "OpenAI-compatible bridge; citations/search metadata are follow-up work",
vars: [{ name: "PERPLEXITY_API_KEY" }],
validate: (env) => validateBearer("https://api.perplexity.ai/models", Redacted.make(env.PERPLEXITY_API_KEY)),
},
{
id: "venice",
label: "Venice",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "VENICE_API_KEY" }],
validate: (env) => validateBearer("https://api.venice.ai/api/v1/models", Redacted.make(env.VENICE_API_KEY)),
},
{
id: "cerebras",
label: "Cerebras",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "CEREBRAS_API_KEY" }],
validate: (env) => validateBearer("https://api.cerebras.ai/v1/models", Redacted.make(env.CEREBRAS_API_KEY)),
},
{
id: "deepinfra",
label: "DeepInfra",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "DEEPINFRA_API_KEY" }],
validate: (env) =>
validateBearer("https://api.deepinfra.com/v1/openai/models", Redacted.make(env.DEEPINFRA_API_KEY)),
},
{
id: "fireworks",
label: "Fireworks",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "FIREWORKS_API_KEY" }],
validate: (env) =>
validateBearer("https://api.fireworks.ai/inference/v1/models", Redacted.make(env.FIREWORKS_API_KEY)),
},
{
id: "baseten",
label: "Baseten",
tier: "optional",
note: "OpenAI-compatible bridge",
vars: [{ name: "BASETEN_API_KEY" }],
},
]
const args = process.argv.slice(2)
const hasFlag = (name: string) => args.includes(name)
const option = (name: string) => {
const index = args.indexOf(name)
if (index === -1) return undefined
return args[index + 1]
}
const envPath = path.resolve(process.cwd(), option("--env") ?? ".env.local")
const checkOnly = hasFlag("--check")
const providerOption = option("--providers")
const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY)
const envNames = Array.from(new Set(PROVIDERS.flatMap((provider) => provider.vars.map((item) => item.name))))
const providersForOption = (value: string | undefined) => {
if (!value || value === "recommended")
return PROVIDERS.filter((provider) => provider.tier === "core" || provider.tier === "canary")
if (value === "recorded") return PROVIDERS.filter((provider) => provider.tier !== "optional")
if (value === "all") return PROVIDERS
const ids = new Set(
value
.split(",")
.map((item) => item.trim())
.filter(Boolean),
)
return PROVIDERS.filter((provider) => ids.has(provider.id))
}
const chooseProviders = async () => {
if (providerOption) return providersForOption(providerOption)
return providersForOption("recommended")
}
const catchMissingFile = (error: PlatformError.PlatformError) => {
if (error.reason._tag === "NotFound") return Effect.succeed("")
return Effect.fail(error)
}
const readEnvFile = Effect.fn("RecordingEnv.readFile")(function* () {
const fileSystem = yield* FileSystem.FileSystem
return yield* fileSystem.readFileString(envPath).pipe(Effect.catch(catchMissingFile))
})
const readConfigString = (provider: ConfigProvider.ConfigProvider, name: string) =>
Config.string(name)
.parse(provider)
.pipe(
Effect.match({
onFailure: () => undefined,
onSuccess: (value) => value,
}),
)
const parseEnv = Effect.fn("RecordingEnv.parseEnv")(function* (contents: string) {
const provider = ConfigProvider.fromDotEnvContents(contents)
return Object.fromEntries(
(yield* Effect.forEach(envNames, (name) =>
readConfigString(provider, name).pipe(Effect.map((value) => [name, value] as const)),
)).filter((entry): entry is readonly [string, string] => entry[1] !== undefined),
)
})
const quote = (value: string) => JSON.stringify(value)
const status = (name: string, fileEnv: Env) => {
if (fileEnv[name]) return "file"
if (process.env[name]) return "shell"
return "missing"
}
const statusLine = (provider: Provider, fileEnv: Env) =>
[
`${provider.label} (${provider.tier})`,
provider.note,
...provider.vars.map((item) => {
const value = status(item.name, fileEnv)
const suffix = item.optional ? " optional" : ""
return ` ${value === "missing" ? "missing" : "set"} ${item.name}${suffix}${value === "shell" ? " (shell only)" : ""}`
}),
].join("\n")
const printStatus = (providers: ReadonlyArray<Provider>, fileEnv: Env) => {
prompts.note(providers.map((provider) => statusLine(provider, fileEnv)).join("\n\n"), `Recording env: ${envPath}`)
}
const exitIfCancel = <A>(value: A | symbol): A => {
if (!prompts.isCancel(value)) return value as A
prompts.cancel("Cancelled")
process.exit(130)
}
const upsertEnv = (contents: string, values: Env) => {
const names = Object.keys(values)
const seen = new Set<string>()
const lines = contents.split(/\r?\n/).map((line) => {
const match = line.match(/^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=/)
if (!match || !names.includes(match[1])) return line
seen.add(match[1])
return `${match[1]}=${quote(values[match[1]])}`
})
const missing = names.filter((name) => !seen.has(name))
if (missing.length === 0) return lines.join("\n").replace(/\n*$/, "\n")
const prefix = lines.join("\n").trimEnd()
const block = [
"",
"# Added by bun run setup:recording-env",
...missing.map((name) => `${name}=${quote(values[name])}`),
].join("\n")
return `${prefix}${block}\n`
}
const providerRequiredStatus = (provider: Provider, fileEnv: Env) => {
const required = requiredVars(provider)
if (required.some((item) => status(item.name, fileEnv) === "missing")) return "missing"
if (required.some((item) => status(item.name, fileEnv) === "shell")) return "set in shell"
return "already added"
}
const requiredVars = (provider: Provider) => provider.vars.filter((item) => !item.optional)
const promptVars = (provider: Provider) => provider.vars.filter((item) => !item.optional || item.secret === false)
const processEnv = (): Env =>
Object.fromEntries(Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined))
const envValue = (env: Env, names: ReadonlyArray<string>) => names.map((name) => env[name]).find(Boolean) ?? ""
const envWithValues = (fileEnv: Env, values: Env): Env => ({
...processEnv(),
...fileEnv,
...values,
})
const responseError = Effect.fn("RecordingEnv.responseError")(function* (
response: HttpClientResponse.HttpClientResponse,
) {
if (response.status >= 200 && response.status < 300) return undefined
const body = yield* response.text.pipe(Effect.catch(() => Effect.succeed("")))
return `${response.status}${body ? `: ${body.slice(0, 180)}` : ""}`
})
const executeRequest = Effect.fn("RecordingEnv.executeRequest")(function* (
request: HttpClientRequest.HttpClientRequest,
) {
const http = yield* HttpClient.HttpClient
return yield* http.execute(request).pipe(Effect.flatMap(responseError))
})
const validateBearer = (url: string, token: Redacted.Redacted<string>, headers: Record<string, string> = {}) =>
HttpClientRequest.get(url).pipe(
HttpClientRequest.setHeaders({ ...headers, authorization: `Bearer ${Redacted.value(token)}` }),
executeRequest,
)
const validateChat = (input: {
readonly url: string
readonly token: Redacted.Redacted<string>
readonly tokenHeader?: string
readonly model: string
readonly headers?: Record<string, string>
}) =>
ProviderShared.jsonPost({
url: input.url,
headers: { ...input.headers, [input.tokenHeader ?? "authorization"]: `Bearer ${Redacted.value(input.token)}` },
body: ProviderShared.encodeJson({
model: input.model,
messages: [{ role: "user", content: "Reply with exactly: ok" }],
max_tokens: 3,
temperature: 0,
}),
}).pipe(executeRequest)
const validateBedrock = (env: Env) =>
Effect.gen(function* () {
const request = yield* Effect.promise(() =>
new AwsV4Signer({
url: `https://bedrock.${env.BEDROCK_RECORDING_REGION || "us-east-1"}.amazonaws.com/foundation-models`,
method: "GET",
service: "bedrock",
region: env.BEDROCK_RECORDING_REGION || "us-east-1",
accessKeyId: env.AWS_ACCESS_KEY_ID,
secretAccessKey: env.AWS_SECRET_ACCESS_KEY,
sessionToken: env.AWS_SESSION_TOKEN || undefined,
}).sign(),
)
return yield* HttpClientRequest.get(request.url.toString()).pipe(
HttpClientRequest.setHeaders(Object.fromEntries(request.headers.entries())),
executeRequest,
)
})
const validateProvider = Effect.fn("RecordingEnv.validateProvider")(function* (provider: Provider, env: Env) {
return yield* (provider.validate?.(env) ?? Effect.succeed("no lightweight validator")).pipe(
Effect.catch((error) => {
if (error instanceof Error) return Effect.succeed(error.message)
return Effect.succeed(String(error))
}),
)
})
const validateProviders = Effect.fn("RecordingEnv.validateProviders")(function* (
providers: ReadonlyArray<Provider>,
env: Env,
) {
const spinner = prompts.spinner()
spinner.start("Validating credentials")
const results = yield* Effect.forEach(
providers,
(provider) => validateProvider(provider, env).pipe(Effect.map((error) => ({ provider, error }))),
{ concurrency: 4 },
)
spinner.stop("Validation complete")
prompts.note(
results
.map(
(result) =>
`${result.error ? "failed" : "ok"} ${result.provider.label}${result.error ? ` - ${result.error}` : ""}`,
)
.join("\n"),
"Credential validation",
)
})
const writeEnvFile = Effect.fn("RecordingEnv.writeFile")(function* (contents: string) {
const fileSystem = yield* FileSystem.FileSystem
yield* fileSystem.makeDirectory(path.dirname(envPath), { recursive: true })
yield* fileSystem.writeFileString(envPath, contents, { mode: 0o600 })
})
const prompt = <A>(run: () => Promise<A | symbol>) => Effect.promise(run).pipe(Effect.map(exitIfCancel))
const chooseConfigurableProviders = Effect.fn("RecordingEnv.chooseConfigurableProviders")(function* (
providers: ReadonlyArray<Provider>,
fileEnv: Env,
) {
const configurable = providers.filter((provider) => requiredVars(provider).length > 0)
const selected = yield* prompt<ReadonlyArray<string>>(() =>
prompts.multiselect({
message: "Select provider credentials to add or override",
options: configurable.map((provider) => ({
value: provider.id,
label: provider.label,
hint: `${providerRequiredStatus(provider, fileEnv)} - ${requiredVars(provider)
.map((item) => item.name)
.join(", ")}`,
})),
initialValues: configurable
.filter((provider) => providerRequiredStatus(provider, fileEnv) === "missing")
.map((provider) => provider.id),
}),
)
return configurable.filter((provider) => selected.includes(provider.id))
})
const promptEnvVar = (item: Provider["vars"][number]) =>
prompt<string>(() => {
const input = {
message: item.label ?? item.name,
validate: (input: string | undefined) => {
if (item.optional) return undefined
return !input || input.length === 0 ? "Leave blank by pressing Esc/cancel, or paste a value" : undefined
},
}
return item.secret === false ? prompts.text(input) : prompts.password(input)
})
const promptProviderValues = Effect.fn("RecordingEnv.promptProviderValues")(function* (
providers: ReadonlyArray<Provider>,
) {
const values: Env = {}
for (const provider of providers) {
prompts.log.info(`${provider.label}: ${provider.note}`)
for (const item of promptVars(provider)) {
if (values[item.name]) continue
const value = yield* promptEnvVar(item)
if (value !== "") values[item.name] = value
}
}
return values
})
const main = Effect.fn("RecordingEnv.main")(function* () {
prompts.intro("LLM recording credentials")
const contents = yield* readEnvFile()
const fileEnv = yield* parseEnv(contents)
const providers = yield* Effect.promise(() => chooseProviders())
printStatus(providers, fileEnv)
if (checkOnly) {
prompts.outro("Check complete")
return
}
if (!interactive) {
prompts.outro("Run this command in a terminal to enter credentials")
return
}
const selectedProviders = yield* chooseConfigurableProviders(providers, fileEnv)
const values = yield* promptProviderValues(selectedProviders)
if (Object.keys(values).length === 0) {
prompts.outro("No changes")
return
}
if (
interactive &&
(yield* prompt(() => prompts.confirm({ message: "Validate credentials before saving?", initialValue: true })))
) {
yield* validateProviders(selectedProviders, envWithValues(fileEnv, values))
}
yield* writeEnvFile(upsertEnv(contents, values))
prompts.log.success(
`Saved ${Object.keys(values).length} value${Object.keys(values).length === 1 ? "" : "s"} to ${envPath}`,
)
prompts.outro("Keep .env.local local. Store shared team credentials in a password manager or vault.")
})
await Effect.runPromise(main().pipe(Effect.provide(NodeFileSystem.layer), Effect.provide(FetchHttpClient.layer)))

35
packages/llm/src/index.ts Normal file
View File

@@ -0,0 +1,35 @@
export { LLMClient, modelLimits, modelRef } from "./route/client"
export { Auth } from "./route/auth"
export { Provider } from "./provider"
export type {
RouteModelInput,
RouteRoutedModelInput,
Interface as LLMClientShape,
Service as LLMClientService,
ModelRefInput,
} from "./route/client"
export * from "./schema"
export { Tool, ToolFailure, toDefinitions, tool } from "./tool"
export type {
AnyExecutableTool,
AnyTool,
ExecutableTool,
ExecutableTools,
Tool as ToolShape,
ToolExecute,
Tools,
ToolSchema,
} from "./tool"
export type {
RunOptions as ToolRunOptions,
RuntimeState as ToolRuntimeState,
StopCondition as ToolStopCondition,
ToolExecution,
} from "./tool-runtime"
export * as LLM from "./llm"
export type {
Definition as ProviderDefinition,
ModelFactory as ProviderModelFactory,
ModelOptions as ProviderModelOptions,
} from "./provider"

224
packages/llm/src/llm.ts Normal file
View File

@@ -0,0 +1,224 @@
import { Effect, JsonSchema, Schema } from "effect"
import {
LLMClient,
modelLimits,
modelRef,
type ModelRefInput,
} from "./route/client"
import {
GenerationOptions,
HttpOptions,
InvalidProviderOutputReason,
LLMError,
LLMEvent,
LLMRequest,
LLMResponse,
Message,
SystemPart,
ToolChoice,
ToolDefinition,
type ContentPart,
ToolCallPart,
ToolResultPart,
} from "./schema"
import { make as makeTool, type ToolSchema } from "./tool"
export type ModelInput = ModelRefInput
export type MessageInput = Message.Input
export type ToolChoiceInput = ToolChoice.Input
export type ToolChoiceMode = ToolChoice.Mode
export type ToolResultInput = Parameters<typeof ToolResultPart.make>[0]
/** Input accepted by `LLM.request`, normalized into the canonical `LLMRequest` class. */
export type RequestInput = Omit<
ConstructorParameters<typeof LLMRequest>[0],
"system" | "messages" | "tools" | "toolChoice" | "generation" | "http" | "providerOptions"
> & {
readonly system?: string | SystemPart | ReadonlyArray<SystemPart>
readonly prompt?: string | ContentPart | ReadonlyArray<ContentPart>
readonly messages?: ReadonlyArray<Message | MessageInput>
readonly tools?: ReadonlyArray<ToolDefinition.Input>
readonly toolChoice?: ToolChoiceInput
readonly generation?: GenerationOptions.Input
readonly providerOptions?: ConstructorParameters<typeof LLMRequest>[0]["providerOptions"]
readonly http?: HttpOptions.Input
}
export const limits = modelLimits
export const text = Message.text
export const system = SystemPart.make
export const message = Message.make
export const user = Message.user
export const assistant = Message.assistant
export const model = modelRef
export const toolDefinition = ToolDefinition.make
export const toolCall = ToolCallPart.make
export const toolResult = ToolResultPart.make
export const toolMessage = Message.tool
export const toolChoiceName = ToolChoice.named
export const toolChoice = ToolChoice.make
export const generation = GenerationOptions.make
export const generate = LLMClient.generate
export const stream = LLMClient.stream
export const stepCountIs = LLMClient.stepCountIs
export const requestInput = (input: LLMRequest): RequestInput => ({
...LLMRequest.input(input),
})
export const request = (input: RequestInput) => {
const {
system: requestSystem,
prompt,
messages,
tools,
toolChoice: requestToolChoice,
generation: requestGeneration,
providerOptions: requestProviderOptions,
http: requestHttp,
...rest
} = input
return new LLMRequest({
...rest,
system: SystemPart.content(requestSystem),
messages: [...(messages?.map(message) ?? []), ...(prompt === undefined ? [] : [user(prompt)])],
tools: tools?.map(toolDefinition) ?? [],
toolChoice: requestToolChoice ? toolChoice(requestToolChoice) : undefined,
generation: requestGeneration === undefined ? undefined : generation(requestGeneration),
providerOptions: requestProviderOptions,
http: requestHttp === undefined ? undefined : HttpOptions.make(requestHttp),
})
}
export const updateRequest = (input: LLMRequest, patch: Partial<RequestInput>) =>
request({ ...requestInput(input), ...patch })
const GENERATE_OBJECT_TOOL_NAME = "generate_object"
const GENERATE_OBJECT_TOOL_DESCRIPTION = "Return the structured result by calling this tool."
type GenerateObjectBase = Omit<RequestInput, "tools" | "toolChoice" | "responseFormat">
export class GenerateObjectResponse<T> {
constructor(
readonly object: T,
readonly response: LLMResponse,
) {}
get events() {
return this.response.events
}
get usage() {
return this.response.usage
}
}
export interface GenerateObjectOptions<S extends ToolSchema<any>> extends GenerateObjectBase {
readonly schema: S
}
export interface GenerateObjectDynamicOptions extends GenerateObjectBase {
/** Raw JSON Schema object describing the expected output shape. */
readonly jsonSchema: JsonSchema.JsonSchema
}
const runGenerateObject = Effect.fn("LLM.generateObject")(function* (
options: GenerateObjectBase,
tool: ReturnType<typeof makeTool>,
) {
const baseRequest = request(options)
const generateRequest = LLMRequest.update(baseRequest, {
toolChoice: ToolChoice.named(GENERATE_OBJECT_TOOL_NAME),
})
const response = yield* LLMClient.generate({
request: generateRequest,
tools: { [GENERATE_OBJECT_TOOL_NAME]: tool },
toolExecution: "none",
})
const call = response.toolCalls.find(
(event) => LLMEvent.is.toolCall(event) && event.name === GENERATE_OBJECT_TOOL_NAME,
)
if (!call || !LLMEvent.is.toolCall(call))
return yield* new LLMError({
module: "LLM",
method: "generateObject",
reason: new InvalidProviderOutputReason({
message: `generateObject: model did not call the forced \`${GENERATE_OBJECT_TOOL_NAME}\` tool`,
}),
})
const object = yield* tool._decode(call.input).pipe(
Effect.mapError(
(error) =>
new LLMError({
module: "LLM",
method: "generateObject",
reason: new InvalidProviderOutputReason({
message: `generateObject: tool input failed schema decode: ${error.message}`,
}),
}),
),
)
return new GenerateObjectResponse(object, response)
})
/**
* Run a model and decode its output against `schema`. Works on every protocol
* because it forces a synthetic tool call internally — provider-native JSON
* modes are intentionally avoided so behaviour is uniform.
*
* Two input modes:
*
* 1. `schema: EffectSchema<T>` — `.object` is decoded and typed as `T`.
* Decode failures surface as `LLMError`.
* 2. `jsonSchema: JsonSchema.JsonSchema` — `.object` is `unknown`. Use when
* the schema is only available at runtime (MCP, plugin manifests). Caller validates.
*/
export function generateObject<S extends ToolSchema<any>>(
options: GenerateObjectOptions<S>,
): Effect.Effect<GenerateObjectResponse<Schema.Schema.Type<S>>, LLMError>
export function generateObject(
options: GenerateObjectDynamicOptions,
): Effect.Effect<GenerateObjectResponse<unknown>, LLMError>
export function generateObject(
options: GenerateObjectOptions<ToolSchema<any>> | GenerateObjectDynamicOptions,
) {
if ("schema" in options) {
const { schema, ...rest } = options
return runGenerateObject(
rest,
makeTool({
description: GENERATE_OBJECT_TOOL_DESCRIPTION,
parameters: schema,
success: Schema.Unknown as ToolSchema<unknown>,
execute: () => Effect.void,
}),
)
}
const { jsonSchema, ...rest } = options
return runGenerateObject(
rest,
makeTool({
description: GENERATE_OBJECT_TOOL_DESCRIPTION,
jsonSchema,
execute: () => Effect.void,
}),
)
}
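// Usage sketch (illustrative; `someModel` and `runtimeSchema` are assumptions,
// and the calls run inside Effect.gen):
//
//   const Person = Schema.Struct({ name: Schema.String, age: Schema.Number })
//   const typed = yield* generateObject({ model: someModel, prompt: "Who is mentioned?", schema: Person })
//   typed.object // decoded and typed as { readonly name: string; readonly age: number }
//
//   const dynamic = yield* generateObject({ model: someModel, prompt: "Who is mentioned?", jsonSchema: runtimeSchema })
//   dynamic.object // unknown; the caller validates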

View File

@@ -0,0 +1,592 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Protocol } from "../route/protocol"
import {
Usage,
type CacheHint,
type FinishReason,
type LLMEvent,
type LLMRequest,
type ProviderMetadata,
type ToolCallPart,
type ToolDefinition,
type ToolResultPart,
} from "../schema"
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "anthropic-messages"
export const DEFAULT_BASE_URL = "https://api.anthropic.com/v1"
export const PATH = "/messages"
// =============================================================================
// Request Body Schema
// =============================================================================
const AnthropicCacheControl = Schema.Struct({ type: Schema.tag("ephemeral") })
const AnthropicTextBlock = Schema.Struct({
type: Schema.tag("text"),
text: Schema.String,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTextBlock = Schema.Schema.Type<typeof AnthropicTextBlock>
const AnthropicThinkingBlock = Schema.Struct({
type: Schema.tag("thinking"),
thinking: Schema.String,
signature: Schema.optional(Schema.String),
cache_control: Schema.optional(AnthropicCacheControl),
})
const AnthropicToolUseBlock = Schema.Struct({
type: Schema.tag("tool_use"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicToolUseBlock = Schema.Schema.Type<typeof AnthropicToolUseBlock>
const AnthropicServerToolUseBlock = Schema.Struct({
type: Schema.tag("server_tool_use"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolUseBlock = Schema.Schema.Type<typeof AnthropicServerToolUseBlock>
// Server tool result blocks: web_search_tool_result, code_execution_tool_result,
// and web_fetch_tool_result. The provider executes the tool and inlines the
// structured result into the assistant turn — there is no client tool_result
// round-trip. We round-trip the structured `content` payload as opaque JSON so
// the next request can echo it back when continuing the conversation.
const AnthropicServerToolResultType = Schema.Literals([
"web_search_tool_result",
"code_execution_tool_result",
"web_fetch_tool_result",
])
type AnthropicServerToolResultType = Schema.Schema.Type<typeof AnthropicServerToolResultType>
const AnthropicServerToolResultBlock = Schema.Struct({
type: AnthropicServerToolResultType,
tool_use_id: Schema.String,
content: Schema.Unknown,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock>
const AnthropicToolResultBlock = Schema.Struct({
type: Schema.tag("tool_result"),
tool_use_id: Schema.String,
content: Schema.String,
is_error: Schema.optional(Schema.Boolean),
cache_control: Schema.optional(AnthropicCacheControl),
})
const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicToolResultBlock])
const AnthropicAssistantBlock = Schema.Union([
AnthropicTextBlock,
AnthropicThinkingBlock,
AnthropicToolUseBlock,
AnthropicServerToolUseBlock,
AnthropicServerToolResultBlock,
])
type AnthropicAssistantBlock = Schema.Schema.Type<typeof AnthropicAssistantBlock>
type AnthropicToolResultBlock = Schema.Schema.Type<typeof AnthropicToolResultBlock>
const AnthropicMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(AnthropicUserBlock) }),
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(AnthropicAssistantBlock) }),
]).pipe(Schema.toTaggedUnion("role"))
type AnthropicMessage = Schema.Schema.Type<typeof AnthropicMessage>
const AnthropicTool = Schema.Struct({
name: Schema.String,
description: Schema.String,
input_schema: JsonObject,
cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTool = Schema.Schema.Type<typeof AnthropicTool>
const AnthropicToolChoice = Schema.Union([
Schema.Struct({ type: Schema.Literals(["auto", "any"]) }),
Schema.Struct({ type: Schema.tag("tool"), name: Schema.String }),
])
const AnthropicThinking = Schema.Struct({
type: Schema.tag("enabled"),
budget_tokens: Schema.Number,
})
const AnthropicBodyFields = {
model: Schema.String,
system: optionalArray(AnthropicTextBlock),
messages: Schema.Array(AnthropicMessage),
tools: optionalArray(AnthropicTool),
tool_choice: Schema.optional(AnthropicToolChoice),
stream: Schema.Literal(true),
max_tokens: Schema.Number,
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
top_k: Schema.optional(Schema.Number),
stop_sequences: optionalArray(Schema.String),
thinking: Schema.optional(AnthropicThinking),
}
const AnthropicMessagesBody = Schema.Struct(AnthropicBodyFields)
export type AnthropicMessagesBody = Schema.Schema.Type<typeof AnthropicMessagesBody>
const AnthropicUsage = Schema.Struct({
input_tokens: Schema.optional(Schema.Number),
output_tokens: Schema.optional(Schema.Number),
cache_creation_input_tokens: optionalNull(Schema.Number),
cache_read_input_tokens: optionalNull(Schema.Number),
})
type AnthropicUsage = Schema.Schema.Type<typeof AnthropicUsage>
const AnthropicStreamBlock = Schema.Struct({
type: Schema.String,
id: Schema.optional(Schema.String),
name: Schema.optional(Schema.String),
text: Schema.optional(Schema.String),
thinking: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
input: Schema.optional(Schema.Unknown),
// *_tool_result blocks arrive whole as content_block_start (no streaming
// delta) with the structured payload in `content` and the originating
// server_tool_use id in `tool_use_id`.
tool_use_id: Schema.optional(Schema.String),
content: Schema.optional(Schema.Unknown),
})
const AnthropicStreamDelta = Schema.Struct({
type: Schema.optional(Schema.String),
text: Schema.optional(Schema.String),
thinking: Schema.optional(Schema.String),
partial_json: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
stop_reason: optionalNull(Schema.String),
stop_sequence: optionalNull(Schema.String),
})
const AnthropicEvent = Schema.Struct({
type: Schema.String,
index: Schema.optional(Schema.Number),
message: Schema.optional(Schema.Struct({ usage: Schema.optional(AnthropicUsage) })),
content_block: Schema.optional(AnthropicStreamBlock),
delta: Schema.optional(AnthropicStreamDelta),
usage: Schema.optional(AnthropicUsage),
error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })),
})
type AnthropicEvent = Schema.Schema.Type<typeof AnthropicEvent>
interface ParserState {
readonly tools: ToolStream.State<number>
readonly usage?: Usage
}
const invalid = ProviderShared.invalidRequest
// =============================================================================
// Request Lowering
// =============================================================================
const cacheControl = (cache: CacheHint | undefined) =>
cache?.type === "ephemeral" ? { type: "ephemeral" as const } : undefined
const anthropicMetadata = (metadata: Record<string, unknown>): ProviderMetadata => ({ anthropic: metadata })
const signatureFromMetadata = (metadata: ProviderMetadata | undefined): string | undefined => {
const anthropic = metadata?.anthropic
if (!ProviderShared.isRecord(anthropic)) return undefined
return typeof anthropic.signature === "string" ? anthropic.signature : undefined
}
const lowerTool = (tool: ToolDefinition): AnthropicTool => ({
name: tool.name,
description: tool.description,
input_schema: tool.inputSchema,
})
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("Anthropic Messages", toolChoice, {
auto: () => ({ type: "auto" as const }),
none: () => undefined,
required: () => ({ type: "any" as const }),
tool: (name) => ({ type: "tool" as const, name }),
})
const lowerToolCall = (part: ToolCallPart): AnthropicToolUseBlock => ({
type: "tool_use",
id: part.id,
name: part.name,
input: part.input,
})
const lowerServerToolCall = (part: ToolCallPart): AnthropicServerToolUseBlock => ({
type: "server_tool_use",
id: part.id,
name: part.name,
input: part.input,
})
// Server tool result blocks are typed by name. Anthropic ships three today;
// extend this list when new server tools land. The block content is the
// structured payload returned by the provider, which we round-trip as-is.
const serverToolResultType = (name: string): AnthropicServerToolResultType | undefined => {
if (name === "web_search") return "web_search_tool_result"
if (name === "code_execution") return "code_execution_tool_result"
if (name === "web_fetch") return "web_fetch_tool_result"
return undefined
}
const lowerServerToolResult = Effect.fn("AnthropicMessages.lowerServerToolResult")(function* (part: ToolResultPart) {
const wireType = serverToolResultType(part.name)
if (!wireType)
return yield* invalid(`Anthropic Messages does not know how to round-trip the server tool result for ${part.name}`)
return { type: wireType, tool_use_id: part.id, content: part.result.value } satisfies AnthropicServerToolResultBlock
})
const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (request: LLMRequest) {
const messages: AnthropicMessage[] = []
for (const message of request.messages) {
if (message.role === "user") {
const content: AnthropicTextBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text"]))
return yield* ProviderShared.unsupportedContent("Anthropic Messages", "user", ["text"])
content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
}
messages.push({ role: "user", content })
continue
}
if (message.role === "assistant") {
const content: AnthropicAssistantBlock[] = []
for (const part of message.content) {
if (part.type === "text") {
content.push({ type: "text", text: part.text, cache_control: cacheControl(part.cache) })
continue
}
if (part.type === "reasoning") {
content.push({
type: "thinking",
thinking: part.text,
signature: part.encrypted ?? signatureFromMetadata(part.providerMetadata),
})
continue
}
if (part.type === "tool-call") {
content.push(part.providerExecuted ? lowerServerToolCall(part) : lowerToolCall(part))
continue
}
if (part.type === "tool-result" && part.providerExecuted) {
content.push(yield* lowerServerToolResult(part))
continue
}
return yield* invalid(
`Anthropic Messages assistant messages only support text, reasoning, tool-call, and provider-executed tool-result content for now`,
)
}
messages.push({ role: "assistant", content })
continue
}
const content: AnthropicToolResultBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("Anthropic Messages", "tool", ["tool-result"])
content.push({
type: "tool_result",
tool_use_id: part.id,
content: ProviderShared.toolResultText(part),
is_error: part.result.type === "error" ? true : undefined,
})
}
messages.push({ role: "user", content })
}
return messages
})
const anthropicOptions = (request: LLMRequest) => request.providerOptions?.anthropic
const lowerThinking = Effect.fn("AnthropicMessages.lowerThinking")(function* (request: LLMRequest) {
const thinking = anthropicOptions(request)?.thinking
if (!ProviderShared.isRecord(thinking) || thinking.type !== "enabled") return undefined
const budget =
typeof thinking.budgetTokens === "number"
? thinking.budgetTokens
: typeof thinking.budget_tokens === "number"
? thinking.budget_tokens
: undefined
if (budget === undefined) return yield* invalid("Anthropic thinking provider option requires budgetTokens")
return { type: "enabled" as const, budget_tokens: budget }
})
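// e.g. (illustrative) providerOptions: { anthropic: { thinking: { type: "enabled", budgetTokens: 2048 } } }
// lowers to the wire shape { type: "enabled", budget_tokens: 2048 }; the
// snake_case budget_tokens spelling is accepted on input as well.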
const fromRequest = Effect.fn("AnthropicMessages.fromRequest")(function* (request: LLMRequest) {
const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
const generation = request.generation
return {
model: request.model.id,
system:
request.system.length === 0
? undefined
: request.system.map((part) => ({
type: "text" as const,
text: part.text,
cache_control: cacheControl(part.cache),
})),
messages: yield* lowerMessages(request),
tools: request.tools.length === 0 || request.toolChoice?.type === "none" ? undefined : request.tools.map(lowerTool),
tool_choice: toolChoice,
stream: true as const,
max_tokens: generation?.maxTokens ?? request.model.limits.output ?? 4096,
temperature: generation?.temperature,
top_p: generation?.topP,
top_k: generation?.topK,
stop_sequences: generation?.stop,
thinking: yield* lowerThinking(request),
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
if (reason === "end_turn" || reason === "stop_sequence" || reason === "pause_turn") return "stop"
if (reason === "max_tokens") return "length"
if (reason === "tool_use") return "tool-calls"
if (reason === "refusal") return "content-filter"
return "unknown"
}
const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.input_tokens,
outputTokens: usage.output_tokens,
cacheReadInputTokens: usage.cache_read_input_tokens ?? undefined,
cacheWriteInputTokens: usage.cache_creation_input_tokens ?? undefined,
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, undefined),
native: usage,
})
}
// Anthropic emits usage on `message_start` and again on `message_delta` — the
// final delta carries the authoritative totals. Right-biased merge: each
// field prefers `right` when defined, falls back to `left`. `totalTokens` is
// recomputed from the merged input/output to stay consistent.
const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => {
if (!left) return right
if (!right) return left
const inputTokens = right.inputTokens ?? left.inputTokens
const outputTokens = right.outputTokens ?? left.outputTokens
return new Usage({
inputTokens,
outputTokens,
cacheReadInputTokens: right.cacheReadInputTokens ?? left.cacheReadInputTokens,
cacheWriteInputTokens: right.cacheWriteInputTokens ?? left.cacheWriteInputTokens,
totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined),
native: { ...left.native, ...right.native },
})
}
// Server tool result blocks come whole in `content_block_start` (no streaming
// delta sequence). We convert the payload to a `tool-result` event with
// `providerExecuted: true`. The runtime appends it to the assistant message
// for round-trip; downstream consumers can inspect `result.value` for the
// structured payload.
const SERVER_TOOL_RESULT_NAMES: Record<AnthropicServerToolResultType, string> = {
web_search_tool_result: "web_search",
code_execution_tool_result: "code_execution",
web_fetch_tool_result: "web_fetch",
}
const isServerToolResultType = (type: string): type is AnthropicServerToolResultType => type in SERVER_TOOL_RESULT_NAMES
const serverToolResultEvent = (block: NonNullable<AnthropicEvent["content_block"]>): LLMEvent | undefined => {
if (!block.type || !isServerToolResultType(block.type)) return undefined
const errorPayload =
typeof block.content === "object" && block.content !== null && "type" in block.content
? String((block.content as Record<string, unknown>).type)
: ""
const isError = errorPayload.endsWith("_tool_result_error")
return {
type: "tool-result",
id: block.tool_use_id ?? "",
name: SERVER_TOOL_RESULT_NAMES[block.type],
result: isError ? { type: "error", value: block.content } : { type: "json", value: block.content },
providerExecuted: true,
providerMetadata: anthropicMetadata({ blockType: block.type }),
}
}
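// e.g. (illustrative, with a made-up id) a web_search_tool_result block becomes:
//   { type: "tool-result", id: "srvtoolu_123", name: "web_search",
//     result: { type: "json", value: <structured payload> },
//     providerExecuted: true,
//     providerMetadata: { anthropic: { blockType: "web_search_tool_result" } } }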
type StepResult = readonly [ParserState, ReadonlyArray<LLMEvent>]
const NO_EVENTS: StepResult[1] = []
const onMessageStart = (state: ParserState, event: AnthropicEvent): StepResult => {
const usage = mapUsage(event.message?.usage)
return [usage ? { ...state, usage: mergeUsage(state.usage, usage) } : state, NO_EVENTS]
}
const onContentBlockStart = (state: ParserState, event: AnthropicEvent): StepResult => {
const block = event.content_block
if (!block) return [state, NO_EVENTS]
if ((block.type === "tool_use" || block.type === "server_tool_use") && event.index !== undefined) {
return [
{
...state,
tools: ToolStream.start(state.tools, event.index, {
id: block.id ?? String(event.index),
name: block.name ?? "",
providerExecuted: block.type === "server_tool_use",
}),
},
NO_EVENTS,
]
}
if (block.type === "text" && block.text) {
return [state, [{ type: "text-delta", text: block.text }]]
}
if (block.type === "thinking" && block.thinking) {
return [
state,
[
{
type: "reasoning-delta",
text: block.thinking,
...(block.signature ? { providerMetadata: anthropicMetadata({ signature: block.signature }) } : {}),
},
],
]
}
const result = serverToolResultEvent(block)
return [state, result ? [result] : NO_EVENTS]
}
const onContentBlockDelta = Effect.fn("AnthropicMessages.onContentBlockDelta")(function* (
state: ParserState,
event: AnthropicEvent,
) {
const delta = event.delta
if (delta?.type === "text_delta" && delta.text) {
return [state, [{ type: "text-delta", text: delta.text }]] satisfies StepResult
}
if (delta?.type === "thinking_delta" && delta.thinking) {
return [state, [{ type: "reasoning-delta", text: delta.thinking }]] satisfies StepResult
}
if (delta?.type === "signature_delta" && delta.signature) {
return [
state,
[{ type: "reasoning-delta", text: "", providerMetadata: anthropicMetadata({ signature: delta.signature }) }],
] satisfies StepResult
}
if (delta?.type === "input_json_delta" && event.index !== undefined) {
if (!delta.partial_json) return [state, NO_EVENTS] satisfies StepResult
const result = ToolStream.appendExisting(
ADAPTER,
state.tools,
event.index,
delta.partial_json,
"Anthropic Messages tool argument delta is missing its tool call",
)
if (ToolStream.isError(result)) return yield* result
return [{ ...state, tools: result.tools }, result.event ? [result.event] : NO_EVENTS] satisfies StepResult
}
return [state, NO_EVENTS] satisfies StepResult
})
const onContentBlockStop = Effect.fn("AnthropicMessages.onContentBlockStop")(function* (
state: ParserState,
event: AnthropicEvent,
) {
if (event.index === undefined) return [state, NO_EVENTS] satisfies StepResult
const result = yield* ToolStream.finish(ADAPTER, state.tools, event.index)
return [{ ...state, tools: result.tools }, result.event ? [result.event] : NO_EVENTS] satisfies StepResult
})
const onMessageDelta = (state: ParserState, event: AnthropicEvent): StepResult => {
const usage = mergeUsage(state.usage, mapUsage(event.usage))
return [
{ ...state, usage },
[
{
type: "request-finish",
reason: mapFinishReason(event.delta?.stop_reason),
usage,
...(event.delta?.stop_sequence
? { providerMetadata: anthropicMetadata({ stopSequence: event.delta.stop_sequence }) }
: {}),
},
],
]
}
const onError = (state: ParserState, event: AnthropicEvent): StepResult => [
state,
[{ type: "provider-error", message: event.error?.message ?? "Anthropic Messages stream error" }],
]
const step = (state: ParserState, event: AnthropicEvent) => {
if (event.type === "message_start") return Effect.succeed(onMessageStart(state, event))
if (event.type === "content_block_start") return Effect.succeed(onContentBlockStart(state, event))
if (event.type === "content_block_delta") return onContentBlockDelta(state, event)
if (event.type === "content_block_stop") return onContentBlockStop(state, event)
if (event.type === "message_delta") return Effect.succeed(onMessageDelta(state, event))
if (event.type === "error") return Effect.succeed(onError(state, event))
return Effect.succeed<StepResult>([state, NO_EVENTS])
}
// =============================================================================
// Protocol And Anthropic Route
// =============================================================================
/**
* The Anthropic Messages protocol — request body construction, body schema,
* and the streaming-event state machine. Used by native Anthropic Cloud and
* (once registered) Vertex Anthropic / Bedrock-hosted Anthropic passthrough.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: AnthropicMessagesBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(AnthropicEvent),
initial: () => ({ tools: ToolStream.empty<number>() }),
step,
},
})
export const route = Route.make({
id: ADAPTER,
protocol,
endpoint: Endpoint.path(PATH),
auth: Auth.apiKeyHeader("x-api-key"),
framing: Framing.sse,
headers: () => ({ "anthropic-version": "2023-06-01" }),
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = Route.model(route, {
provider: "anthropic",
baseURL: DEFAULT_BASE_URL,
})
export * as AnthropicMessages from "./anthropic-messages"

View File

@@ -0,0 +1,531 @@
import { Effect, Schema } from "effect"
import { Route, type RouteModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Protocol } from "../route/protocol"
import {
Usage,
type CacheHint,
type FinishReason,
type LLMEvent,
type LLMRequest,
type ToolCallPart,
type ToolDefinition,
type ToolResultPart,
} from "../schema"
import { BedrockEventStream } from "./bedrock-event-stream"
import { JsonObject, optionalArray, ProviderShared } from "./shared"
import { BedrockAuth, type Credentials as BedrockCredentials } from "./utils/bedrock-auth"
import { BedrockCache } from "./utils/bedrock-cache"
import { BedrockMedia } from "./utils/bedrock-media"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "bedrock-converse"
export type { Credentials as BedrockCredentials } from "./utils/bedrock-auth"
// =============================================================================
// Public Model Input
// =============================================================================
export type BedrockConverseModelInput = RouteModelInput & {
/**
* Bearer API key (Bedrock's newer API key auth). Sets the `Authorization`
* header and bypasses SigV4 signing. Mutually exclusive with `credentials`.
*/
readonly apiKey?: string
/**
* AWS credentials for SigV4 signing. The route signs each request at
* `toHttp` time using `aws4fetch`. Mutually exclusive with `apiKey`.
*/
readonly credentials?: BedrockCredentials
readonly headers?: Record<string, string>
}
// =============================================================================
// Request Body Schema
// =============================================================================
const BedrockTextBlock = Schema.Struct({
text: Schema.String,
})
type BedrockTextBlock = Schema.Schema.Type<typeof BedrockTextBlock>
const BedrockToolUseBlock = Schema.Struct({
toolUse: Schema.Struct({
toolUseId: Schema.String,
name: Schema.String,
input: Schema.Unknown,
}),
})
type BedrockToolUseBlock = Schema.Schema.Type<typeof BedrockToolUseBlock>
const BedrockToolResultContentItem = Schema.Union([
Schema.Struct({ text: Schema.String }),
Schema.Struct({ json: Schema.Unknown }),
])
const BedrockToolResultBlock = Schema.Struct({
toolResult: Schema.Struct({
toolUseId: Schema.String,
content: Schema.Array(BedrockToolResultContentItem),
status: Schema.optional(Schema.Literals(["success", "error"])),
}),
})
type BedrockToolResultBlock = Schema.Schema.Type<typeof BedrockToolResultBlock>
const BedrockReasoningBlock = Schema.Struct({
reasoningContent: Schema.Struct({
reasoningText: Schema.optional(
Schema.Struct({
text: Schema.String,
signature: Schema.optional(Schema.String),
}),
),
}),
})
const BedrockUserBlock = Schema.Union([
BedrockTextBlock,
BedrockMedia.ImageBlock,
BedrockMedia.DocumentBlock,
BedrockToolResultBlock,
BedrockCache.CachePointBlock,
])
type BedrockUserBlock = Schema.Schema.Type<typeof BedrockUserBlock>
const BedrockAssistantBlock = Schema.Union([
BedrockTextBlock,
BedrockReasoningBlock,
BedrockToolUseBlock,
BedrockCache.CachePointBlock,
])
type BedrockAssistantBlock = Schema.Schema.Type<typeof BedrockAssistantBlock>
const BedrockMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(BedrockUserBlock) }),
Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(BedrockAssistantBlock) }),
]).pipe(Schema.toTaggedUnion("role"))
type BedrockMessage = Schema.Schema.Type<typeof BedrockMessage>
const BedrockSystemBlock = Schema.Union([BedrockTextBlock, BedrockCache.CachePointBlock])
type BedrockSystemBlock = Schema.Schema.Type<typeof BedrockSystemBlock>
const BedrockTool = Schema.Struct({
toolSpec: Schema.Struct({
name: Schema.String,
description: Schema.String,
inputSchema: Schema.Struct({
json: JsonObject,
}),
}),
})
type BedrockTool = Schema.Schema.Type<typeof BedrockTool>
const BedrockToolChoice = Schema.Union([
Schema.Struct({ auto: Schema.Struct({}) }),
Schema.Struct({ any: Schema.Struct({}) }),
Schema.Struct({ tool: Schema.Struct({ name: Schema.String }) }),
])
const BedrockBodyFields = {
modelId: Schema.String,
messages: Schema.Array(BedrockMessage),
system: optionalArray(BedrockSystemBlock),
inferenceConfig: Schema.optional(
Schema.Struct({
maxTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
stopSequences: optionalArray(Schema.String),
}),
),
toolConfig: Schema.optional(
Schema.Struct({
tools: Schema.Array(BedrockTool),
toolChoice: Schema.optional(BedrockToolChoice),
}),
),
additionalModelRequestFields: Schema.optional(JsonObject),
}
const BedrockConverseBody = Schema.Struct(BedrockBodyFields)
export type BedrockConverseBody = Schema.Schema.Type<typeof BedrockConverseBody>
const BedrockUsageSchema = Schema.Struct({
inputTokens: Schema.optional(Schema.Number),
outputTokens: Schema.optional(Schema.Number),
totalTokens: Schema.optional(Schema.Number),
cacheReadInputTokens: Schema.optional(Schema.Number),
cacheWriteInputTokens: Schema.optional(Schema.Number),
})
type BedrockUsageSchema = Schema.Schema.Type<typeof BedrockUsageSchema>
// Streaming event shape — the AWS event stream wraps each JSON payload by its
// `:event-type` header (e.g. `messageStart`, `contentBlockDelta`). We
// reconstruct that wrapping in `BedrockEventStream.framing` so the event
// schema can stay a plain discriminated record.
const BedrockEvent = Schema.Struct({
messageStart: Schema.optional(Schema.Struct({ role: Schema.String })),
contentBlockStart: Schema.optional(
Schema.Struct({
contentBlockIndex: Schema.Number,
start: Schema.optional(
Schema.Struct({
toolUse: Schema.optional(Schema.Struct({ toolUseId: Schema.String, name: Schema.String })),
}),
),
}),
),
contentBlockDelta: Schema.optional(
Schema.Struct({
contentBlockIndex: Schema.Number,
delta: Schema.optional(
Schema.Struct({
text: Schema.optional(Schema.String),
toolUse: Schema.optional(Schema.Struct({ input: Schema.String })),
reasoningContent: Schema.optional(
Schema.Struct({
text: Schema.optional(Schema.String),
signature: Schema.optional(Schema.String),
}),
),
}),
),
}),
),
contentBlockStop: Schema.optional(Schema.Struct({ contentBlockIndex: Schema.Number })),
messageStop: Schema.optional(
Schema.Struct({
stopReason: Schema.String,
additionalModelResponseFields: Schema.optional(Schema.Unknown),
}),
),
metadata: Schema.optional(
Schema.Struct({
usage: Schema.optional(BedrockUsageSchema),
metrics: Schema.optional(Schema.Unknown),
}),
),
internalServerException: Schema.optional(Schema.Struct({ message: Schema.String })),
modelStreamErrorException: Schema.optional(Schema.Struct({ message: Schema.String })),
validationException: Schema.optional(Schema.Struct({ message: Schema.String })),
throttlingException: Schema.optional(Schema.Struct({ message: Schema.String })),
serviceUnavailableException: Schema.optional(Schema.Struct({ message: Schema.String })),
})
type BedrockEvent = Schema.Schema.Type<typeof BedrockEvent>
// =============================================================================
// Request Lowering
// =============================================================================
const lowerTool = (tool: ToolDefinition): BedrockTool => ({
toolSpec: {
name: tool.name,
description: tool.description,
inputSchema: { json: tool.inputSchema },
},
})
const textWithCache = (
text: string,
cache: CacheHint | undefined,
): Array<BedrockTextBlock | BedrockCache.CachePointBlock> => {
const cachePoint = BedrockCache.block(cache)
return cachePoint ? [{ text }, cachePoint] : [{ text }]
}
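// e.g. (illustrative) textWithCache("Be terse.", { type: "ephemeral" }) yields
// [{ text: "Be terse." }, <cache point marker>] whereas an undefined hint
// yields just [{ text: "Be terse." }]; the marker shape is whatever
// BedrockCache.block emits.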
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("Bedrock Converse", toolChoice, {
auto: () => ({ auto: {} }) as const,
none: () => undefined,
required: () => ({ any: {} }) as const,
tool: (name) => ({ tool: { name } }) as const,
})
const lowerToolCall = (part: ToolCallPart): BedrockToolUseBlock => ({
toolUse: {
toolUseId: part.id,
name: part.name,
input: part.input,
},
})
const lowerToolResult = (part: ToolResultPart): BedrockToolResultBlock => ({
toolResult: {
toolUseId: part.id,
content:
part.result.type === "text" || part.result.type === "error"
? [{ text: ProviderShared.toolResultText(part) }]
: [{ json: part.result.value }],
status: part.result.type === "error" ? "error" : "success",
},
})
const lowerMessages = Effect.fn("BedrockConverse.lowerMessages")(function* (request: LLMRequest) {
const messages: BedrockMessage[] = []
for (const message of request.messages) {
if (message.role === "user") {
const content: BedrockUserBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "media"]))
return yield* ProviderShared.unsupportedContent("Bedrock Converse", "user", ["text", "media"])
if (part.type === "text") {
content.push(...textWithCache(part.text, part.cache))
continue
}
if (part.type === "media") {
content.push(yield* BedrockMedia.lower(part))
continue
}
}
messages.push({ role: "user", content })
continue
}
if (message.role === "assistant") {
const content: BedrockAssistantBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"]))
return yield* ProviderShared.unsupportedContent("Bedrock Converse", "assistant", [
"text",
"reasoning",
"tool-call",
])
if (part.type === "text") {
content.push(...textWithCache(part.text, part.cache))
continue
}
if (part.type === "reasoning") {
content.push({
reasoningContent: {
reasoningText: { text: part.text, signature: part.encrypted },
},
})
continue
}
if (part.type === "tool-call") {
content.push(lowerToolCall(part))
continue
}
}
messages.push({ role: "assistant", content })
continue
}
const content: BedrockToolResultBlock[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("Bedrock Converse", "tool", ["tool-result"])
content.push(lowerToolResult(part))
}
messages.push({ role: "user", content })
}
return messages
})
// System prompts share the cache-point convention: emit the text block, then
// optionally a positional `cachePoint` marker.
const lowerSystem = (system: ReadonlyArray<LLMRequest["system"][number]>): BedrockSystemBlock[] =>
system.flatMap((part) => textWithCache(part.text, part.cache))
const fromRequest = Effect.fn("BedrockConverse.fromRequest")(function* (request: LLMRequest) {
const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
const generation = request.generation
return {
modelId: request.model.id,
messages: yield* lowerMessages(request),
system: request.system.length === 0 ? undefined : lowerSystem(request.system),
inferenceConfig:
generation?.maxTokens === undefined &&
generation?.temperature === undefined &&
generation?.topP === undefined &&
(generation?.stop === undefined || generation.stop.length === 0)
? undefined
: {
maxTokens: generation?.maxTokens,
temperature: generation?.temperature,
topP: generation?.topP,
stopSequences: generation?.stop,
},
toolConfig:
request.tools.length > 0 && request.toolChoice?.type !== "none"
? { tools: request.tools.map(lowerTool), toolChoice }
: undefined,
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapFinishReason = (reason: string): FinishReason => {
if (reason === "end_turn" || reason === "stop_sequence") return "stop"
if (reason === "max_tokens") return "length"
if (reason === "tool_use") return "tool-calls"
if (reason === "content_filtered" || reason === "guardrail_intervened") return "content-filter"
return "unknown"
}
const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
cacheReadInputTokens: usage.cacheReadInputTokens,
cacheWriteInputTokens: usage.cacheWriteInputTokens,
native: usage,
})
}
interface ParserState {
readonly tools: ToolStream.State<number>
// Bedrock splits the finish into `messageStop` (carries `stopReason`) and
// `metadata` (carries usage). Hold the terminal event in state so `onHalt`
// can emit exactly one finish after both chunks have had a chance to arrive.
readonly pendingFinish: { readonly reason: FinishReason; readonly usage?: Usage } | undefined
}
const step = (state: ParserState, event: BedrockEvent) =>
Effect.gen(function* () {
if (event.contentBlockStart?.start?.toolUse) {
const index = event.contentBlockStart.contentBlockIndex
return [
{
...state,
tools: ToolStream.start(state.tools, index, {
id: event.contentBlockStart.start.toolUse.toolUseId,
name: event.contentBlockStart.start.toolUse.name,
}),
},
[],
] as const
}
if (event.contentBlockDelta?.delta?.text) {
return [state, [{ type: "text-delta" as const, text: event.contentBlockDelta.delta.text }]] as const
}
if (event.contentBlockDelta?.delta?.reasoningContent?.text) {
return [
state,
[{ type: "reasoning-delta" as const, text: event.contentBlockDelta.delta.reasoningContent.text }],
] as const
}
if (event.contentBlockDelta?.delta?.toolUse) {
const index = event.contentBlockDelta.contentBlockIndex
const result = ToolStream.appendExisting(
ADAPTER,
state.tools,
index,
event.contentBlockDelta.delta.toolUse.input,
"Bedrock Converse tool delta is missing its tool call",
)
if (ToolStream.isError(result)) return yield* result
return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const
}
if (event.contentBlockStop) {
const result = yield* ToolStream.finish(ADAPTER, state.tools, event.contentBlockStop.contentBlockIndex)
return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const
}
if (event.messageStop) {
return [
{
...state,
pendingFinish: { reason: mapFinishReason(event.messageStop.stopReason), usage: state.pendingFinish?.usage },
},
[],
] as const
}
if (event.metadata) {
const usage = mapUsage(event.metadata.usage)
return [{ ...state, pendingFinish: { reason: state.pendingFinish?.reason ?? "stop", usage } }, []] as const
}
if (event.internalServerException || event.modelStreamErrorException || event.serviceUnavailableException) {
const message =
event.internalServerException?.message ??
event.modelStreamErrorException?.message ??
event.serviceUnavailableException?.message ??
"Bedrock Converse stream error"
return [state, [{ type: "provider-error" as const, message, retryable: true }]] as const
}
if (event.validationException || event.throttlingException) {
const message =
event.validationException?.message ?? event.throttlingException?.message ?? "Bedrock Converse error"
return [
state,
[{ type: "provider-error" as const, message, retryable: event.throttlingException !== undefined }],
] as const
}
return [state, []] as const
})
const framing = BedrockEventStream.framing(ADAPTER)
const onHalt = (state: ParserState): ReadonlyArray<LLMEvent> =>
state.pendingFinish
? [{ type: "request-finish", reason: state.pendingFinish.reason, usage: state.pendingFinish.usage }]
: []
// =============================================================================
// Protocol And Bedrock Route
// =============================================================================
/**
* The Bedrock Converse protocol — request body construction, body schema, and
* the streaming-event state machine.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: BedrockConverseBody,
from: fromRequest,
},
stream: {
event: BedrockEvent,
initial: () => ({ tools: ToolStream.empty<number>(), pendingFinish: undefined }),
step,
onHalt,
},
})
export const route = Route.make({
id: ADAPTER,
protocol,
// Bedrock's URL embeds the region in the host (set on `model.baseURL` by
// the provider helper from credentials) and the validated modelId in the
// path. We read the validated body so the URL matches the body that gets
// signed.
endpoint: Endpoint.path<BedrockConverseBody>(
({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`,
),
auth: BedrockAuth.auth,
framing,
})
export const nativeCredentials = BedrockAuth.nativeCredentials
const bedrockModel = Route.model(
route,
{
provider: "bedrock",
},
{
mapInput: (input: BedrockConverseModelInput) => {
const { credentials, ...rest } = input
const region = credentials?.region ?? "us-east-1"
return {
...rest,
baseURL: rest.baseURL ?? `https://bedrock-runtime.${region}.amazonaws.com`,
native: nativeCredentials(input.native, credentials),
}
},
},
)
export const model = bedrockModel
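// Usage sketch (illustrative; credential fields beyond region live in
// utils/bedrock-auth and are elided here): SigV4 credentials pick the
// regional endpoint when baseURL is not set.
//
//   const m = model({ credentials: { region: "eu-west-1", /* ...keys */ } })
//   // baseURL defaults to https://bedrock-runtime.eu-west-1.amazonaws.com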
export * as BedrockConverse from "./bedrock-converse"

View File

@@ -0,0 +1,87 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
import { Effect, Stream } from "effect"
import type { Framing } from "../route/framing"
import { ProviderShared } from "./shared"
// Bedrock streams responses using the AWS event stream binary protocol — each
// frame is `[length:4][headers-length:4][prelude-crc:4][headers][payload][crc:4]`.
// We use `@smithy/eventstream-codec` to validate framing and CRCs, then
// reconstruct the JSON wrapping by `:event-type` so the chunk schema can match.
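// e.g. (illustrative) a frame whose `:event-type` header is `contentBlockDelta`
// and whose payload is {"contentBlockIndex":0,"delta":{"text":"Hi"}} is
// re-emitted as { contentBlockDelta: { contentBlockIndex: 0, delta: { text: "Hi" } } }.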
const eventCodec = new EventStreamCodec(toUtf8, fromUtf8)
const utf8 = new TextDecoder()
// Cursor-tracking buffer state. Bytes accumulate in `buffer`; `offset` is the
// read position. Reading by `subarray` is zero-copy. We only allocate a fresh
// buffer when a new network chunk arrives and we need to append.
interface FrameBufferState {
readonly buffer: Uint8Array
readonly offset: number
}
const initialFrameBuffer: FrameBufferState = { buffer: new Uint8Array(0), offset: 0 }
const appendChunk = (state: FrameBufferState, chunk: Uint8Array): FrameBufferState => {
const remaining = state.buffer.length - state.offset
// Compact: drop the consumed prefix and append the new chunk in one alloc.
// This bounds buffer growth to at most one network chunk past the live
// window, regardless of stream length.
const next = new Uint8Array(remaining + chunk.length)
next.set(state.buffer.subarray(state.offset), 0)
next.set(chunk, remaining)
return { buffer: next, offset: 0 }
}
const consumeFrames = (route: string) => (state: FrameBufferState, chunk: Uint8Array) =>
Effect.gen(function* () {
let cursor = appendChunk(state, chunk)
const out: object[] = []
while (cursor.buffer.length - cursor.offset >= 4) {
const view = cursor.buffer.subarray(cursor.offset)
const totalLength = new DataView(view.buffer, view.byteOffset, view.byteLength).getUint32(0, false)
if (view.length < totalLength) break
const decoded = yield* Effect.try({
try: () => eventCodec.decode(view.subarray(0, totalLength)),
catch: (error) =>
ProviderShared.eventError(
route,
`Failed to decode Bedrock Converse event-stream frame: ${
error instanceof Error ? error.message : String(error)
}`,
),
})
cursor = { buffer: cursor.buffer, offset: cursor.offset + totalLength }
if (decoded.headers[":message-type"]?.value !== "event") continue
const eventType = decoded.headers[":event-type"]?.value
if (typeof eventType !== "string") continue
const payload = utf8.decode(decoded.body)
if (!payload) continue
// The AWS event stream pads short payloads with a `p` field. Drop it
// before handing the object to the chunk schema. JSON decode goes
// through the shared Schema-driven codec to satisfy the package rule
// against ad-hoc `JSON.parse` calls.
const parsed = (yield* ProviderShared.parseJson(
route,
payload,
"Failed to parse Bedrock Converse event-stream payload",
)) as Record<string, unknown>
delete parsed.p
out.push({ [eventType]: parsed })
}
return [cursor, out] as const
})
/**
* AWS event-stream framing for Bedrock Converse. Each frame is decoded by
* `@smithy/eventstream-codec` (length + header + payload + CRC) and rewrapped
* under its `:event-type` header so the chunk schema can match the JSON
* payload directly.
*/
export const framing = (route: string): Framing<object> => ({
id: "aws-event-stream",
frame: (bytes) => bytes.pipe(Stream.mapAccumEffect(() => initialFrameBuffer, consumeFrames(route))),
})
export * as BedrockEventStream from "./bedrock-event-stream"

View File

@@ -0,0 +1,397 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Protocol } from "../route/protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type MediaPart,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { JsonObject, optionalArray, ProviderShared } from "./shared"
import { GeminiToolSchema } from "./utils/gemini-tool-schema"
const ADAPTER = "gemini"
export const DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
// =============================================================================
// Request Body Schema
// =============================================================================
const GeminiTextPart = Schema.Struct({
text: Schema.String,
thought: Schema.optional(Schema.Boolean),
thoughtSignature: Schema.optional(Schema.String),
})
const GeminiInlineDataPart = Schema.Struct({
inlineData: Schema.Struct({
mimeType: Schema.String,
data: Schema.String,
}),
})
const GeminiFunctionCallPart = Schema.Struct({
functionCall: Schema.Struct({
name: Schema.String,
args: Schema.Unknown,
}),
thoughtSignature: Schema.optional(Schema.String),
})
const GeminiFunctionResponsePart = Schema.Struct({
functionResponse: Schema.Struct({
name: Schema.String,
response: Schema.Unknown,
}),
})
const GeminiContentPart = Schema.Union([
GeminiTextPart,
GeminiInlineDataPart,
GeminiFunctionCallPart,
GeminiFunctionResponsePart,
])
const GeminiContent = Schema.Struct({
role: Schema.Literals(["user", "model"]),
parts: Schema.Array(GeminiContentPart),
})
type GeminiContent = Schema.Schema.Type<typeof GeminiContent>
const GeminiSystemInstruction = Schema.Struct({
parts: Schema.Array(Schema.Struct({ text: Schema.String })),
})
const GeminiFunctionDeclaration = Schema.Struct({
name: Schema.String,
description: Schema.String,
parameters: Schema.optional(JsonObject),
})
const GeminiTool = Schema.Struct({
functionDeclarations: Schema.Array(GeminiFunctionDeclaration),
})
const GeminiToolConfig = Schema.Struct({
functionCallingConfig: Schema.Struct({
mode: Schema.Literals(["AUTO", "NONE", "ANY"]),
allowedFunctionNames: optionalArray(Schema.String),
}),
})
const GeminiThinkingConfig = Schema.Struct({
thinkingBudget: Schema.optional(Schema.Number),
includeThoughts: Schema.optional(Schema.Boolean),
})
const GeminiGenerationConfig = Schema.Struct({
maxOutputTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
topK: Schema.optional(Schema.Number),
stopSequences: optionalArray(Schema.String),
thinkingConfig: Schema.optional(GeminiThinkingConfig),
})
const GeminiBodyFields = {
contents: Schema.Array(GeminiContent),
systemInstruction: Schema.optional(GeminiSystemInstruction),
tools: optionalArray(GeminiTool),
toolConfig: Schema.optional(GeminiToolConfig),
generationConfig: Schema.optional(GeminiGenerationConfig),
}
const GeminiBody = Schema.Struct(GeminiBodyFields)
export type GeminiBody = Schema.Schema.Type<typeof GeminiBody>
const GeminiUsage = Schema.Struct({
cachedContentTokenCount: Schema.optional(Schema.Number),
thoughtsTokenCount: Schema.optional(Schema.Number),
promptTokenCount: Schema.optional(Schema.Number),
candidatesTokenCount: Schema.optional(Schema.Number),
totalTokenCount: Schema.optional(Schema.Number),
})
type GeminiUsage = Schema.Schema.Type<typeof GeminiUsage>
const GeminiCandidate = Schema.Struct({
content: Schema.optional(GeminiContent),
finishReason: Schema.optional(Schema.String),
})
const GeminiEvent = Schema.Struct({
candidates: optionalArray(GeminiCandidate),
usageMetadata: Schema.optional(GeminiUsage),
})
type GeminiEvent = Schema.Schema.Type<typeof GeminiEvent>
interface ParserState {
readonly finishReason?: string
readonly hasToolCalls: boolean
readonly nextToolCallId: number
readonly usage?: Usage
}
const invalid = ProviderShared.invalidRequest
const mediaData = ProviderShared.mediaBytes
// =============================================================================
// Tool Schema Conversion
// =============================================================================
// Tool-schema conversion has two distinct concerns:
//
// 1. Sanitize — fix common authoring mistakes Gemini rejects: integer/number
// enums (must be strings), `required` entries that don't match a property,
// untyped arrays (`items` must be present), and `properties`/`required`
// keys on non-object scalars. Mirrors OpenCode's historical Gemini rules.
//
// 2. Project — lossy mapping from JSON Schema to Gemini's schema dialect:
// drop empty objects, derive `nullable: true` from `type: [..., "null"]`,
// coerce `const` to `[const]` enum, recurse properties/items, propagate
// only an allowlisted set of keys (description, required, format, type,
// properties, items, allOf, anyOf, oneOf, minLength). Anything outside the
// allowlist (e.g. `additionalProperties`, `$ref`) is silently dropped.
//
// Sanitize runs first, then project. The implementation lives in
// `utils/gemini-tool-schema` so this protocol keeps the same shape as the other
// provider protocols.
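// e.g. (illustrative, hypothetical input) sanitize + project turns
//   { type: "object", properties: { n: { type: "integer", enum: [1, 2] } },
//     required: ["n", "missing"], additionalProperties: false }
// into
//   { type: "object", properties: { n: { type: "string", enum: ["1", "2"] } }, required: ["n"] }
// with the stray required entry pruned and additionalProperties dropped.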
// =============================================================================
// Request Lowering
// =============================================================================
const lowerTool = (tool: ToolDefinition) => ({
name: tool.name,
description: tool.description,
parameters: GeminiToolSchema.convert(tool.inputSchema),
})
const lowerToolConfig = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("Gemini", toolChoice, {
auto: () => ({ functionCallingConfig: { mode: "AUTO" as const } }),
none: () => ({ functionCallingConfig: { mode: "NONE" as const } }),
required: () => ({ functionCallingConfig: { mode: "ANY" as const } }),
tool: (name) => ({ functionCallingConfig: { mode: "ANY" as const, allowedFunctionNames: [name] } }),
})
const lowerUserPart = (part: TextPart | MediaPart) =>
part.type === "text" ? { text: part.text } : { inlineData: { mimeType: part.mediaType, data: mediaData(part) } }
const lowerToolCall = (part: ToolCallPart) => ({
functionCall: { name: part.name, args: part.input },
})
const lowerMessages = Effect.fn("Gemini.lowerMessages")(function* (request: LLMRequest) {
const contents: GeminiContent[] = []
for (const message of request.messages) {
if (message.role === "user") {
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "media"]))
return yield* ProviderShared.unsupportedContent("Gemini", "user", ["text", "media"])
parts.push(lowerUserPart(part))
}
contents.push({ role: "user", parts })
continue
}
if (message.role === "assistant") {
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"]))
return yield* ProviderShared.unsupportedContent("Gemini", "assistant", ["text", "reasoning", "tool-call"])
if (part.type === "text") {
parts.push({ text: part.text })
continue
}
if (part.type === "reasoning") {
parts.push({ text: part.text, thought: true })
continue
}
if (part.type === "tool-call") {
parts.push(lowerToolCall(part))
continue
}
}
contents.push({ role: "model", parts })
continue
}
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("Gemini", "tool", ["tool-result"])
parts.push({
functionResponse: {
name: part.name,
response: {
name: part.name,
content: ProviderShared.toolResultText(part),
},
},
})
}
contents.push({ role: "user", parts })
}
return contents
})
const geminiOptions = (request: LLMRequest) => request.providerOptions?.gemini
const thinkingConfig = (request: LLMRequest) => {
const value = geminiOptions(request)?.thinkingConfig
if (!ProviderShared.isRecord(value)) return undefined
const result = {
thinkingBudget: typeof value.thinkingBudget === "number" ? value.thinkingBudget : undefined,
includeThoughts: typeof value.includeThoughts === "boolean" ? value.includeThoughts : undefined,
}
return Object.values(result).some((item) => item !== undefined) ? result : undefined
}
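// e.g. (illustrative) providerOptions: { gemini: { thinkingConfig: { thinkingBudget: 1024, includeThoughts: true } } }
// passes through as generationConfig.thinkingConfig; values of the wrong type are dropped.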
const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMRequest) {
const toolsEnabled = request.tools.length > 0 && request.toolChoice?.type !== "none"
const generation = request.generation
const generationConfig = {
maxOutputTokens: generation?.maxTokens,
temperature: generation?.temperature,
topP: generation?.topP,
topK: generation?.topK,
stopSequences: generation?.stop,
thinkingConfig: thinkingConfig(request),
}
return {
contents: yield* lowerMessages(request),
systemInstruction:
request.system.length === 0 ? undefined : { parts: [{ text: ProviderShared.joinText(request.system) }] },
tools: toolsEnabled ? [{ functionDeclarations: request.tools.map(lowerTool) }] : undefined,
toolConfig: toolsEnabled && request.toolChoice ? yield* lowerToolConfig(request.toolChoice) : undefined,
generationConfig: Object.values(generationConfig).some((value) => value !== undefined)
? generationConfig
: undefined,
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapUsage = (usage: GeminiUsage | undefined) => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.promptTokenCount,
outputTokens: usage.candidatesTokenCount,
reasoningTokens: usage.thoughtsTokenCount,
cacheReadInputTokens: usage.cachedContentTokenCount,
totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, usage.candidatesTokenCount, usage.totalTokenCount),
native: usage,
})
}
const mapFinishReason = (finishReason: string | undefined, hasToolCalls: boolean): FinishReason => {
if (finishReason === "STOP") return hasToolCalls ? "tool-calls" : "stop"
if (finishReason === "MAX_TOKENS") return "length"
if (
finishReason === "IMAGE_SAFETY" ||
finishReason === "RECITATION" ||
finishReason === "SAFETY" ||
finishReason === "BLOCKLIST" ||
finishReason === "PROHIBITED_CONTENT" ||
finishReason === "SPII"
)
return "content-filter"
if (finishReason === "MALFORMED_FUNCTION_CALL") return "error"
return "unknown"
}
const finish = (state: ParserState): ReadonlyArray<LLMEvent> =>
state.finishReason || state.usage
? [{ type: "request-finish", reason: mapFinishReason(state.finishReason, state.hasToolCalls), usage: state.usage }]
: []
const step = (state: ParserState, event: GeminiEvent) => {
const nextState = {
...state,
usage: event.usageMetadata ? (mapUsage(event.usageMetadata) ?? state.usage) : state.usage,
}
const candidate = event.candidates?.[0]
if (!candidate?.content)
return Effect.succeed([
{ ...nextState, finishReason: candidate?.finishReason ?? nextState.finishReason },
[],
] as const)
const events: LLMEvent[] = []
let hasToolCalls = nextState.hasToolCalls
let nextToolCallId = nextState.nextToolCallId
for (const part of candidate.content.parts) {
if ("text" in part && part.text.length > 0) {
events.push({ type: part.thought ? "reasoning-delta" : "text-delta", text: part.text })
continue
}
if ("functionCall" in part) {
const input = part.functionCall.args
const id = `tool_${nextToolCallId++}`
events.push({ type: "tool-call", id, name: part.functionCall.name, input })
hasToolCalls = true
}
}
return Effect.succeed([
{
...nextState,
hasToolCalls,
nextToolCallId,
finishReason: candidate.finishReason ?? nextState.finishReason,
},
events,
] as const)
}
// =============================================================================
// Protocol And Gemini Route
// =============================================================================
/**
* The Gemini protocol — request body construction, body schema, and the
* streaming-event state machine. Used by Google AI Studio Gemini and (once
* registered) Vertex Gemini.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: GeminiBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(GeminiEvent),
initial: () => ({ hasToolCalls: false, nextToolCallId: 0 }),
step,
onHalt: finish,
},
})
export const route = Route.make({
id: ADAPTER,
protocol,
// Gemini's path embeds the model id and pins SSE framing via the `alt=sse` query parameter.
endpoint: Endpoint.path(({ request }) => `/models/${request.model.id}:streamGenerateContent?alt=sse`),
auth: Auth.apiKeyHeader("x-goog-api-key"),
framing: Framing.sse,
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = Route.model(route, {
provider: "google",
baseURL: DEFAULT_BASE_URL,
})
export * as Gemini from "./gemini"

View File

@@ -0,0 +1,6 @@
export * as AnthropicMessages from "./anthropic-messages"
export * as BedrockConverse from "./bedrock-converse"
export * as Gemini from "./gemini"
export * as OpenAIChat from "./openai-chat"
export * as OpenAICompatibleChat from "./openai-compatible-chat"
export * as OpenAIResponses from "./openai-responses"

View File

@@ -0,0 +1,404 @@
import { Array as Arr, Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { HttpTransport } from "../route/transport"
import { Protocol } from "../route/protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { isRecord, JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { OpenAIOptions } from "./utils/openai-options"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "openai-chat"
export const DEFAULT_BASE_URL = "https://api.openai.com/v1"
export const PATH = "/chat/completions"
// =============================================================================
// Request Body Schema
// =============================================================================
// The body schema describes the provider-native JSON body. `fromRequest` below
// builds this shape from the common `LLMRequest`, then `Route.make` validates
// and JSON-encodes it before transport.
const OpenAIChatFunction = Schema.Struct({
name: Schema.String,
description: Schema.String,
parameters: JsonObject,
})
const OpenAIChatTool = Schema.Struct({
type: Schema.tag("function"),
function: OpenAIChatFunction,
})
type OpenAIChatTool = Schema.Schema.Type<typeof OpenAIChatTool>
const OpenAIChatAssistantToolCall = Schema.Struct({
id: Schema.String,
type: Schema.tag("function"),
function: Schema.Struct({
name: Schema.String,
arguments: Schema.String,
}),
})
type OpenAIChatAssistantToolCall = Schema.Schema.Type<typeof OpenAIChatAssistantToolCall>
const OpenAIChatMessage = Schema.Union([
Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
Schema.Struct({ role: Schema.Literal("user"), content: Schema.String }),
Schema.Struct({
role: Schema.Literal("assistant"),
content: Schema.NullOr(Schema.String),
tool_calls: optionalArray(OpenAIChatAssistantToolCall),
reasoning_content: Schema.optional(Schema.String),
}),
Schema.Struct({ role: Schema.Literal("tool"), tool_call_id: Schema.String, content: Schema.String }),
]).pipe(Schema.toTaggedUnion("role"))
type OpenAIChatMessage = Schema.Schema.Type<typeof OpenAIChatMessage>
const OpenAIChatToolChoice = Schema.Union([
Schema.Literals(["auto", "none", "required"]),
Schema.Struct({
type: Schema.tag("function"),
function: Schema.Struct({ name: Schema.String }),
}),
])
export const bodyFields = {
model: Schema.String,
messages: Schema.Array(OpenAIChatMessage),
tools: optionalArray(OpenAIChatTool),
tool_choice: Schema.optional(OpenAIChatToolChoice),
stream: Schema.Literal(true),
stream_options: Schema.optional(Schema.Struct({ include_usage: Schema.Boolean })),
store: Schema.optional(Schema.Boolean),
reasoning_effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
max_tokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
frequency_penalty: Schema.optional(Schema.Number),
presence_penalty: Schema.optional(Schema.Number),
seed: Schema.optional(Schema.Number),
stop: optionalArray(Schema.String),
}
const OpenAIChatBody = Schema.Struct(bodyFields)
export type OpenAIChatBody = Schema.Schema.Type<typeof OpenAIChatBody>
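// A minimal conforming body (sketch; the model id and text are illustrative,
// not defaults of this module):
const exampleBody: OpenAIChatBody = {
  model: "gpt-4o-mini",
  messages: [
    { role: "system", content: "You are terse." },
    { role: "user", content: "Hi" },
  ],
  stream: true,
  stream_options: { include_usage: true },
}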
// =============================================================================
// Streaming Event Schema
// =============================================================================
// The event schema describes one decoded SSE `data:` payload. `Framing.sse`
// splits the byte stream into strings, then `Protocol.jsonEvent` decodes each
// string into this provider-native event shape.
const OpenAIChatUsage = Schema.Struct({
prompt_tokens: Schema.optional(Schema.Number),
completion_tokens: Schema.optional(Schema.Number),
total_tokens: Schema.optional(Schema.Number),
prompt_tokens_details: optionalNull(
Schema.Struct({
cached_tokens: Schema.optional(Schema.Number),
}),
),
completion_tokens_details: optionalNull(
Schema.Struct({
reasoning_tokens: Schema.optional(Schema.Number),
}),
),
})
const OpenAIChatToolCallDeltaFunction = Schema.Struct({
name: optionalNull(Schema.String),
arguments: optionalNull(Schema.String),
})
const OpenAIChatToolCallDelta = Schema.Struct({
index: Schema.Number,
id: optionalNull(Schema.String),
function: optionalNull(OpenAIChatToolCallDeltaFunction),
})
type OpenAIChatToolCallDelta = Schema.Schema.Type<typeof OpenAIChatToolCallDelta>
const OpenAIChatDelta = Schema.Struct({
content: optionalNull(Schema.String),
tool_calls: optionalNull(Schema.Array(OpenAIChatToolCallDelta)),
})
const OpenAIChatChoice = Schema.Struct({
delta: optionalNull(OpenAIChatDelta),
finish_reason: optionalNull(Schema.String),
})
const OpenAIChatEvent = Schema.Struct({
choices: Schema.Array(OpenAIChatChoice),
usage: optionalNull(OpenAIChatUsage),
})
type OpenAIChatEvent = Schema.Schema.Type<typeof OpenAIChatEvent>
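// One mid-stream payload as the parser sees it (sketch; values illustrative):
//   { choices: [{ delta: { content: "Hel" }, finish_reason: null }], usage: null }
// The last chunk instead carries `usage`, requested via stream_options.include_usage.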
type OpenAIChatRequestMessage = LLMRequest["messages"][number]
interface ParserState {
readonly tools: ToolStream.State<number>
readonly toolCallEvents: ReadonlyArray<LLMEvent>
readonly usage?: Usage
readonly finishReason?: FinishReason
}
const invalid = ProviderShared.invalidRequest
// =============================================================================
// Request Lowering
// =============================================================================
// Lowering is the only place that knows how common LLM messages map onto the
// OpenAI Chat wire format. Keep provider quirks here instead of leaking native
// fields into `LLMRequest`.
const lowerTool = (tool: ToolDefinition): OpenAIChatTool => ({
type: "function",
function: {
name: tool.name,
description: tool.description,
parameters: tool.inputSchema,
},
})
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("OpenAI Chat", toolChoice, {
auto: () => "auto" as const,
none: () => "none" as const,
required: () => "required" as const,
tool: (name) => ({ type: "function" as const, function: { name } }),
})
const lowerToolCall = (part: ToolCallPart): OpenAIChatAssistantToolCall => ({
id: part.id,
type: "function",
function: {
name: part.name,
arguments: ProviderShared.encodeJson(part.input),
},
})
const openAICompatibleReasoningContent = (native: unknown) =>
isRecord(native) && typeof native.reasoning_content === "string" ? native.reasoning_content : undefined
const lowerUserMessage = Effect.fn("OpenAIChat.lowerUserMessage")(function* (message: OpenAIChatRequestMessage) {
const content: TextPart[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text"]))
return yield* ProviderShared.unsupportedContent("OpenAI Chat", "user", ["text"])
content.push(part)
}
return { role: "user" as const, content: ProviderShared.joinText(content) }
})
const lowerAssistantMessage = Effect.fn("OpenAIChat.lowerAssistantMessage")(function* (
message: OpenAIChatRequestMessage,
) {
const content: TextPart[] = []
const toolCalls: OpenAIChatAssistantToolCall[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "tool-call"]))
return yield* ProviderShared.unsupportedContent("OpenAI Chat", "assistant", ["text", "tool-call"])
if (part.type === "text") {
content.push(part)
continue
}
if (part.type === "tool-call") {
toolCalls.push(lowerToolCall(part))
continue
}
}
return {
role: "assistant" as const,
content: content.length === 0 ? null : ProviderShared.joinText(content),
tool_calls: toolCalls.length === 0 ? undefined : toolCalls,
reasoning_content: openAICompatibleReasoningContent(message.native?.openaiCompatible),
}
})
const lowerToolMessages = Effect.fn("OpenAIChat.lowerToolMessages")(function* (message: OpenAIChatRequestMessage) {
const messages: OpenAIChatMessage[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("OpenAI Chat", "tool", ["tool-result"])
messages.push({ role: "tool", tool_call_id: part.id, content: ProviderShared.toolResultText(part) })
}
return messages
})
const lowerMessage = Effect.fn("OpenAIChat.lowerMessage")(function* (message: OpenAIChatRequestMessage) {
if (message.role === "user") return [yield* lowerUserMessage(message)]
if (message.role === "assistant") return [yield* lowerAssistantMessage(message)]
return yield* lowerToolMessages(message)
})
const lowerMessages = Effect.fn("OpenAIChat.lowerMessages")(function* (request: LLMRequest) {
const system: OpenAIChatMessage[] =
request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
return [...system, ...Arr.flatten(yield* Effect.forEach(request.messages, lowerMessage))]
})
const lowerOptions = Effect.fn("OpenAIChat.lowerOptions")(function* (request: LLMRequest) {
const store = OpenAIOptions.store(request)
const reasoningEffort = OpenAIOptions.reasoningEffort(request)
if (reasoningEffort && !OpenAIOptions.isReasoningEffort(reasoningEffort))
return yield* invalid(`OpenAI Chat does not support reasoning effort ${reasoningEffort}`)
return {
...(store !== undefined ? { store } : {}),
...(reasoningEffort ? { reasoning_effort: reasoningEffort } : {}),
}
})
const fromRequest = Effect.fn("OpenAIChat.fromRequest")(function* (request: LLMRequest) {
// `fromRequest` returns the provider body only. Endpoint, auth, framing,
// validation, and HTTP execution are composed by `Route.make`.
const generation = request.generation
return {
model: request.model.id,
messages: yield* lowerMessages(request),
tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
stream: true as const,
stream_options: { include_usage: true },
max_tokens: generation?.maxTokens,
temperature: generation?.temperature,
top_p: generation?.topP,
frequency_penalty: generation?.frequencyPenalty,
presence_penalty: generation?.presencePenalty,
seed: generation?.seed,
stop: generation?.stop,
...(yield* lowerOptions(request)),
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
// Streaming parsers are small state machines: every event returns a new state
// plus the common `LLMEvent`s produced by that event. Tool calls are accumulated
// because OpenAI streams JSON arguments across multiple deltas.
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
if (reason === "stop") return "stop"
if (reason === "length") return "length"
if (reason === "content_filter") return "content-filter"
if (reason === "function_call" || reason === "tool_calls") return "tool-calls"
return "unknown"
}
const mapUsage = (usage: OpenAIChatEvent["usage"]): Usage | undefined => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.prompt_tokens,
outputTokens: usage.completion_tokens,
reasoningTokens: usage.completion_tokens_details?.reasoning_tokens,
cacheReadInputTokens: usage.prompt_tokens_details?.cached_tokens,
totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
native: usage,
})
}
const step = (state: ParserState, event: OpenAIChatEvent) =>
Effect.gen(function* () {
const events: LLMEvent[] = []
const usage = mapUsage(event.usage) ?? state.usage
const choice = event.choices[0]
const finishReason = choice?.finish_reason ? mapFinishReason(choice.finish_reason) : state.finishReason
const delta = choice?.delta
const toolDeltas = delta?.tool_calls ?? []
let tools = state.tools
if (delta?.content) events.push({ type: "text-delta", text: delta.content })
for (const tool of toolDeltas) {
const result = ToolStream.appendOrStart(
ADAPTER,
tools,
tool.index,
{ id: tool.id ?? undefined, name: tool.function?.name ?? undefined, text: tool.function?.arguments ?? "" },
"OpenAI Chat tool call delta is missing id or name",
)
if (ToolStream.isError(result)) return yield* result
tools = result.tools
if (result.event) events.push(result.event)
}
// Finalize accumulated tool inputs eagerly when finish_reason arrives so
// JSON parse failures fail the stream at the boundary rather than at halt.
const finished =
finishReason !== undefined && state.finishReason === undefined && Object.keys(tools).length > 0
? yield* ToolStream.finishAll(ADAPTER, tools)
: undefined
return [
{
tools: finished?.tools ?? tools,
toolCallEvents: finished?.events ?? state.toolCallEvents,
usage,
finishReason,
},
events,
] as const
})
const finishEvents = (state: ParserState): ReadonlyArray<LLMEvent> => {
const hasToolCalls = state.toolCallEvents.length > 0
const reason = state.finishReason === "stop" && hasToolCalls ? "tool-calls" : state.finishReason
return [
...state.toolCallEvents,
...(reason ? ([{ type: "request-finish", reason, usage: state.usage }] satisfies ReadonlyArray<LLMEvent>) : []),
]
}
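// How one streamed tool call typically arrives (sketch; payload fragments are
// illustrative, not captured traffic): identity lands on the first delta,
// arguments accumulate across later deltas, and finish_reason triggers the
// eager finalization above.
const exampleToolDeltas: OpenAIChatEvent[] = [
  {
    choices: [
      {
        delta: { tool_calls: [{ index: 0, id: "call_1", function: { name: "get_weather", arguments: "" } }] },
        finish_reason: null,
      },
    ],
    usage: null,
  },
  {
    choices: [{ delta: { tool_calls: [{ index: 0, function: { arguments: '{"city":"Oslo"}' } }] }, finish_reason: null }],
    usage: null,
  },
  { choices: [{ delta: null, finish_reason: "tool_calls" }], usage: null },
]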
// =============================================================================
// Protocol And OpenAI Route
// =============================================================================
/**
* The OpenAI Chat protocol — request body construction, body schema, and the
* streaming-event state machine. Reused by every route that speaks OpenAI Chat
* over HTTP+SSE: native OpenAI, DeepSeek, TogetherAI, Cerebras, Baseten,
* Fireworks, DeepInfra, and (once added) Azure OpenAI Chat.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: OpenAIChatBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(OpenAIChatEvent),
initial: () => ({ tools: ToolStream.empty<number>(), toolCallEvents: [] }),
step,
onHalt: finishEvents,
},
})
const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIChatBody))
export const httpTransport = HttpTransport.httpJson({
endpoint: Endpoint.path(PATH),
auth: Auth.bearer(),
framing: Framing.sse,
encodeBody,
})
export const route = Route.make({
id: ADAPTER,
provider: "openai",
protocol,
transport: httpTransport,
defaults: {
baseURL: DEFAULT_BASE_URL,
},
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = route.model
export * as OpenAIChat from "./openai-chat"

View File

@@ -0,0 +1,28 @@
import { Route, type RouteRoutedModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import * as OpenAIChat from "./openai-chat"
const ADAPTER = "openai-compatible-chat"
export type OpenAICompatibleChatModelInput = Omit<RouteRoutedModelInput, "baseURL"> & {
readonly baseURL: string
}
/**
* Route for non-OpenAI providers that expose an OpenAI Chat-compatible
* `/chat/completions` endpoint. Reuses `OpenAIChat.protocol` end-to-end and
* overrides only the route id so providers can be resolved per-family without
* colliding with native OpenAI. The model carries the host on `baseURL`,
* supplied by whichever profile/provider helper builds it.
*/
export const route = Route.make({
id: ADAPTER,
protocol: OpenAIChat.protocol,
endpoint: Endpoint.path("/chat/completions"),
framing: Framing.sse,
})
export const model = Route.model<OpenAICompatibleChatModelInput>(route)
export * as OpenAICompatibleChat from "./openai-compatible-chat"

View File

@@ -0,0 +1,575 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { HttpTransport, WebSocketTransport } from "../route/transport"
import { Protocol } from "../route/protocol"
import {
Usage,
type FinishReason,
type LLMEvent,
type LLMRequest,
type ProviderMetadata,
type TextPart,
type ToolCallPart,
type ToolDefinition,
} from "../schema"
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { OpenAIOptions } from "./utils/openai-options"
import { ToolStream } from "./utils/tool-stream"
const ADAPTER = "openai-responses"
export const DEFAULT_BASE_URL = "https://api.openai.com/v1"
export const PATH = "/responses"
// =============================================================================
// Request Body Schema
// =============================================================================
const OpenAIResponsesInputText = Schema.Struct({
type: Schema.tag("input_text"),
text: Schema.String,
})
const OpenAIResponsesOutputText = Schema.Struct({
type: Schema.tag("output_text"),
text: Schema.String,
})
const OpenAIResponsesInputItem = Schema.Union([
Schema.Struct({ role: Schema.tag("system"), content: Schema.String }),
Schema.Struct({ role: Schema.tag("user"), content: Schema.Array(OpenAIResponsesInputText) }),
Schema.Struct({ role: Schema.tag("assistant"), content: Schema.Array(OpenAIResponsesOutputText) }),
Schema.Struct({
type: Schema.tag("function_call"),
call_id: Schema.String,
name: Schema.String,
arguments: Schema.String,
}),
Schema.Struct({
type: Schema.tag("function_call_output"),
call_id: Schema.String,
output: Schema.String,
}),
])
type OpenAIResponsesInputItem = Schema.Schema.Type<typeof OpenAIResponsesInputItem>
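// Illustrative `input` for one client-tool round-trip (sketch; ids, names, and
// payloads invented):
const exampleInput: OpenAIResponsesInputItem[] = [
  { role: "system", content: "Be brief." },
  { role: "user", content: [{ type: "input_text", text: "Weather in Oslo?" }] },
  { type: "function_call", call_id: "call_1", name: "get_weather", arguments: '{"city":"Oslo"}' },
  { type: "function_call_output", call_id: "call_1", output: '{"tempC":7}' },
]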
const OpenAIResponsesTool = Schema.Struct({
type: Schema.tag("function"),
name: Schema.String,
description: Schema.String,
parameters: JsonObject,
strict: Schema.optional(Schema.Boolean),
})
type OpenAIResponsesTool = Schema.Schema.Type<typeof OpenAIResponsesTool>
const OpenAIResponsesToolChoice = Schema.Union([
Schema.Literals(["auto", "none", "required"]),
Schema.Struct({ type: Schema.tag("function"), name: Schema.String }),
])
// Fields shared between the HTTP body and the WebSocket `response.create`
// message. The HTTP body adds `stream: true`; the WebSocket message adds
// `type: "response.create"`. Defining the shared shape once keeps the two
// transports in sync without a destructure-and-strip dance.
const OpenAIResponsesCoreFields = {
model: Schema.String,
input: Schema.Array(OpenAIResponsesInputItem),
tools: optionalArray(OpenAIResponsesTool),
tool_choice: Schema.optional(OpenAIResponsesToolChoice),
store: Schema.optional(Schema.Boolean),
prompt_cache_key: Schema.optional(Schema.String),
include: optionalArray(Schema.Literal("reasoning.encrypted_content")),
reasoning: Schema.optional(
Schema.Struct({
effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
summary: Schema.optional(Schema.Literal("auto")),
}),
),
text: Schema.optional(
Schema.Struct({
verbosity: Schema.optional(OpenAIOptions.OpenAITextVerbosity),
}),
),
max_output_tokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
top_p: Schema.optional(Schema.Number),
}
const OpenAIResponsesBody = Schema.Struct({
...OpenAIResponsesCoreFields,
stream: Schema.Literal(true),
})
export type OpenAIResponsesBody = Schema.Schema.Type<typeof OpenAIResponsesBody>
const OpenAIResponsesWebSocketMessage = Schema.StructWithRest(
Schema.Struct({
type: Schema.tag("response.create"),
...OpenAIResponsesCoreFields,
}),
[Schema.Record(Schema.String, Schema.Unknown)],
)
type OpenAIResponsesWebSocketMessage = Schema.Schema.Type<typeof OpenAIResponsesWebSocketMessage>
const encodeWebSocketMessage = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesWebSocketMessage))
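// Fan-out sketch: one lowered set of core fields serves both transports.
//   HTTP body:         { ...core, stream: true }
//   WebSocket message: { type: "response.create", ...core }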
const OpenAIResponsesUsage = Schema.Struct({
input_tokens: Schema.optional(Schema.Number),
input_tokens_details: optionalNull(Schema.Struct({ cached_tokens: Schema.optional(Schema.Number) })),
output_tokens: Schema.optional(Schema.Number),
output_tokens_details: optionalNull(Schema.Struct({ reasoning_tokens: Schema.optional(Schema.Number) })),
total_tokens: Schema.optional(Schema.Number),
})
type OpenAIResponsesUsage = Schema.Schema.Type<typeof OpenAIResponsesUsage>
const OpenAIResponsesStreamItem = Schema.Struct({
type: Schema.String,
id: Schema.optional(Schema.String),
call_id: Schema.optional(Schema.String),
name: Schema.optional(Schema.String),
arguments: Schema.optional(Schema.String),
// Hosted (provider-executed) tool fields. Each hosted tool item carries its
// own subset of these — we capture them generically so we can surface the
// call's typed input portion and round-trip the full result payload without
// hand-rolling a per-tool schema.
status: Schema.optional(Schema.String),
action: Schema.optional(Schema.Unknown),
queries: Schema.optional(Schema.Unknown),
results: Schema.optional(Schema.Unknown),
code: Schema.optional(Schema.String),
container_id: Schema.optional(Schema.String),
outputs: Schema.optional(Schema.Unknown),
server_label: Schema.optional(Schema.String),
output: Schema.optional(Schema.Unknown),
error: Schema.optional(Schema.Unknown),
})
type OpenAIResponsesStreamItem = Schema.Schema.Type<typeof OpenAIResponsesStreamItem>
const OpenAIResponsesEvent = Schema.Struct({
type: Schema.String,
delta: Schema.optional(Schema.String),
item_id: Schema.optional(Schema.String),
item: Schema.optional(OpenAIResponsesStreamItem),
response: Schema.optional(
Schema.Struct({
id: Schema.optional(Schema.String),
service_tier: Schema.optional(Schema.String),
incomplete_details: optionalNull(Schema.Struct({ reason: Schema.String })),
usage: optionalNull(OpenAIResponsesUsage),
}),
),
code: Schema.optional(Schema.String),
message: Schema.optional(Schema.String),
})
type OpenAIResponsesEvent = Schema.Schema.Type<typeof OpenAIResponsesEvent>
interface ParserState {
readonly tools: ToolStream.State<string>
readonly hasFunctionCall: boolean
}
const invalid = ProviderShared.invalidRequest
// =============================================================================
// Request Lowering
// =============================================================================
const lowerTool = (tool: ToolDefinition): OpenAIResponsesTool => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.inputSchema,
})
const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
ProviderShared.matchToolChoice("OpenAI Responses", toolChoice, {
auto: () => "auto" as const,
none: () => "none" as const,
required: () => "required" as const,
tool: (name) => ({ type: "function" as const, name }),
})
const lowerToolCall = (part: ToolCallPart): OpenAIResponsesInputItem => ({
type: "function_call",
call_id: part.id,
name: part.name,
arguments: ProviderShared.encodeJson(part.input),
})
const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) {
const system: OpenAIResponsesInputItem[] =
request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
const input: OpenAIResponsesInputItem[] = [...system]
for (const message of request.messages) {
if (message.role === "user") {
const content: TextPart[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text"]))
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "user", ["text"])
content.push(part)
}
input.push({ role: "user", content: content.map((part) => ({ type: "input_text", text: part.text })) })
continue
}
if (message.role === "assistant") {
const content: TextPart[] = []
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["text", "tool-call"]))
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "assistant", ["text", "tool-call"])
if (part.type === "text") {
content.push(part)
continue
}
if (part.type === "tool-call") {
input.push(lowerToolCall(part))
continue
}
}
if (content.length > 0)
input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) })
continue
}
for (const part of message.content) {
if (!ProviderShared.supportsContent(part, ["tool-result"]))
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "tool", ["tool-result"])
input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) })
}
}
return input
})
const lowerOptions = Effect.fn("OpenAIResponses.lowerOptions")(function* (request: LLMRequest) {
const store = OpenAIOptions.store(request)
const promptCacheKey = OpenAIOptions.promptCacheKey(request)
const effort = OpenAIOptions.reasoningEffort(request)
if (effort && !OpenAIOptions.isReasoningEffort(effort))
return yield* invalid(`OpenAI Responses does not support reasoning effort ${effort}`)
const summary = OpenAIOptions.reasoningSummary(request)
const encryptedState = OpenAIOptions.encryptedReasoning(request)
const verbosity = OpenAIOptions.textVerbosity(request)
return {
...(store !== undefined ? { store } : {}),
...(promptCacheKey ? { prompt_cache_key: promptCacheKey } : {}),
...(encryptedState ? { include: ["reasoning.encrypted_content"] as const } : {}),
...(effort || summary ? { reasoning: { effort, summary } } : {}),
...(verbosity ? { text: { verbosity } } : {}),
}
})
const fromRequest = Effect.fn("OpenAIResponses.fromRequest")(function* (request: LLMRequest) {
const generation = request.generation
return {
model: request.model.id,
input: yield* lowerMessages(request),
tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
stream: true as const,
max_output_tokens: generation?.maxTokens,
temperature: generation?.temperature,
top_p: generation?.topP,
...(yield* lowerOptions(request)),
}
})
// =============================================================================
// Stream Parsing
// =============================================================================
const mapUsage = (usage: OpenAIResponsesUsage | null | undefined) => {
if (!usage) return undefined
return new Usage({
inputTokens: usage.input_tokens,
outputTokens: usage.output_tokens,
reasoningTokens: usage.output_tokens_details?.reasoning_tokens,
cacheReadInputTokens: usage.input_tokens_details?.cached_tokens,
totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
native: usage,
})
}
const mapFinishReason = (event: OpenAIResponsesEvent, hasFunctionCall: boolean): FinishReason => {
const reason = event.response?.incomplete_details?.reason
if (reason === undefined || reason === null) return hasFunctionCall ? "tool-calls" : "stop"
if (reason === "max_output_tokens") return "length"
if (reason === "content_filter") return "content-filter"
return hasFunctionCall ? "tool-calls" : "unknown"
}
const openaiMetadata = (metadata: Record<string, unknown>): ProviderMetadata => ({ openai: metadata })
// Hosted tool items (provider-executed) ship their typed input + status +
// result fields all in one item. We expose them as a `tool-call` +
// `tool-result` pair so consumers can treat them uniformly with client tools,
// only differentiated by `providerExecuted: true`.
//
// One record per OpenAI Responses item type that represents a hosted
// (provider-executed) tool call: the common name we surface, plus an `input`
// extractor that picks the fields the model actually populated for that tool.
// Falling back to `{}` when an entry isn't fully typed keeps unknown tools
// observable without rolling a per-tool schema.
const HOSTED_TOOLS = {
web_search_call: { name: "web_search", input: (item) => item.action ?? {} },
web_search_preview_call: { name: "web_search_preview", input: (item) => item.action ?? {} },
file_search_call: { name: "file_search", input: (item) => ({ queries: item.queries ?? [] }) },
code_interpreter_call: {
name: "code_interpreter",
input: (item) => ({ code: item.code, container_id: item.container_id }),
},
computer_use_call: { name: "computer_use", input: (item) => item.action ?? {} },
image_generation_call: { name: "image_generation", input: () => ({}) },
mcp_call: {
name: "mcp",
input: (item) => ({ server_label: item.server_label, name: item.name, arguments: item.arguments }),
},
local_shell_call: { name: "local_shell", input: (item) => item.action ?? {} },
} as const satisfies Record<
string,
{ readonly name: string; readonly input: (item: OpenAIResponsesStreamItem) => unknown }
>
type HostedToolType = keyof typeof HOSTED_TOOLS
const isHostedToolItem = (
item: OpenAIResponsesStreamItem,
): item is OpenAIResponsesStreamItem & { type: HostedToolType; id: string } =>
item.type in HOSTED_TOOLS && typeof item.id === "string" && item.id.length > 0
// Round-trip the full item as the structured result so consumers can extract
// outputs / sources / status without re-decoding.
const hostedToolResult = (item: OpenAIResponsesStreamItem) => {
const isError = typeof item.error !== "undefined" && item.error !== null
return isError ? { type: "error" as const, value: item.error } : { type: "json" as const, value: item }
}
const hostedToolEvents = (
item: OpenAIResponsesStreamItem & { type: HostedToolType; id: string },
): ReadonlyArray<LLMEvent> => {
const tool = HOSTED_TOOLS[item.type]
const providerMetadata = openaiMetadata({ itemId: item.id })
return [
{
type: "tool-call",
id: item.id,
name: tool.name,
input: tool.input(item),
providerExecuted: true,
providerMetadata,
},
{
type: "tool-result",
id: item.id,
name: tool.name,
result: hostedToolResult(item),
providerExecuted: true,
providerMetadata,
},
]
}
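// e.g. a completed hosted item (sketch; values illustrative)
//   { type: "web_search_call", id: "ws_1", status: "completed", action: { query: "effect" } }
// yields the pair
//   { type: "tool-call",   id: "ws_1", name: "web_search", input: { query: "effect" }, providerExecuted: true, ... }
//   { type: "tool-result", id: "ws_1", name: "web_search", result: { type: "json", value: item }, providerExecuted: true, ... }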
type StepResult = readonly [ParserState, ReadonlyArray<LLMEvent>]
const NO_EVENTS: StepResult["1"] = []
// `response.completed` / `response.incomplete` are clean finishes that emit a
// `request-finish` event; `response.failed` is a hard failure that emits a
// `provider-error`. All three end the stream — kept in one set so `step` and
// the protocol's `terminal` predicate stay in sync.
const TERMINAL_TYPES = new Set(["response.completed", "response.incomplete", "response.failed"])
const onOutputTextDelta = (state: ParserState, event: OpenAIResponsesEvent): StepResult => {
if (!event.delta) return [state, NO_EVENTS]
return [
state,
[
{
type: "text-delta",
id: event.item_id,
text: event.delta,
...(event.item_id ? { providerMetadata: openaiMetadata({ itemId: event.item_id }) } : {}),
},
],
]
}
const onOutputItemAdded = (state: ParserState, event: OpenAIResponsesEvent): StepResult => {
const item = event.item
if (item?.type !== "function_call" || !item.id) return [state, NO_EVENTS]
return [
{
hasFunctionCall: state.hasFunctionCall,
tools: ToolStream.start(state.tools, item.id, {
id: item.call_id ?? item.id,
name: item.name ?? "",
input: item.arguments ?? "",
providerMetadata: openaiMetadata({ itemId: item.id }),
}),
},
NO_EVENTS,
]
}
const onFunctionCallArgumentsDelta = Effect.fn("OpenAIResponses.onFunctionCallArgumentsDelta")(function* (
state: ParserState,
event: OpenAIResponsesEvent,
) {
if (!event.item_id || !event.delta) return [state, NO_EVENTS] satisfies StepResult
const result = ToolStream.appendExisting(
ADAPTER,
state.tools,
event.item_id,
event.delta,
"OpenAI Responses tool argument delta is missing its tool call",
)
if (ToolStream.isError(result)) return yield* result
return [
{ hasFunctionCall: state.hasFunctionCall, tools: result.tools },
result.event ? [result.event] : NO_EVENTS,
] satisfies StepResult
})
const onOutputItemDone = Effect.fn("OpenAIResponses.onOutputItemDone")(function* (
state: ParserState,
event: OpenAIResponsesEvent,
) {
const item = event.item
if (!item) return [state, NO_EVENTS] satisfies StepResult
if (item.type === "function_call") {
if (!item.id || !item.call_id || !item.name) return [state, NO_EVENTS] satisfies StepResult
const tools = state.tools[item.id]
? state.tools
: ToolStream.start(state.tools, item.id, { id: item.call_id, name: item.name })
const result =
item.arguments === undefined
? yield* ToolStream.finish(ADAPTER, tools, item.id)
: yield* ToolStream.finishWithInput(ADAPTER, tools, item.id, item.arguments)
return [
{ hasFunctionCall: result.event ? true : state.hasFunctionCall, tools: result.tools },
result.event ? [result.event] : NO_EVENTS,
] satisfies StepResult
}
if (isHostedToolItem(item)) return [state, hostedToolEvents(item)] satisfies StepResult
return [state, NO_EVENTS] satisfies StepResult
})
const onResponseFinish = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
state,
[
{
type: "request-finish",
reason: mapFinishReason(event, state.hasFunctionCall),
usage: mapUsage(event.response?.usage),
...(event.response?.id || event.response?.service_tier
? {
providerMetadata: openaiMetadata({
responseId: event.response.id,
serviceTier: event.response.service_tier,
}),
}
: {}),
},
],
]
const onResponseFailed = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
state,
[{ type: "provider-error", message: event.message ?? event.code ?? "OpenAI Responses response failed" }],
]
const onError = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
state,
[{ type: "provider-error", message: event.message ?? event.code ?? "OpenAI Responses stream error" }],
]
const step = (state: ParserState, event: OpenAIResponsesEvent) => {
if (event.type === "response.output_text.delta") return Effect.succeed(onOutputTextDelta(state, event))
if (event.type === "response.output_item.added") return Effect.succeed(onOutputItemAdded(state, event))
if (event.type === "response.function_call_arguments.delta") return onFunctionCallArgumentsDelta(state, event)
if (event.type === "response.output_item.done") return onOutputItemDone(state, event)
if (event.type === "response.completed" || event.type === "response.incomplete")
return Effect.succeed(onResponseFinish(state, event))
if (event.type === "response.failed") return Effect.succeed(onResponseFailed(state, event))
if (event.type === "error") return Effect.succeed(onError(state, event))
return Effect.succeed<StepResult>([state, NO_EVENTS])
}
// =============================================================================
// Protocol And OpenAI Route
// =============================================================================
/**
* The OpenAI Responses protocol — request body construction, body schema, and
* the streaming-event state machine. Used by native OpenAI and (once
* registered) Azure OpenAI Responses.
*/
export const protocol = Protocol.make({
id: ADAPTER,
body: {
schema: OpenAIResponsesBody,
from: fromRequest,
},
stream: {
event: Protocol.jsonEvent(OpenAIResponsesEvent),
initial: () => ({ hasFunctionCall: false, tools: ToolStream.empty<string>() }),
step,
terminal: (event) => TERMINAL_TYPES.has(event.type),
},
})
const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesBody))
const transportBase = {
endpoint: Endpoint.path<OpenAIResponsesBody>(PATH),
auth: Auth.bearer(),
encodeBody,
}
const routeDefaults = {
baseURL: DEFAULT_BASE_URL,
}
export const httpTransport = HttpTransport.httpJson({
...transportBase,
framing: Framing.sse,
})
export const route = Route.make({
id: ADAPTER,
provider: "openai",
protocol,
transport: httpTransport,
defaults: routeDefaults,
})
const decodeWebSocketMessage = ProviderShared.validateWith(Schema.decodeUnknownEffect(OpenAIResponsesWebSocketMessage))
const webSocketMessage = (body: OpenAIResponsesBody | Record<string, unknown>) =>
Effect.gen(function* () {
if (!ProviderShared.isRecord(body))
return yield* ProviderShared.invalidRequest("OpenAI Responses WebSocket body must be a JSON object")
const { stream: _stream, ...message } = body
return yield* decodeWebSocketMessage({ ...message, type: "response.create" })
})
export const webSocketTransport = WebSocketTransport.json({
...transportBase,
toMessage: webSocketMessage,
encodeMessage: encodeWebSocketMessage,
})
export const webSocketRoute = Route.make({
id: `${ADAPTER}-websocket`,
provider: "openai",
protocol,
transport: webSocketTransport,
defaults: routeDefaults,
})
// =============================================================================
// Model Helper
// =============================================================================
export const model = route.model
export const webSocketModel = webSocketRoute.model
export * as OpenAIResponses from "./openai-responses"

View File

@@ -0,0 +1,203 @@
import { Buffer } from "node:buffer"
import { Effect, Schema, Stream } from "effect"
import * as Sse from "effect/unstable/encoding/Sse"
import { Headers, HttpClientRequest } from "effect/unstable/http"
import {
InvalidProviderOutputReason,
InvalidRequestReason,
LLMError,
type ContentPart,
type LLMRequest,
type MediaPart,
type ToolResultPart,
} from "../schema"
export const Json = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownSync(Json)
export const encodeJson = Schema.encodeSync(Json)
export const JsonObject = Schema.Record(Schema.String, Schema.Unknown)
export const optionalArray = <const S extends Schema.Top>(schema: S) => Schema.optional(Schema.Array(schema))
export const optionalNull = <const S extends Schema.Top>(schema: S) => Schema.optional(Schema.NullOr(schema))
/**
* Plain-record narrowing. Excludes arrays so routes checking nested JSON
* Schema fragments don't accidentally treat a tuple as a key/value bag.
*/
export const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
/**
* Streaming tool-call accumulator. Adapters that build a tool call across
* multiple `tool-input-delta` chunks store the partial JSON input string here
* and finalize it with `parseToolInput` once the call completes.
*/
export interface ToolAccumulator {
readonly id: string
readonly name: string
readonly input: string
}
/**
* `Usage.totalTokens` policy shared by every route. Honors a provider-
* supplied total; otherwise falls back to `inputTokens + outputTokens` only
* when at least one is defined. Returns `undefined` when neither input nor
* output is known so routes don't publish a misleading `0`.
*/
export const totalTokens = (
inputTokens: number | undefined,
outputTokens: number | undefined,
total: number | undefined,
) => {
if (total !== undefined) return total
if (inputTokens === undefined && outputTokens === undefined) return undefined
return (inputTokens ?? 0) + (outputTokens ?? 0)
}
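// e.g. totalTokens(12, 7, undefined) === 19, totalTokens(12, undefined, 25) === 25,
// and totalTokens(undefined, undefined, undefined) === undefined.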
export const eventError = (route: string, message: string, raw?: string) =>
new LLMError({
module: "ProviderShared",
method: "stream",
reason: new InvalidProviderOutputReason({ route, message, raw }),
})
export const parseJson = (route: string, input: string, message: string) =>
Effect.try({
try: () => decodeJson(input),
catch: () => eventError(route, message, input),
})
/**
* Join the `text` field of a list of parts with newlines. Used by routes
* that flatten system / message content arrays into a single provider string
* (OpenAI Chat `system` content, OpenAI Responses `system` content, Gemini
* `systemInstruction.parts[].text`).
*/
export const joinText = (parts: ReadonlyArray<{ readonly text: string }>) => parts.map((part) => part.text).join("\n")
/**
* Parse the streamed JSON input of a tool call. Treats an empty string as
* `"{}"` — providers occasionally finish a tool call without ever emitting
* input deltas (e.g. zero-arg tools). The error message is uniform across
* routes: `Invalid JSON input for <route> tool call <name>`.
*/
export const parseToolInput = (route: string, name: string, raw: string) =>
parseJson(route, raw || "{}", `Invalid JSON input for ${route} tool call ${name}`)
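// e.g. parseToolInput("openai-chat", "noop", "") succeeds with {} rather than
// failing, covering zero-arg tools that never streamed an input delta.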
/**
* Encode a `MediaPart`'s raw bytes for inclusion in a JSON request body.
* `data: string` is assumed to already be base64 (matches caller convention
* across Gemini / Bedrock); `data: Uint8Array` is base64-encoded here. Used
* by every route that supports image / document inputs.
*/
export const mediaBytes = (part: MediaPart) =>
typeof part.data === "string" ? part.data : Buffer.from(part.data).toString("base64")
export const trimBaseUrl = (value: string) => value.replace(/\/+$/, "")
export const toolResultText = (part: ToolResultPart) => {
if (part.result.type === "text" || part.result.type === "error") return String(part.result.value)
return encodeJson(part.result.value)
}
export const errorText = (error: unknown) => {
if (error instanceof Error) return error.message
if (typeof error === "string") return error
if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") return String(error)
if (error === null) return "null"
if (error === undefined) return "undefined"
return "Unknown stream error"
}
/**
* `framing` step for Server-Sent Events. Decodes UTF-8, runs the SSE channel
* decoder, and drops empty / `[DONE]` keep-alive events so the downstream
* `decodeChunk` sees one JSON string per element. The SSE channel emits a
* `Retry` control event on its error channel; we drop it here (we don't
* implement client-driven retries) so the public error channel stays
* `LLMError`.
*/
export const sseFraming = (bytes: Stream.Stream<Uint8Array, LLMError>): Stream.Stream<string, LLMError> =>
bytes.pipe(
Stream.decodeText(),
Stream.pipeThroughChannel(Sse.decode()),
Stream.catchTag("Retry", () => Stream.empty),
Stream.filter((event) => event.data.length > 0 && event.data !== "[DONE]"),
Stream.map((event) => event.data),
)
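// Illustrative framing: the UTF-8 bytes of
//   data: {"ok":true}\n\n
//   data: [DONE]\n\n
// yield the single downstream element `{"ok":true}`; the [DONE] sentinel and
// empty keep-alive events never reach the JSON decoder.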
/**
 * Canonical invalid-request constructor. Lifts the one-line `const invalid =
 * (message) => invalidRequest(message)` alias out of every route so the
 * error constructor lives in one place. If we ever extend
* `InvalidRequestReason` with route context or trace metadata, the change
* lands here.
*/
export const invalidRequest = (message: string) =>
new LLMError({
module: "ProviderShared",
method: "request",
reason: new InvalidRequestReason({ message }),
})
export const matchToolChoice = <Auto, None, Required, Tool>(
route: string,
toolChoice: NonNullable<LLMRequest["toolChoice"]>,
cases: {
readonly auto: () => Auto
readonly none: () => None
readonly required: () => Required
readonly tool: (name: string) => Tool
},
) =>
Effect.gen(function* () {
if (toolChoice.type === "auto") return cases.auto()
if (toolChoice.type === "none") return cases.none()
if (toolChoice.type === "required") return cases.required()
if (!toolChoice.name) return yield* invalidRequest(`${route} tool choice requires a tool name`)
return cases.tool(toolChoice.name)
})
type ContentType = ContentPart["type"]
const formatContentTypes = (types: ReadonlyArray<ContentType>) => {
if (types.length <= 1) return types[0] ?? ""
if (types.length === 2) return `${types[0]} and ${types[1]}`
return `${types.slice(0, -1).join(", ")}, and ${types.at(-1)}`
}
export const supportsContent = <const Type extends ContentType>(
part: ContentPart,
types: ReadonlyArray<Type>,
): part is Extract<ContentPart, { readonly type: Type }> => (types as ReadonlyArray<ContentType>).includes(part.type)
export const unsupportedContent = (
route: string,
role: LLMRequest["messages"][number]["role"],
types: ReadonlyArray<ContentType>,
) => invalidRequest(`${route} ${role} messages only support ${formatContentTypes(types)} content for now`)
/**
* Build a `validate` step from a Schema decoder. Replaces the per-route
* lambda body `(payload) => decode(payload).pipe(Effect.mapError((e) =>
* invalid(e.message)))`. Any decode error is translated into
* `LLMError` carrying the original parse-error message.
*/
export const validateWith =
<A, I, E extends { readonly message: string }>(decode: (input: I) => Effect.Effect<A, E>) =>
(payload: I) =>
decode(payload).pipe(Effect.mapError((error) => invalidRequest(error.message)))
/**
* Build an HTTP POST with a JSON body. Sets `content-type: application/json`
* automatically after caller-supplied headers so routes cannot accidentally
* send JSON with a stale content type. The body is passed pre-encoded so
* routes can choose between
* `Schema.encodeSync(payload)` and `ProviderShared.encodeJson(payload)`.
*/
export const jsonPost = (input: { readonly url: string; readonly body: string; readonly headers?: Headers.Input }) =>
HttpClientRequest.post(input.url).pipe(
HttpClientRequest.setHeaders(Headers.set(Headers.fromInput(input.headers), "content-type", "application/json")),
HttpClientRequest.bodyText(input.body, "application/json"),
)
export * as ProviderShared from "./shared"

View File

@@ -0,0 +1,103 @@
import { AwsV4Signer } from "aws4fetch"
import { Effect, Option, Schema } from "effect"
import { Headers } from "effect/unstable/http"
import { Auth, type AuthInput } from "../../route/auth"
import type { LLMRequest } from "../../schema"
import { ProviderShared } from "../shared"
/**
* AWS credentials for SigV4 signing. Bedrock also supports Bearer API key auth
* via `model.apiKey`, which bypasses SigV4 signing. STS-vended credentials
* should be refreshed by the consumer (rebuild the model) before they expire;
* the route does not refresh.
*/
export interface Credentials {
readonly region: string
readonly accessKeyId: string
readonly secretAccessKey: string
readonly sessionToken?: string
}
const NativeCredentials = Schema.Struct({
accessKeyId: Schema.String,
secretAccessKey: Schema.String,
region: Schema.optional(Schema.String),
sessionToken: Schema.optional(Schema.String),
})
const decodeNativeCredentials = Schema.decodeUnknownOption(NativeCredentials)
export const region = (request: LLMRequest) => {
const fromNative = request.model.native?.aws_region
if (typeof fromNative === "string" && fromNative !== "") return fromNative
return (
decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
Option.map((credentials) => credentials.region),
Option.getOrUndefined,
) ?? "us-east-1"
)
}
const credentialsFromInput = (request: LLMRequest): Credentials | undefined =>
decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
Option.map((creds) => ({ ...creds, region: creds.region ?? region(request) })),
Option.getOrUndefined,
)
const signRequest = (input: {
readonly url: string
readonly body: string
readonly headers: Headers.Headers
readonly credentials: Credentials
}) =>
Effect.tryPromise({
try: async () => {
const signed = await new AwsV4Signer({
url: input.url,
method: "POST",
headers: Object.entries(input.headers),
body: input.body,
region: input.credentials.region,
accessKeyId: input.credentials.accessKeyId,
secretAccessKey: input.credentials.secretAccessKey,
sessionToken: input.credentials.sessionToken,
service: "bedrock",
}).sign()
return Object.fromEntries(signed.headers.entries())
},
catch: (error) =>
ProviderShared.invalidRequest(
`Bedrock Converse SigV4 signing failed: ${error instanceof Error ? error.message : String(error)}`,
),
})
/**
* Bedrock auth. `model.apiKey` (Bedrock's newer Bearer API key auth) wins if
* set; otherwise sign the exact JSON bytes with SigV4 using credentials from
* `model.native.aws_credentials`.
*/
export const auth = Auth.custom((input: AuthInput) => {
if (input.request.model.apiKey) return Auth.toEffect(Auth.bearer())(input)
return Effect.gen(function* () {
const credentials = credentialsFromInput(input.request)
if (!credentials) {
return yield* ProviderShared.invalidRequest(
"Bedrock Converse requires either model.apiKey or AWS credentials in model.native.aws_credentials",
)
}
const headersForSigning = Headers.set(input.headers, "content-type", "application/json")
const signed = yield* signRequest({ url: input.url, body: input.body, headers: headersForSigning, credentials })
return Headers.setAll(headersForSigning, signed)
})
})
export const nativeCredentials = (native: Record<string, unknown> | undefined, credentials: Credentials | undefined) =>
credentials
? {
...native,
aws_credentials: credentials,
aws_region: credentials.region,
}
: native
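// Shape this module reads and writes (sketch; values illustrative):
//   model.native = {
//     aws_credentials: { accessKeyId: "AKIA...", secretAccessKey: "...",
//                        region: "eu-west-1", sessionToken: "..." },
//     aws_region: "eu-west-1",
//   }
// `region` falls back to "us-east-1" when neither field supplies one.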
export * as BedrockAuth from "./bedrock-auth"

View File

@@ -0,0 +1,20 @@
import { Schema } from "effect"
import type { CacheHint } from "../../schema"
// Bedrock cache markers are positional: emit a `cachePoint` block immediately
// after the content the caller wants treated as a cacheable prefix.
export const CachePointBlock = Schema.Struct({
cachePoint: Schema.Struct({ type: Schema.tag("default") }),
})
export type CachePointBlock = Schema.Schema.Type<typeof CachePointBlock>
// Bedrock recently added optional `ttl: "5m" | "1h"` on cachePoint. Map
// `CacheHint.ttlSeconds` here once a recorded cassette validates the wire shape.
const DEFAULT: CachePointBlock = { cachePoint: { type: "default" } }
export const block = (cache: CacheHint | undefined): CachePointBlock | undefined => {
if (cache?.type !== "ephemeral" && cache?.type !== "persistent") return undefined
return DEFAULT
}
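// Positional sketch: to mark a long system prefix as cacheable, emit the
// marker immediately after it, e.g.
//   [{ text: systemPrompt }, { cachePoint: { type: "default" } }, ...rest]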
export * as BedrockCache from "./bedrock-cache"

View File

@@ -0,0 +1,80 @@
import { Effect, Schema } from "effect"
import type { MediaPart } from "../../schema"
import { ProviderShared } from "../shared"
// Bedrock Converse accepts image `format` as the file extension and
// `source.bytes` as base64 in the JSON wire format.
export const ImageFormat = Schema.Literals(["png", "jpeg", "gif", "webp"])
export type ImageFormat = Schema.Schema.Type<typeof ImageFormat>
export const ImageBlock = Schema.Struct({
image: Schema.Struct({
format: ImageFormat,
source: Schema.Struct({ bytes: Schema.String }),
}),
})
export type ImageBlock = Schema.Schema.Type<typeof ImageBlock>
// Bedrock document blocks require a user-facing name so the model can refer to
// the uploaded document.
export const DocumentFormat = Schema.Literals(["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"])
export type DocumentFormat = Schema.Schema.Type<typeof DocumentFormat>
export const DocumentBlock = Schema.Struct({
document: Schema.Struct({
format: DocumentFormat,
name: Schema.String,
source: Schema.Struct({ bytes: Schema.String }),
}),
})
export type DocumentBlock = Schema.Schema.Type<typeof DocumentBlock>
const IMAGE_FORMATS = {
"image/png": "png",
"image/jpeg": "jpeg",
"image/jpg": "jpeg",
"image/gif": "gif",
"image/webp": "webp",
} as const satisfies Record<string, ImageFormat>
const DOCUMENT_FORMATS = {
"application/pdf": "pdf",
"text/csv": "csv",
"application/msword": "doc",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
"application/vnd.ms-excel": "xls",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
"text/html": "html",
"text/plain": "txt",
"text/markdown": "md",
} as const satisfies Record<string, DocumentFormat>
const imageBlock = (part: MediaPart, format: ImageFormat): ImageBlock => ({
image: { format, source: { bytes: ProviderShared.mediaBytes(part) } },
})
const documentBlock = (part: MediaPart, format: DocumentFormat): DocumentBlock => ({
document: {
format,
name: part.filename ?? `document.${format}`,
source: { bytes: ProviderShared.mediaBytes(part) },
},
})
// Route by MIME. Known image/document formats lower into a typed block; anything
// else fails with a clear error instead of silently degrading to a malformed
// document block. Image MIME types not in `IMAGE_FORMATS` (e.g. `image/svg+xml`)
// get an image-specific error so the caller knows it's a format-support issue,
// not a kind-detection issue.
export const lower = (part: MediaPart) => {
const mime = part.mediaType.toLowerCase()
const imageFormat = IMAGE_FORMATS[mime as keyof typeof IMAGE_FORMATS]
if (imageFormat) return Effect.succeed(imageBlock(part, imageFormat))
if (mime.startsWith("image/"))
return ProviderShared.invalidRequest(`Bedrock Converse does not support image media type ${part.mediaType}`)
const documentFormat = DOCUMENT_FORMATS[mime as keyof typeof DOCUMENT_FORMATS]
if (documentFormat) return Effect.succeed(documentBlock(part, documentFormat))
return ProviderShared.invalidRequest(`Bedrock Converse does not support media type ${part.mediaType}`)
}
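// e.g. a part with mediaType "image/jpg" lowers to
//   { image: { format: "jpeg", source: { bytes: "<base64>" } } }
// while "image/svg+xml" hits the image-specific error and "application/zip"
// hits the generic unsupported-media error.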
export * as BedrockMedia from "./bedrock-media"

View File

@@ -0,0 +1,101 @@
import { ProviderShared } from "../shared"
// Gemini accepts a JSON Schema-like dialect for tool parameters, but rejects a
// handful of common JSON Schema shapes. Keep this projection isolated so the
// Gemini protocol file still reads like the other protocol modules.
const SCHEMA_INTENT_KEYS = [
"type",
"properties",
"items",
"prefixItems",
"enum",
"const",
"$ref",
"additionalProperties",
"patternProperties",
"required",
"not",
"if",
"then",
"else",
]
const isRecord = ProviderShared.isRecord
const hasCombiner = (schema: unknown) =>
isRecord(schema) && (Array.isArray(schema.anyOf) || Array.isArray(schema.oneOf) || Array.isArray(schema.allOf))
const hasSchemaIntent = (schema: unknown) =>
isRecord(schema) && (hasCombiner(schema) || SCHEMA_INTENT_KEYS.some((key) => key in schema))
const sanitizeNode = (schema: unknown): unknown => {
if (!isRecord(schema)) return Array.isArray(schema) ? schema.map(sanitizeNode) : schema
const result: Record<string, unknown> = Object.fromEntries(
Object.entries(schema).map(([key, value]) => [
key,
key === "enum" && Array.isArray(value) ? value.map(String) : sanitizeNode(value),
]),
)
if (Array.isArray(result.enum) && (result.type === "integer" || result.type === "number")) result.type = "string"
const properties = result.properties
if (result.type === "object" && isRecord(properties) && Array.isArray(result.required)) {
result.required = result.required.filter((field) => typeof field === "string" && field in properties)
}
if (result.type === "array" && !hasCombiner(result)) {
result.items = result.items ?? {}
if (isRecord(result.items) && !hasSchemaIntent(result.items)) result.items = { ...result.items, type: "string" }
}
if (typeof result.type === "string" && result.type !== "object" && !hasCombiner(result)) {
delete result.properties
delete result.required
}
return result
}
const emptyObjectSchema = (schema: Record<string, unknown>) =>
schema.type === "object" &&
(!isRecord(schema.properties) || Object.keys(schema.properties).length === 0) &&
!schema.additionalProperties
const projectNode = (schema: unknown): Record<string, unknown> | undefined => {
if (!isRecord(schema)) return undefined
if (emptyObjectSchema(schema)) return undefined
return Object.fromEntries(
[
["description", schema.description],
["required", schema.required],
["format", schema.format],
["type", Array.isArray(schema.type) ? schema.type.filter((type) => type !== "null")[0] : schema.type],
["nullable", Array.isArray(schema.type) && schema.type.includes("null") ? true : undefined],
["enum", schema.const !== undefined ? [schema.const] : schema.enum],
[
"properties",
isRecord(schema.properties)
? Object.fromEntries(Object.entries(schema.properties).map(([key, value]) => [key, projectNode(value)]))
: undefined,
],
[
"items",
Array.isArray(schema.items)
? schema.items.map(projectNode)
: schema.items === undefined
? undefined
: projectNode(schema.items),
],
["allOf", Array.isArray(schema.allOf) ? schema.allOf.map(projectNode) : undefined],
["anyOf", Array.isArray(schema.anyOf) ? schema.anyOf.map(projectNode) : undefined],
["oneOf", Array.isArray(schema.oneOf) ? schema.oneOf.map(projectNode) : undefined],
["minLength", schema.minLength],
].filter((entry) => entry[1] !== undefined),
)
}
export const convert = (schema: unknown) => projectNode(sanitizeNode(schema))
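// Worked example (sketch):
//   convert({
//     type: "object",
//     required: ["city", "ghost"],
//     properties: { city: { type: ["string", "null"], description: "City" } },
//   })
// returns
//   {
//     required: ["city"],   // "ghost" dropped: it names no declared property
//     type: "object",
//     properties: { city: { description: "City", type: "string", nullable: true } },
//   }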
export * as GeminiToolSchema from "./gemini-tool-schema"

View File

@@ -0,0 +1,55 @@
import { Schema } from "effect"
import type { LLMRequest, ReasoningEffort, TextVerbosity as TextVerbosityValue } from "../../schema"
import { ReasoningEfforts, TextVerbosity } from "../../schema"
export const OpenAIReasoningEfforts = ReasoningEfforts.filter(
(effort): effort is Exclude<ReasoningEffort, "max"> => effort !== "max",
)
export type OpenAIReasoningEffort = (typeof OpenAIReasoningEfforts)[number]
const REASONING_EFFORTS = new Set<string>(ReasoningEfforts)
const OPENAI_REASONING_EFFORTS = new Set<string>(OpenAIReasoningEfforts)
const TEXT_VERBOSITY = new Set<string>(["low", "medium", "high"])
export const OpenAIReasoningEffort = Schema.Literals(OpenAIReasoningEfforts)
export const OpenAITextVerbosity = TextVerbosity
const isAnyReasoningEffort = (effort: unknown): effort is ReasoningEffort =>
typeof effort === "string" && REASONING_EFFORTS.has(effort)
export const isReasoningEffort = (effort: unknown): effort is OpenAIReasoningEffort =>
typeof effort === "string" && OPENAI_REASONING_EFFORTS.has(effort)
const isTextVerbosity = (value: unknown): value is TextVerbosityValue =>
typeof value === "string" && TEXT_VERBOSITY.has(value)
const options = (request: LLMRequest) => request.providerOptions?.openai
export const store = (request: LLMRequest): boolean | undefined => {
const value = options(request)?.store
return typeof value === "boolean" ? value : undefined
}
export const reasoningEffort = (request: LLMRequest): ReasoningEffort | undefined => {
const value = options(request)?.reasoningEffort
return isAnyReasoningEffort(value) ? value : undefined
}
export const reasoningSummary = (request: LLMRequest): "auto" | undefined => {
return options(request)?.reasoningSummary === "auto" ? "auto" : undefined
}
export const encryptedReasoning = (request: LLMRequest) =>
options(request)?.includeEncryptedReasoning === true ? true : undefined
export const promptCacheKey = (request: LLMRequest) => {
const value = options(request)?.promptCacheKey
return typeof value === "string" ? value : undefined
}
export const textVerbosity = (request: LLMRequest) => {
const value = options(request)?.textVerbosity
return isTextVerbosity(value) ? value : undefined
}
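// The accessors above all read `request.providerOptions.openai` (sketch; every
// field optional, non-matching values are ignored):
//   { store: false, reasoningEffort: "high", reasoningSummary: "auto",
//     includeEncryptedReasoning: true, promptCacheKey: "sess-42",
//     textVerbosity: "low" }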
export * as OpenAIOptions from "./openai-options"

View File

@@ -0,0 +1,196 @@
import { Effect } from "effect"
import { LLMError, type ProviderMetadata, type ToolCall, type ToolInputDelta } from "../../schema"
import { eventError, parseToolInput, type ToolAccumulator } from "../shared"
type StreamKey = string | number
/**
* One pending streamed tool call. Providers emit the tool identity and JSON
* argument text across separate chunks; `input` is the raw JSON string collected
* so far, not the parsed object.
*/
export interface PendingTool extends ToolAccumulator {
readonly providerExecuted?: boolean
readonly providerMetadata?: ProviderMetadata
}
/**
* Sparse parser state keyed by the provider's stream-local tool identifier.
*
* This key is not the final tool-call id (`call_...`). It is the id/index the
* provider uses while streaming a partial call: OpenAI Chat / Anthropic /
* Bedrock use numeric content indexes, while OpenAI Responses uses string
* `item_id`s. The generic keeps each protocol internally consistent.
*/
export type State<K extends StreamKey> = Partial<Record<K, PendingTool>>
/**
* Result of adding argument text to one pending tool call. It returns both the
* next `tools` state and the updated `tool` because parsers often need the
* current id/name immediately. `event` is present only when new text arrived;
* metadata-only deltas update identity without emitting `tool-input-delta`.
*/
export interface AppendOutcome<K extends StreamKey> {
readonly tools: State<K>
readonly tool: PendingTool
readonly event?: ToolInputDelta
}
/** Create empty accumulator state for one provider stream. */
export const empty = <K extends StreamKey>(): State<K> => ({})
const withTool = <K extends StreamKey>(tools: State<K>, key: K, tool: PendingTool): State<K> => {
return { ...tools, [key]: tool }
}
const withoutTool = <K extends StreamKey>(tools: State<K>, key: K): State<K> => {
const next = { ...tools }
delete next[key]
return next
}
const inputDelta = (tool: PendingTool, text: string): ToolInputDelta => ({
type: "tool-input-delta",
id: tool.id,
name: tool.name,
text,
...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
})
const toolCall = (route: string, tool: PendingTool, inputOverride?: string) =>
parseToolInput(route, tool.name, inputOverride ?? tool.input).pipe(
Effect.map(
(input): ToolCall =>
tool.providerExecuted
? {
type: "tool-call",
id: tool.id,
name: tool.name,
input,
providerExecuted: true,
...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
}
: {
type: "tool-call",
id: tool.id,
name: tool.name,
input,
...(tool.providerMetadata ? { providerMetadata: tool.providerMetadata } : {}),
},
),
)
/** Store the updated tool and produce the optional public delta event. */
const appendTool = <K extends StreamKey>(
tools: State<K>,
key: K,
tool: PendingTool,
text: string,
): AppendOutcome<K> => ({
tools: withTool(tools, key, tool),
tool,
event: text.length === 0 ? undefined : inputDelta(tool, text),
})
export const isError = <K extends StreamKey>(result: AppendOutcome<K> | LLMError): result is LLMError =>
result instanceof LLMError
/**
* Register a tool call whose start event arrived before any argument deltas.
* Used by Anthropic `content_block_start`, Bedrock `contentBlockStart`, and
* OpenAI Responses `response.output_item.added`.
*/
export const start = <K extends StreamKey>(
tools: State<K>,
key: K,
tool: Omit<PendingTool, "input"> & { readonly input?: string },
) => withTool(tools, key, { ...tool, input: tool.input ?? "" })
/**
* Append a streamed argument delta, starting the tool if this provider encodes
* identity on the first delta instead of a separate start event. OpenAI Chat has
* this shape: `tool_calls[].index` is the stream key, and `id` / `name` may only
* appear on the first delta for that index.
*/
export const appendOrStart = <K extends StreamKey>(
route: string,
tools: State<K>,
key: K,
delta: { readonly id?: string; readonly name?: string; readonly text: string },
missingToolMessage: string,
): AppendOutcome<K> | LLMError => {
const current = tools[key]
const id = delta.id ?? current?.id
const name = delta.name ?? current?.name
if (!id || !name) return eventError(route, missingToolMessage)
const tool = {
id,
name,
input: `${current?.input ?? ""}${delta.text}`,
providerExecuted: current?.providerExecuted,
providerMetadata: current?.providerMetadata,
}
if (current && delta.text.length === 0 && current.id === id && current.name === name) return { tools, tool: current }
return appendTool(tools, key, tool, delta.text)
}
/**
* Append argument text to a tool that must already have been started. This keeps
* protocols honest when their stream grammar promises a start event before any
* argument delta.
*/
export const appendExisting = <K extends StreamKey>(
route: string,
tools: State<K>,
key: K,
text: string,
missingToolMessage: string,
): AppendOutcome<K> | LLMError => {
const current = tools[key]
if (!current) return eventError(route, missingToolMessage)
if (text.length === 0) return { tools, tool: current }
return appendTool(tools, key, { ...current, input: `${current.input}${text}` }, text)
}
/**
* Finalize one pending tool call: parse the accumulated raw JSON, remove it
* from state, and return the optional public `tool-call` event. Missing keys are
* a no-op because some providers emit stop events for non-tool content blocks.
*/
export const finish = <K extends StreamKey>(route: string, tools: State<K>, key: K) =>
Effect.gen(function* () {
const tool = tools[key]
if (!tool) return { tools }
return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool) }
})
/**
* Finalize one pending tool call with an authoritative final input string.
* OpenAI Responses can send accumulated deltas and then repeat the completed
* arguments on `response.output_item.done`; the final value wins.
*/
export const finishWithInput = <K extends StreamKey>(route: string, tools: State<K>, key: K, input: string) =>
Effect.gen(function* () {
const tool = tools[key]
if (!tool) return { tools }
return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool, input) }
})
/**
* Finalize every pending tool call at once. OpenAI Chat has this shape: it does
* not emit per-tool stop events, so all accumulated calls finish when the choice
* receives a terminal `finish_reason`.
*/
export const finishAll = <K extends StreamKey>(route: string, tools: State<K>) =>
Effect.gen(function* () {
const pending = Object.values<PendingTool | undefined>(tools).filter(
(tool): tool is PendingTool => tool !== undefined,
)
return {
tools: empty<K>(),
events: yield* Effect.forEach(pending, (tool) => toolCall(route, tool)),
}
})
export * as ToolStream from "./tool-stream"
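// Example (illustrative sketch; ids and names are fake): how an OpenAI Chat
// stream drives this module. Stream keys are the numeric `tool_calls[].index`
// values, and all pending calls finish together on the terminal finish_reason.
//
//   let tools = empty<number>()
//   const outcome = appendOrStart(route, tools, 0,
//     { id: "call_abc", name: "get_weather", text: '{"city":' }, "missing tool call")
//   if (!isError(outcome)) tools = outcome.tools // outcome.event is a tool-input-delta
//   // ...append further argument deltas the same way, then on finish_reason:
//   // const { tools: next, events } = yield* finishAll(route, tools)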

View File

@@ -0,0 +1,31 @@
import type { RouteModelInput } from "./route/client"
import type { ModelID, ModelRef, ProviderID } from "./schema"
export type ModelOptions = Omit<RouteModelInput, "id">
export type ModelFactory<Options extends ModelOptions = ModelOptions> = (
id: string | ModelID,
options?: Options,
) => ModelRef
type AnyModelFactory = (...args: never[]) => ModelRef
export interface Definition<Factory extends AnyModelFactory = ModelFactory> {
readonly id: ProviderID
readonly model: Factory
readonly apis?: Record<string, AnyModelFactory>
}
type DefinitionShape = {
readonly id: ProviderID
readonly model: (...args: never[]) => ModelRef
readonly apis?: Record<string, (...args: never[]) => ModelRef>
}
type NoExtraFields<Input, Shape> = Input & Record<Exclude<keyof Input, keyof Shape>, never>
export const make = <DefinitionType extends DefinitionShape>(
definition: NoExtraFields<DefinitionType, DefinitionShape>,
) => definition
export * as Provider from "./provider"

View File

@@ -0,0 +1,48 @@
import { Route, type RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as BedrockConverse from "../protocols/bedrock-converse"
import type { BedrockCredentials } from "../protocols/bedrock-converse"
export const id = ProviderID.make("amazon-bedrock")
export type ModelOptions = Omit<RouteModelInput, "id" | "baseURL"> & {
readonly apiKey?: string
readonly headers?: Record<string, string>
readonly credentials?: BedrockCredentials
/** AWS region. Defaults to `us-east-1` when neither this nor `credentials.region` is set. */
readonly region?: string
/** Override the computed `https://bedrock-runtime.<region>.amazonaws.com` URL. */
readonly baseURL?: string
}
type ModelInput = ModelOptions & Pick<RouteModelInput, "id">
export const routes = [BedrockConverse.route]
const bedrockBaseURL = (region: string) => `https://bedrock-runtime.${region}.amazonaws.com`
const converseModel = Route.model<ModelInput>(
BedrockConverse.route,
{
provider: "amazon-bedrock",
},
{
mapInput: (input) => {
const { credentials, region, baseURL, ...rest } = input
const resolvedRegion = region ?? credentials?.region ?? "us-east-1"
return {
...rest,
baseURL: baseURL ?? bedrockBaseURL(resolvedRegion),
native: BedrockConverse.nativeCredentials(input.native, credentials),
}
},
},
)
export const model = (modelID: string | ModelID, options: ModelOptions = {}) =>
converseModel({ ...options, id: modelID })
export const provider = Provider.make({
id,
model,
})
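// Example (hypothetical usage; the model id is illustrative). With only a
// region, the base URL resolves to the regional bedrock-runtime endpoint:
//
//   model("anthropic.claude-sonnet", { region: "us-west-2" })
//   // baseURL => https://bedrock-runtime.us-west-2.amazonaws.com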

View File

@@ -0,0 +1,16 @@
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as AnthropicMessages from "../protocols/anthropic-messages"
export const id = ProviderID.make("anthropic")
export const routes = [AnthropicMessages.route]
export const model = (
  id: string | ModelID,
  options: Omit<RouteModelInput, "id" | "baseURL"> & { readonly baseURL?: string } = {},
) => AnthropicMessages.model({ ...options, id })

export const provider = Provider.make({
id,
model,
})

View File

@@ -0,0 +1,83 @@
import { Auth } from "../route/auth"
import { type AtLeastOne, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import type { ModelInput } from "../llm"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"
export const id = ProviderID.make("azure")
const routeAuth = Auth.remove("authorization").andThen(Auth.apiKeyHeader("api-key"))
// Azure needs the customer's resource URL; supply either `resourceName`
// (helper builds the URL) or `baseURL` directly.
type AzureURL = AtLeastOne<{ readonly resourceName: string; readonly baseURL: string }>
export type ModelOptions = AzureURL &
Omit<ModelInput, "id" | "provider" | "route" | "apiKey" | "auth" | "baseURL"> &
ProviderAuthOption<"optional"> & {
readonly apiVersion?: string
readonly useCompletionUrls?: boolean
readonly providerOptions?: OpenAIProviderOptionsInput
}
type AzureModelInput = ModelOptions & Pick<ModelInput, "id">
const resourceBaseURL = (resourceName: string) => `https://${resourceName.trim()}.openai.azure.com/openai/v1`
const responsesRoute = OpenAIResponses.route.with({
id: "azure-openai-responses",
provider: id,
transport: OpenAIResponses.httpTransport.with({ auth: routeAuth }),
})
const chatRoute = OpenAIChat.route.with({
id: "azure-openai-chat",
provider: id,
transport: OpenAIChat.httpTransport.with({ auth: routeAuth }),
})
export const routes = [responsesRoute, chatRoute]
const mapInput = (input: AzureModelInput) => {
const { apiKey: _, apiVersion, resourceName, useCompletionUrls, ...rest } = input
return {
...withOpenAIOptions(input.id, rest),
auth:
"auth" in input && input.auth
? input.auth
: Auth.remove("authorization").andThen(
Auth.optional("apiKey" in input ? input.apiKey : undefined, "apiKey")
.orElse(Auth.config("AZURE_OPENAI_API_KEY"))
.pipe(Auth.header("api-key")),
),
// AtLeastOne guarantees at least one is set; baseURL wins if both are.
baseURL: rest.baseURL ?? resourceBaseURL(resourceName!),
queryParams: {
...rest.queryParams,
"api-version": apiVersion ?? rest.queryParams?.["api-version"] ?? "v1",
},
}
}
const chatModel = Route.model<AzureModelInput>(chatRoute, {}, { mapInput })
const responsesModel = Route.model<AzureModelInput>(responsesRoute, {}, { mapInput })
export const responses = (modelID: string | ModelID, options: ModelOptions) =>
responsesModel({ ...options, id: modelID })
export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID })
export const model = (modelID: string | ModelID, options: ModelOptions) => {
if (options.useCompletionUrls === true) return chat(modelID, options)
return responses(modelID, options)
}
export const provider = Provider.make({
id,
model,
apis: { responses, chat },
})
export const apis = provider.apis
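// Example (hypothetical usage): either AzureURL form works, and `apiVersion`
// defaults to "v1" as a query parameter.
//
//   model("gpt-5", { resourceName: "my-resource" })
//   // baseURL => https://my-resource.openai.azure.com/openai/v1
//   chat("gpt-4o", { baseURL: "https://example.azure-api.net/openai/v1", apiKey: "..." })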

View File

@@ -0,0 +1,139 @@
import type { Config, Redacted } from "effect"
import { type ModelInput } from "../llm"
import { Provider } from "../provider"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import { Auth } from "../route/auth"
import { AuthOptions, type AtLeastOne, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import { ProviderID, type ModelID } from "../schema"
export const aiGatewayID = ProviderID.make("cloudflare-ai-gateway")
export const workersAIID = ProviderID.make("cloudflare-workers-ai")
export const id = aiGatewayID
export const aiGatewayAuthEnvVars = ["CLOUDFLARE_API_TOKEN", "CF_AIG_TOKEN"] as const
export const workersAIAuthEnvVars = ["CLOUDFLARE_API_KEY", "CLOUDFLARE_WORKERS_AI_TOKEN"] as const
type CloudflareSecret = string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
type GatewayURL = AtLeastOne<{
readonly accountId: string
readonly baseURL: string
}> & {
readonly gatewayId?: string
}
export type AIGatewayOptions = GatewayURL &
Omit<ModelInput, "id" | "provider" | "route" | "baseURL" | "apiKey" | "auth"> &
ProviderAuthOption<"optional"> & {
/** Cloudflare AI Gateway authentication token. Sent as `cf-aig-authorization`. */
readonly gatewayApiKey?: CloudflareSecret
}
type AIGatewayInput = AIGatewayOptions & Pick<ModelInput, "id">
type WorkersAIURL = AtLeastOne<{
readonly accountId: string
readonly baseURL: string
}>
export type WorkersAIOptions = WorkersAIURL &
Omit<ModelInput, "id" | "provider" | "route" | "baseURL" | "apiKey" | "auth"> &
ProviderAuthOption<"optional">
type WorkersAIInput = WorkersAIOptions & Pick<ModelInput, "id">
export const aiGatewayBaseURL = (input: GatewayURL) => {
if (input.baseURL) return input.baseURL
if (!input.accountId) throw new Error("Cloudflare.aiGateway requires accountId unless baseURL is supplied")
return `https://gateway.ai.cloudflare.com/v1/${encodeURIComponent(input.accountId)}/${encodeURIComponent(input.gatewayId?.trim() || "default")}/compat`
}
const aiGatewayAuth = (input: AIGatewayInput) => {
if ("auth" in input && input.auth) return input.auth
const gateway = Auth.optional(input.gatewayApiKey, "gatewayApiKey")
.orElse(Auth.config("CLOUDFLARE_API_TOKEN"))
.orElse(Auth.config("CF_AIG_TOKEN"))
.pipe(Auth.bearerHeader("cf-aig-authorization"))
if (!("apiKey" in input) || input.apiKey === undefined) return gateway
if (input.gatewayApiKey === undefined) return Auth.bearer(input.apiKey)
return Auth.bearerHeader("cf-aig-authorization", input.gatewayApiKey).andThen(Auth.bearer(input.apiKey))
}
export const workersAIBaseURL = (input: WorkersAIURL) => {
if (input.baseURL) return input.baseURL
if (!input.accountId) throw new Error("Cloudflare.workersAI requires accountId unless baseURL is supplied")
return `https://api.cloudflare.com/client/v4/accounts/${encodeURIComponent(input.accountId)}/ai/v1`
}
const workersAIAuth = (input: WorkersAIInput) => {
return AuthOptions.bearer(input, workersAIAuthEnvVars)
}
export const aiGatewayRoute = OpenAICompatibleChat.route.with({
id: "cloudflare-ai-gateway",
provider: aiGatewayID,
})
export const workersAIRoute = OpenAICompatibleChat.route.with({
id: "cloudflare-workers-ai",
provider: workersAIID,
})
export const routes = [aiGatewayRoute, workersAIRoute]
const aiGatewayModel = Route.model<AIGatewayInput>(
aiGatewayRoute,
{
provider: id,
},
{
mapInput: (input) => {
const {
accountId: _accountId,
gatewayId: _gatewayId,
apiKey: _apiKey,
gatewayApiKey: _gatewayApiKey,
auth: _auth,
...rest
} = input
return {
...rest,
auth: aiGatewayAuth(input),
baseURL: aiGatewayBaseURL(input),
}
},
},
)
const workersAIModel = Route.model<WorkersAIInput>(
workersAIRoute,
{
provider: workersAIID,
},
{
mapInput: (input) => {
const { accountId: _accountId, apiKey: _apiKey, auth: _auth, ...rest } = input
return {
...rest,
auth: workersAIAuth(input),
baseURL: workersAIBaseURL(input),
}
},
},
)
export const aiGateway = (modelID: string | ModelID, options: AIGatewayOptions) =>
aiGatewayModel({ ...options, id: modelID })
export const workersAI = (modelID: string | ModelID, options: WorkersAIOptions) =>
workersAIModel({ ...options, id: modelID })
export const model = aiGateway
export const provider = Provider.make({
id,
model,
apis: { aiGateway, workersAI },
})
export const apis = provider.apis
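// Example (hypothetical account/gateway ids): URL resolution for both APIs.
//
//   aiGateway("some-model", { accountId: "acct123", gatewayId: "prod" })
//   // baseURL => https://gateway.ai.cloudflare.com/v1/acct123/prod/compat
//   workersAI("@cf/meta/llama-3-8b-instruct", { accountId: "acct123" })
//   // baseURL => https://api.cloudflare.com/client/v4/accounts/acct123/ai/v1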

View File

@@ -0,0 +1,48 @@
import { Route } from "../route/client"
import type { ModelInput } from "../llm"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"
export const id = ProviderID.make("github-copilot")
// GitHub Copilot has no canonical public URL — callers (opencode, etc.) must
// supply `baseURL` explicitly.
export type ModelOptions = Omit<ModelInput, "id" | "provider" | "route"> & {
readonly providerOptions?: OpenAIProviderOptionsInput
}
type CopilotModelInput = ModelOptions & Pick<ModelInput, "id">
export const shouldUseResponsesApi = (modelID: string | ModelID) => {
const model = String(modelID)
const match = /^gpt-(\d+)/.exec(model)
if (!match) return false
return Number(match[1]) >= 5 && !model.startsWith("gpt-5-mini")
}
export const routes = [OpenAIResponses.route, OpenAIChat.route]
const mapInput = (input: CopilotModelInput) => withOpenAIOptions(input.id, input)
const chatModel = Route.model<CopilotModelInput>(OpenAIChat.route, { provider: id }, { mapInput })
const responsesModel = Route.model<CopilotModelInput>(OpenAIResponses.route, { provider: id }, { mapInput })
export const responses = (modelID: string | ModelID, options: ModelOptions) =>
responsesModel({ ...options, id: modelID })
export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID })
export const model = (modelID: string | ModelID, options: ModelOptions) => {
const create = shouldUseResponsesApi(modelID) ? responsesModel : chatModel
return create({ ...options, id: modelID })
}
export const provider = Provider.make({
id,
model,
apis: { responses, chat },
})
export const apis = provider.apis
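// Example: how model ids route between the two APIs (derived from
// shouldUseResponsesApi above):
//
//   shouldUseResponsesApi("gpt-5")      // true  -> Responses API
//   shouldUseResponsesApi("gpt-5-mini") // false -> Chat Completions
//   shouldUseResponsesApi("gpt-4o")     // false (4 < 5)
//   shouldUseResponsesApi("claude-x")   // false (no gpt-<digit> prefix)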

View File

@@ -0,0 +1,16 @@
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as Gemini from "../protocols/gemini"
export const id = ProviderID.make("google")
export const routes = [Gemini.route]
export const model = (
  id: string | ModelID,
  options: Omit<RouteModelInput, "id" | "baseURL"> & { readonly baseURL?: string } = {},
) => Gemini.model({ ...options, id })
export const provider = Provider.make({
id,
model,
})

View File

@@ -0,0 +1,10 @@
export * as Anthropic from "./anthropic"
export * as AmazonBedrock from "./amazon-bedrock"
export * as Azure from "./azure"
export * as Cloudflare from "./cloudflare"
export * as GitHubCopilot from "./github-copilot"
export * as Google from "./google"
export * as OpenAI from "./openai"
export * as OpenAICompatible from "./openai-compatible"
export * as OpenRouter from "./openrouter"
export * as XAI from "./xai"

View File

@@ -0,0 +1,20 @@
export interface OpenAICompatibleProfile {
readonly provider: string
readonly baseURL: string
}
export const profiles = {
baseten: { provider: "baseten", baseURL: "https://inference.baseten.co/v1" },
cerebras: { provider: "cerebras", baseURL: "https://api.cerebras.ai/v1" },
deepinfra: { provider: "deepinfra", baseURL: "https://api.deepinfra.com/v1/openai" },
deepseek: { provider: "deepseek", baseURL: "https://api.deepseek.com/v1" },
fireworks: { provider: "fireworks", baseURL: "https://api.fireworks.ai/inference/v1" },
groq: { provider: "groq", baseURL: "https://api.groq.com/openai/v1" },
openrouter: { provider: "openrouter", baseURL: "https://openrouter.ai/api/v1" },
togetherai: { provider: "togetherai", baseURL: "https://api.together.xyz/v1" },
xai: { provider: "xai", baseURL: "https://api.x.ai/v1" },
} as const satisfies Record<string, OpenAICompatibleProfile>
export const byProvider: Record<string, OpenAICompatibleProfile> = Object.fromEntries(
Object.values(profiles).map((profile) => [profile.provider, profile]),
)

View File

@@ -0,0 +1,61 @@
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import type { OpenAICompatibleChatModelInput } from "../protocols/openai-compatible-chat"
import { profiles, type OpenAICompatibleProfile } from "./openai-compatible-profile"
export const id = ProviderID.make("openai-compatible")
export type ModelOptions = Omit<OpenAICompatibleChatModelInput, "id" | "provider"> & {
readonly provider: string
}
type GenericModelOptions = Omit<ModelOptions, "provider"> & {
readonly provider?: string
}
export type FamilyModelOptions = Omit<OpenAICompatibleChatModelInput, "id" | "provider" | "baseURL"> & {
readonly baseURL?: string
}
export const routes = [OpenAICompatibleChat.route]
export const model = (id: string | ModelID, options: ModelOptions) => {
return OpenAICompatibleChat.model({
...options,
id,
provider: ProviderID.make(options.provider),
})
}
export const profileModel = (
profile: OpenAICompatibleProfile,
id: string | ModelID,
options: FamilyModelOptions = {},
) =>
OpenAICompatibleChat.model({
...options,
id,
provider: profile.provider,
baseURL: options.baseURL ?? profile.baseURL,
})
const define = (profile: OpenAICompatibleProfile) =>
Provider.make({
id: ProviderID.make(profile.provider),
model: (id: string | ModelID, options: FamilyModelOptions = {}) => profileModel(profile, id, options),
})
export const provider = Provider.make({
id,
model: (id: string | ModelID, options: GenericModelOptions) =>
model(id, { ...options, provider: options.provider ?? "openai-compatible" }),
})
export const baseten = define(profiles.baseten)
export const cerebras = define(profiles.cerebras)
export const deepinfra = define(profiles.deepinfra)
export const deepseek = define(profiles.deepseek)
export const fireworks = define(profiles.fireworks)
export const groq = define(profiles.groq)
export const togetherai = define(profiles.togetherai)
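// Example (hypothetical model ids): profile providers bake in a base URL,
// while the generic entry point requires one.
//
//   groq.model("llama-3.1-70b")
//   // provider "groq", baseURL https://api.groq.com/openai/v1
//   model("my-model", { provider: "local", baseURL: "http://localhost:8080/v1" })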

View File

@@ -0,0 +1,70 @@
import type { ProviderOptions, ReasoningEffort, TextVerbosity } from "../schema"
import { mergeProviderOptions } from "../schema"
export interface OpenAIOptionsInput {
readonly [key: string]: unknown
readonly store?: boolean
readonly promptCacheKey?: string
readonly reasoningEffort?: ReasoningEffort
readonly reasoningSummary?: "auto"
readonly includeEncryptedReasoning?: boolean
readonly textVerbosity?: TextVerbosity
}
export type OpenAIProviderOptionsInput = ProviderOptions & {
readonly openai?: OpenAIOptionsInput
}
const definedEntries = (input: Record<string, unknown>) =>
Object.entries(input).filter((entry) => entry[1] !== undefined)
const openAIProviderOptions = (options: OpenAIOptionsInput | undefined): ProviderOptions | undefined => {
const openai = Object.fromEntries(
definedEntries({
store: options?.store,
promptCacheKey: options?.promptCacheKey,
reasoningEffort: options?.reasoningEffort,
reasoningSummary: options?.reasoningSummary,
includeEncryptedReasoning: options?.includeEncryptedReasoning,
textVerbosity: options?.textVerbosity,
}),
)
if (Object.keys(openai).length === 0) return undefined
return { openai }
}
export const gpt5DefaultOptions = (
modelID: string,
options: { readonly textVerbosity?: boolean } = {},
): ProviderOptions | undefined => {
const id = modelID.toLowerCase()
if (!id.includes("gpt-5") || id.includes("gpt-5-chat") || id.includes("gpt-5-pro")) return undefined
return openAIProviderOptions({
reasoningEffort: "medium",
reasoningSummary: "auto",
textVerbosity:
options.textVerbosity === true && id.includes("gpt-5.") && !id.includes("codex") && !id.includes("-chat")
? "low"
: undefined,
})
}
export const openAIDefaultOptions = (
modelID: string,
options: { readonly textVerbosity?: boolean } = {},
): ProviderOptions | undefined =>
mergeProviderOptions(openAIProviderOptions({ store: false }), gpt5DefaultOptions(modelID, options))
export const withOpenAIOptions = <Options extends { readonly providerOptions?: OpenAIProviderOptionsInput }>(
modelID: string,
options: Options,
defaults: { readonly textVerbosity?: boolean } = {},
): Options & { readonly id: string; readonly providerOptions?: ProviderOptions } => {
return {
...options,
id: modelID,
providerOptions: mergeProviderOptions(openAIDefaultOptions(modelID, defaults), options.providerOptions),
}
}
export * as OpenAIProviderOptions from "./openai-options"
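// Example (derived from the defaults above, assuming mergeProviderOptions
// merges per-provider records):
//
//   openAIDefaultOptions("gpt-5")
//   // => { openai: { store: false, reasoningEffort: "medium", reasoningSummary: "auto" } }
//   openAIDefaultOptions("gpt-4o")
//   // => { openai: { store: false } } (no GPT-5 reasoning defaults)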

View File

@@ -0,0 +1,53 @@
import { AuthOptions, type ProviderAuthOption } from "../route/auth-options"
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"
export type { OpenAIOptionsInput } from "./openai-options"
export const id = ProviderID.make("openai")
export const routes = [OpenAIResponses.route, OpenAIResponses.webSocketRoute, OpenAIChat.route]
// This provider facade wraps the lower-level Responses and Chat model factories
// with OpenAI-specific conveniences: typed options, API-key sugar, env fallback,
// and default option normalization.
type OpenAIModelInput<ModelInput> = Omit<ModelInput, "apiKey" | "auth" | "baseURL"> &
ProviderAuthOption<"optional"> & {
readonly baseURL?: string
readonly providerOptions?: OpenAIProviderOptionsInput
}
const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "OPENAI_API_KEY")
export const responses = (id: string | ModelID, options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {}) => {
const { apiKey: _, ...rest } = options
return OpenAIResponses.model(withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true }))
}
export const responsesWebSocket = (
id: string | ModelID,
options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {},
) => {
const { apiKey: _, ...rest } = options
return OpenAIResponses.webSocketModel(
withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true }),
)
}
export const chat = (id: string | ModelID, options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {}) => {
const { apiKey: _, ...rest } = options
return OpenAIChat.model(withOpenAIOptions(id, { ...rest, auth: auth(options) }))
}
export const provider = Provider.make({
id,
model: responses,
apis: { responses, responsesWebSocket, chat },
})
export const model = provider.model
export const apis = provider.apis
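// Example (hypothetical usage): the API key resolves from the option first,
// then from OPENAI_API_KEY.
//
//   model("gpt-5")                       // Responses API; key from OPENAI_API_KEY
//   chat("gpt-4o", { apiKey: "sk-..." }) // Chat Completions with an explicit key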

View File

@@ -0,0 +1,88 @@
import { Effect, Schema } from "effect"
import { Route, type RouteModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Provider } from "../provider"
import { Protocol } from "../route/protocol"
import { ProviderID, type ModelID, type ProviderOptions } from "../schema"
import * as OpenAICompatibleProfiles from "./openai-compatible-profile"
import * as OpenAIChat from "../protocols/openai-chat"
import { isRecord } from "../protocols/shared"
export const profile = OpenAICompatibleProfiles.profiles.openrouter
export const id = ProviderID.make(profile.provider)
const ADAPTER = "openrouter"
export interface OpenRouterOptions {
readonly [key: string]: unknown
readonly usage?: boolean | Record<string, unknown>
readonly reasoning?: Record<string, unknown>
readonly promptCacheKey?: string
}
export type OpenRouterProviderOptionsInput = ProviderOptions & {
readonly openrouter?: OpenRouterOptions
}
export type ModelOptions = Omit<RouteModelInput, "id" | "baseURL" | "providerOptions"> & {
readonly baseURL?: string
readonly providerOptions?: OpenRouterProviderOptionsInput
}
type ModelInput = ModelOptions & Pick<RouteModelInput, "id">
const OpenRouterBody = Schema.StructWithRest(Schema.Struct(OpenAIChat.bodyFields), [
Schema.Record(Schema.String, Schema.Any),
])
export type OpenRouterBody = Schema.Schema.Type<typeof OpenRouterBody>
export const protocol = Protocol.make({
id: "openrouter-chat",
body: {
schema: OpenRouterBody,
from: (request) =>
OpenAIChat.protocol.body.from(request).pipe(
Effect.map(
(body) =>
({
...body,
...bodyOptions(request.providerOptions?.openrouter),
}) as OpenRouterBody,
),
),
},
stream: OpenAIChat.protocol.stream,
})
const bodyOptions = (input: unknown) => {
const openrouter = isRecord(input) ? input : {}
return {
...(openrouter.usage === true
? { usage: { include: true } }
: isRecord(openrouter.usage)
? { usage: openrouter.usage }
: {}),
...(isRecord(openrouter.reasoning) ? { reasoning: openrouter.reasoning } : {}),
...(typeof openrouter.promptCacheKey === "string" ? { prompt_cache_key: openrouter.promptCacheKey } : {}),
}
}
export const route = Route.make({
id: ADAPTER,
protocol,
endpoint: Endpoint.path("/chat/completions"),
framing: Framing.sse,
})
export const routes = [route]
const modelRef = Route.model<ModelInput>(route, {
provider: profile.provider,
baseURL: profile.baseURL,
})
export const model = (id: string | ModelID, options: ModelOptions = {}) => modelRef({ ...options, id })
export const provider = Provider.make({
id,
model,
})
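// Example: provider options map onto OpenRouter body fields (per bodyOptions):
//
//   providerOptions: { openrouter: { usage: true, promptCacheKey: "session-1" } }
//   // adds { usage: { include: true }, prompt_cache_key: "session-1" } to the chat body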

View File

@@ -0,0 +1,52 @@
import { AuthOptions, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAICompatibleProfiles from "./openai-compatible-profile"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
export const id = ProviderID.make("xai")
export type ModelOptions = Omit<RouteModelInput, "id" | "apiKey" | "auth" | "baseURL"> &
ProviderAuthOption<"optional"> & {
readonly baseURL?: string
}
export const routes = [OpenAIResponses.route, OpenAICompatibleChat.route]
const responsesModel = Route.model(OpenAIResponses.route, { provider: id })
const chatModel = OpenAICompatibleChat.model
const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "XAI_API_KEY")
export const responses = (modelID: string | ModelID, options: ModelOptions = {}) => {
const { apiKey: _, ...rest } = options
return responsesModel({
...rest,
auth: auth(options),
id: modelID,
baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL,
})
}
export const chat = (modelID: string | ModelID, options: ModelOptions = {}) => {
const { apiKey: _, ...rest } = options
return chatModel({
...rest,
auth: auth(options),
id: modelID,
provider: id,
baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL,
})
}
export const provider = Provider.make({
id,
model: responses,
apis: { responses, chat },
})
export const model = provider.model
export const apis = provider.apis

View File

@@ -0,0 +1,57 @@
import type { Config, Redacted } from "effect"
import { Auth } from "./auth"
export type ApiKeyMode = "optional" | "required"
export type AuthOverride = {
readonly auth: Auth
readonly apiKey?: never
}
export type OptionalApiKeyAuth = {
readonly apiKey?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
readonly auth?: never
}
export type RequiredApiKeyAuth = {
readonly apiKey: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
readonly auth?: never
}
export type ProviderAuthOption<Mode extends ApiKeyMode> =
| AuthOverride
| (Mode extends "optional" ? OptionalApiKeyAuth : RequiredApiKeyAuth)
export type ModelOptions<Base, Mode extends ApiKeyMode> = Omit<Base, "apiKey" | "auth"> & ProviderAuthOption<Mode>
export type ModelArgs<Base, Mode extends ApiKeyMode> = Mode extends "optional"
? readonly [options?: ModelOptions<Base, Mode>]
: readonly [options: ModelOptions<Base, Mode>]
export type ModelFactory<Base, Mode extends ApiKeyMode, Model> = (id: string, ...args: ModelArgs<Base, Mode>) => Model
/**
* Require at least one of the keys in `T`. Use for option shapes where any
* subset of fields is acceptable but at least one must be present (e.g. Azure
* accepts `resourceName` or `baseURL`).
*/
export type AtLeastOne<T> = {
[K in keyof T]: Required<Pick<T, K>> & Partial<Omit<T, K>>
}[keyof T]
/**
* Standard bearer-auth resolution for providers: honor an explicit `auth`
* override, otherwise resolve `apiKey` (option > config var) and apply it as
* a bearer token.
*/
export const bearer = (options: ProviderAuthOption<"optional">, envVar: string | ReadonlyArray<string>): Auth => {
if ("auth" in options && options.auth) return options.auth
return (Array.isArray(envVar) ? envVar : [envVar])
.reduce(
(auth, name) => auth.orElse(Auth.config(name)),
Auth.optional("apiKey" in options ? options.apiKey : undefined, "apiKey"),
)
.bearer()
}
export * as AuthOptions from "./auth-options"
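// Example: AtLeastOne accepts any non-empty subset of its keys.
//
//   type AzureURL = AtLeastOne<{ resourceName: string; baseURL: string }>
//   const a: AzureURL = { resourceName: "r" }    // ok
//   const b: AzureURL = { baseURL: "https://x" } // ok
//   // const c: AzureURL = {}                    // type error
//
// And bearer resolution order: explicit `auth` > `apiKey` option > config vars.
//
//   bearer({ apiKey: "sk-..." }, "OPENAI_API_KEY")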

View File

@@ -0,0 +1,196 @@
import { Config, Effect, Redacted } from "effect"
import { Headers } from "effect/unstable/http"
import { AuthenticationReason, InvalidRequestReason, LLMError, type LLMRequest } from "../schema"
export class MissingCredentialError extends Error {
readonly _tag = "MissingCredentialError"
constructor(readonly source: string) {
super(`Missing auth credential: ${source}`)
}
}
export type CredentialError = MissingCredentialError | Config.ConfigError
export type AuthError = CredentialError | LLMError
export interface AuthInput {
readonly request: LLMRequest
readonly method: "POST" | "GET"
readonly url: string
readonly body: string
readonly headers: Headers.Headers
}
export interface Credential {
readonly load: Effect.Effect<Redacted.Redacted<string>, CredentialError>
readonly orElse: (that: Credential) => Credential
readonly bearer: () => Auth
readonly header: (name: string) => Auth
readonly pipe: <A>(f: (self: Credential) => A) => A
}
export interface Auth {
readonly apply: (input: AuthInput) => Effect.Effect<Headers.Headers, AuthError>
readonly andThen: (that: Auth) => Auth
readonly orElse: (that: Auth) => Auth
readonly pipe: <A>(f: (self: Auth) => A) => A
}
export const isAuth = (input: unknown): input is Auth =>
typeof input === "object" && input !== null && "apply" in input && typeof input.apply === "function"
const credential = (load: Effect.Effect<Redacted.Redacted<string>, CredentialError>): Credential => {
const self: Credential = {
load,
orElse: (that) => credential(load.pipe(Effect.catch(() => that.load))),
bearer: () => fromCredential(self, (secret) => ({ authorization: `Bearer ${secret}` })),
header: (name) => fromCredential(self, (secret) => ({ [name]: secret })),
pipe: (f) => f(self),
}
return self
}
const auth = (apply: Auth["apply"]): Auth => {
const self: Auth = {
apply,
andThen: (that) =>
auth((input) => apply(input).pipe(Effect.flatMap((headers) => that.apply({ ...input, headers })))),
orElse: (that) => auth((input) => apply(input).pipe(Effect.catch(() => that.apply(input)))),
pipe: (f) => f(self),
}
return self
}
const fromCredential = (source: Credential, render: (secret: string) => Headers.Input) =>
auth((input) =>
source.load.pipe(Effect.map((secret) => Headers.setAll(input.headers, render(Redacted.value(secret))))),
)
const secretEffect = (secret: string | Redacted.Redacted<string>, source: string) => {
const redacted = typeof secret === "string" ? Redacted.make(secret) : secret
if (Redacted.value(redacted) === "") return Effect.fail(new MissingCredentialError(source))
return Effect.succeed(redacted)
}
const credentialFromSecret = (
secret: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>,
source: string,
) => {
if (typeof secret === "string" || Redacted.isRedacted(secret)) return credential(secretEffect(secret, source))
return credential(
Effect.gen(function* () {
return yield* secretEffect(yield* secret, source)
}),
)
}
export const value = (secret: string, source = "value") => credentialFromSecret(secret, source)
export const optional = (
secret: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | undefined,
source = "optional value",
) =>
secret === undefined
? credential(Effect.fail(new MissingCredentialError(source)))
: credentialFromSecret(secret, source)
export const config = (name: string) => credentialFromSecret(Config.redacted(name), name)
export const effect = (load: Effect.Effect<Redacted.Redacted<string>, CredentialError>) => credential(load)
export const none = auth((input) => Effect.succeed(input.headers))
export const headers = (input: Headers.Input) =>
auth((inputAuth) => Effect.succeed(Headers.setAll(inputAuth.headers, input)))
export const remove = (name: string) => auth((input) => Effect.succeed(Headers.remove(input.headers, name)))
export const custom = (apply: (input: AuthInput) => Effect.Effect<Headers.Headers, LLMError>) => auth(apply)
export const passthrough = none
const fromModelApiKey = (from: (apiKey: string) => Headers.Input) =>
auth(({ request, headers }) => {
const key = request.model.apiKey
if (!key) return Effect.succeed(headers)
return Effect.succeed(Headers.setAll(headers, from(key)))
})
const credentialInput = (
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) =>
typeof source === "string" || Redacted.isRedacted(source) || Config.isConfig(source)
? credentialFromSecret(source, "value")
: source
export function bearer(): Auth
export function bearer(
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function bearer(
source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
if (source === undefined) return fromModelApiKey((key) => ({ authorization: `Bearer ${key}` }))
return credentialInput(source).bearer()
}
export const apiKey = bearer
export const apiKeyHeader = (name: string) => fromModelApiKey((key) => ({ [name]: key }))
export function header(
name: string,
): (source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) => Auth
export function header(
name: string,
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function header(
name: string,
source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
if (source === undefined) {
return (
next: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) => credentialInput(next).header(name)
}
return credentialInput(source).header(name)
}
export function bearerHeader(
name: string,
): (source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) => Auth
export function bearerHeader(
name: string,
source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function bearerHeader(
name: string,
source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
const render = (input: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) =>
fromCredential(credentialInput(input), (secret) => ({ [name]: `Bearer ${secret}` }))
if (source === undefined) return render
return render(source)
}
const toLLMError = (error: AuthError): LLMError => {
if (error instanceof MissingCredentialError || error instanceof Config.ConfigError) {
return new LLMError({
module: "Auth",
method: "apply",
reason:
error instanceof MissingCredentialError
? new AuthenticationReason({ message: error.message, kind: "missing" })
: new InvalidRequestReason({ message: `Failed to resolve auth config: ${error.message}` }),
})
}
return error
}
export const toEffect =
(input: Auth) =>
(authInput: AuthInput): Effect.Effect<Headers.Headers, LLMError> =>
input.apply(authInput).pipe(Effect.mapError(toLLMError))
export * as Auth from "./auth"
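// Example (patterns taken from the providers above): credentials compose with
// orElse before being rendered into headers, and auths chain with andThen.
//
//   optional(input.apiKey, "apiKey")
//     .orElse(config("MY_API_KEY")) // "MY_API_KEY" is a hypothetical config var
//     .bearer()                     // => authorization: Bearer <secret>
//
//   remove("authorization").andThen(apiKeyHeader("api-key")) // Azure-style swap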

View File

@@ -0,0 +1,528 @@
import { Cause, Context, Effect, Layer, Schema, Stream } from "effect"
import type { Auth as AuthDef } from "./auth"
import type { Endpoint } from "./endpoint"
import { RequestExecutor } from "./executor"
import type { Framing } from "./framing"
import { HttpTransport } from "./transport"
import type { Transport, TransportRuntime } from "./transport"
import { WebSocketExecutor } from "./transport"
import type { Service as WebSocketExecutorService } from "./transport/websocket"
import type { Protocol } from "./protocol"
import * as ProviderShared from "../protocols/shared"
import * as ToolRuntime from "../tool-runtime"
import type { Tools } from "../tool"
import type { LLMError, LLMEvent, PreparedRequestOf, ProtocolID } from "../schema"
import {
GenerationOptions,
HttpOptions,
LLMRequest,
LLMResponse,
ModelID,
ModelLimits,
ModelRef,
LLMError as LLMErrorClass,
NoRouteReason,
PreparedRequest,
ProviderID,
RouteID,
mergeGenerationOptions,
mergeHttpOptions,
mergeProviderOptions,
} from "../schema"
export interface RouteBody<Body> {
/** Schema for the validated provider-native body sent as the JSON request. */
readonly schema: Schema.Codec<Body, unknown>
/** Build the provider-native body from a common `LLMRequest`. */
readonly from: (request: LLMRequest) => Effect.Effect<Body, LLMError>
}
export interface Route<Body, Prepared = unknown> {
readonly id: string
readonly provider?: ProviderID
readonly protocol: ProtocolID
readonly transport: Transport<Body, Prepared, unknown>
readonly defaults: RouteDefaults
readonly body: RouteBody<Body>
readonly with: (patch: RoutePatch<Body, Prepared>) => Route<Body, Prepared>
readonly model: <Input extends RouteModelInput = RouteModelInput>(input: Input) => ModelRef
readonly prepareTransport: (body: Body, request: LLMRequest) => Effect.Effect<Prepared, LLMError>
readonly streamPrepared: (
prepared: Prepared,
request: LLMRequest,
runtime: TransportRuntime,
) => Stream.Stream<LLMEvent, LLMError>
}
// Route registries intentionally erase body generics after construction.
// Normal call sites use `OpenAIChat.route`; callers only need body types
// when preparing a request with a protocol-specific type assertion.
// oxlint-disable-next-line typescript-eslint/no-explicit-any
export type AnyRoute = Route<any, any>
const routeRegistry = new Map<string, AnyRoute>()
// Route lookup is intentionally global: model refs name a route id, and
// importing the provider/protocol/custom-route module registers the runnable
// implementation. Duplicate ids are bugs because model refs cannot disambiguate
// them.
const register = <R extends AnyRoute>(route: R): R => {
const existing = routeRegistry.get(route.id)
if (existing && existing !== route) throw new Error(`Duplicate LLM route id "${route.id}"`)
routeRegistry.set(route.id, route)
return route
}
const registeredRoute = (id: string) => routeRegistry.get(id)
export type HttpOptionsInput = HttpOptions.Input
export type ModelRefInput = Omit<
ConstructorParameters<typeof ModelRef>[0],
"id" | "provider" | "route" | "limits" | "generation" | "http" | "auth"
> & {
readonly id: string | ModelID
readonly provider: string | ProviderID
readonly route: string | RouteID
readonly auth?: AuthDef
readonly limits?: ModelLimits.Input
readonly generation?: GenerationOptions.Input
readonly http?: HttpOptionsInput
}
// `baseURL` is required on `ModelRefInput` (every materialized `ModelRef` has
// a host) but optional at the route-input layers below. The route's `defaults`
// can supply a canonical URL (e.g. OpenAI/Anthropic) so the user's input may
// omit it. Routes without a canonical URL (OpenAI-compatible, GitHub Copilot)
// re-tighten this in their own input type.
export type RouteModelInput = Omit<ModelRefInput, "provider" | "route" | "baseURL"> & {
readonly baseURL?: string
}
export type RouteModelDefaults = Omit<ModelRefInput, "id" | "route" | "baseURL"> & {
readonly baseURL?: string
}
export type RouteRoutedModelInput = Omit<ModelRefInput, "route" | "baseURL"> & {
readonly baseURL?: string
}
export type RouteRoutedModelDefaults = Partial<Omit<ModelRefInput, "id" | "provider" | "route">>
export type RouteDefaults = Partial<Omit<ModelRefInput, "id" | "provider" | "route">>
export interface RoutePatch<Body, Prepared> extends RouteDefaults {
readonly id: string
readonly provider?: string | ProviderID
readonly transport?: Transport<Body, Prepared, unknown>
}
type RouteMappedModelInput = RouteModelInput | RouteRoutedModelInput
export interface RouteModelOptions<
Input extends RouteMappedModelInput,
Output extends RouteMappedModelInput = RouteMappedModelInput,
> {
readonly mapInput?: (input: Input) => Output
}
export interface RouteMappedModelOptions<Input, Output extends RouteMappedModelInput = RouteMappedModelInput> {
readonly mapInput: (input: Input) => Output
}
const modelWithDefaults =
<Input>(
route: AnyRoute,
defaults: Partial<Omit<ModelRefInput, "id" | "route">>,
options: { readonly mapInput?: (input: Input) => RouteMappedModelInput },
) =>
(input: Input) => {
const mapped = options.mapInput === undefined ? (input as RouteMappedModelInput) : options.mapInput(input)
const provider = defaults.provider ?? route.provider ?? ("provider" in mapped ? mapped.provider : undefined)
if (!provider) throw new Error(`Route.model(${route.id}) requires a provider`)
const baseURL = mapped.baseURL ?? defaults.baseURL ?? route.defaults.baseURL
if (!baseURL)
throw new Error(
`Route.model(${route.id}) requires a baseURL — supply it via input, defaults, or route defaults`,
)
const generation = mergeGenerationOptions(route.defaults.generation, defaults.generation)
const providerOptions = mergeProviderOptions(route.defaults.providerOptions, defaults.providerOptions)
const http = mergeHttpOptions(httpOptions(route.defaults.http), httpOptions(defaults.http))
return modelRef({
...route.defaults,
...defaults,
...mapped,
baseURL,
provider,
route: route.id,
limits: mapped.limits ?? defaults.limits ?? route.defaults.limits,
generation: mergeGenerationOptions(generation, mapped.generation),
providerOptions: mergeProviderOptions(providerOptions, mapped.providerOptions),
http: mergeHttpOptions(http, httpOptions(mapped.http)),
})
}
const mergeRouteDefaults = (base: RouteDefaults | undefined, patch: RouteDefaults): RouteDefaults => ({
...base,
...patch,
limits: patch.limits ?? base?.limits,
generation: mergeGenerationOptions(generationOptions(base?.generation), generationOptions(patch.generation)),
providerOptions: mergeProviderOptions(base?.providerOptions, patch.providerOptions),
http: mergeHttpOptions(httpOptions(base?.http), httpOptions(patch.http)),
})
export const modelLimits = ModelLimits.make
export const generationOptions = (input: GenerationOptions.Input | undefined) =>
input === undefined ? undefined : GenerationOptions.make(input)
export const httpOptions = (input: HttpOptionsInput | undefined) => {
if (input === undefined) return input
return HttpOptions.make(input)
}
export const modelRef = (input: ModelRefInput) =>
new ModelRef({
...input,
id: ModelID.make(input.id),
provider: ProviderID.make(input.provider),
route: RouteID.make(input.route),
limits: modelLimits(input.limits),
generation: generationOptions(input.generation),
http: httpOptions(input.http),
})
function model<Input extends RouteModelInput = RouteModelInput>(
route: AnyRoute,
defaults: RouteModelDefaults,
options?: RouteModelOptions<Input, RouteModelInput>,
): (input: Input) => ModelRef
function model<Input extends RouteRoutedModelInput = RouteRoutedModelInput>(
route: AnyRoute,
defaults?: RouteRoutedModelDefaults,
options?: RouteModelOptions<Input, RouteRoutedModelInput>,
): (input: Input) => ModelRef
function model<Input, Output extends RouteMappedModelInput = RouteMappedModelInput>(
route: AnyRoute,
defaults: Partial<Omit<ModelRefInput, "id" | "route">>,
options: RouteMappedModelOptions<Input, Output>,
): (input: Input) => ModelRef
function model<Input>(
route: AnyRoute,
defaults: Partial<Omit<ModelRefInput, "id" | "route">> = {},
options: { readonly mapInput?: (input: Input) => RouteMappedModelInput } = {},
) {
return modelWithDefaults(route, defaults, options)
}
export interface Interface {
/**
* Compile a request through protocol body construction, validation, and HTTP
* preparation without sending it. Returns the prepared request including the
* provider-native body.
*
* Pass a `Body` type argument to statically expose the route's body
* shape (e.g. `prepare<OpenAIChatBody>(...)`) — the runtime body is
* identical, so this is a type-level assertion the caller makes about which
* route the request will resolve to.
*/
readonly prepare: <Body = unknown>(request: LLMRequest) => Effect.Effect<PreparedRequestOf<Body>, LLMError>
readonly stream: StreamMethod
readonly generate: GenerateMethod
}
export interface StreamMethod {
(request: LLMRequest): Stream.Stream<LLMEvent, LLMError>
<T extends Tools>(options: ToolRuntime.RunOptions<T>): Stream.Stream<LLMEvent, LLMError>
}
export interface GenerateMethod {
(request: LLMRequest): Effect.Effect<LLMResponse, LLMError>
<T extends Tools>(options: ToolRuntime.RunOptions<T>): Effect.Effect<LLMResponse, LLMError>
}
export class Service extends Context.Service<Service, Interface>()("@opencode/LLMClient") {}
const noRoute = (model: ModelRef) =>
new LLMErrorClass({
module: "LLMClient",
method: "resolveRoute",
reason: new NoRouteReason({ route: model.route, provider: model.provider, model: model.id }),
})
const resolveRequestOptions = (request: LLMRequest) =>
LLMRequest.update(request, {
generation: mergeGenerationOptions(request.model.generation, request.generation) ?? new GenerationOptions({}),
providerOptions: mergeProviderOptions(request.model.providerOptions, request.providerOptions),
http: mergeHttpOptions(request.model.http, request.http),
})
export interface MakeInput<Body, Frame, Event, State> {
/** Route id used in registry lookup and error messages. */
readonly id: string
/** Provider identity for route-owned model construction. */
readonly provider?: string | ProviderID
/** Semantic API contract — owns body construction, body schema, and parsing. */
readonly protocol: Protocol<Body, Frame, Event, State>
/** Where the request is sent. */
readonly endpoint: Endpoint<Body>
/** Per-request transport auth. Model-level `Auth` overrides this. */
readonly auth?: AuthDef
/** Stream framing — bytes -> frames before `protocol.stream.event` decoding. */
readonly framing: Framing<Frame>
/** Static / per-request headers added before `auth` runs. */
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
/** Model defaults used by the route's `.model(...)` helper. */
readonly defaults?: RouteDefaults
}
export interface MakeTransportInput<Body, Prepared, Frame, Event, State> {
/** Route id used in registry lookup and error messages. */
readonly id: string
/** Provider identity for route-owned model construction. */
readonly provider?: string | ProviderID
/** Semantic API contract — owns body construction, body schema, and parsing. */
readonly protocol: Protocol<Body, Frame, Event, State>
/** Runnable transport route. */
readonly transport: Transport<Body, Prepared, Frame>
/** Provider/model defaults used by the route's `.model(...)` helper. */
readonly defaults?: RouteDefaults
}
const streamError = (route: string, message: string, cause: Cause.Cause<unknown>) => {
const failed = cause.reasons.find(Cause.isFailReason)?.error
if (failed instanceof LLMErrorClass) return failed
return ProviderShared.eventError(route, message, Cause.pretty(cause))
}
function makeFromTransport<Body, Prepared, Frame, Event, State>(
input: MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared> {
const protocol = input.protocol
const decodeEventEffect = Schema.decodeUnknownEffect(protocol.stream.event)
const decodeEvent = (route: string) => (frame: Frame) =>
decodeEventEffect(frame).pipe(
Effect.mapError(() =>
ProviderShared.eventError(
input.id,
`Invalid ${route} stream event`,
typeof frame === "string" ? frame : ProviderShared.encodeJson(frame),
),
),
)
const build = (routeInput: MakeTransportInput<Body, Prepared, Frame, Event, State>): Route<Body, Prepared> => {
const route: Route<Body, Prepared> = {
id: routeInput.id,
provider: routeInput.provider === undefined ? undefined : ProviderID.make(routeInput.provider),
protocol: protocol.id,
transport: routeInput.transport,
defaults: routeInput.defaults ?? {},
body: protocol.body,
with: (patch: RoutePatch<Body, Prepared>) => {
const { id, provider, transport, ...defaults } = patch
if (!id || id === routeInput.id) throw new Error(`Route.with(${routeInput.id}) requires a new route id`)
return build({
...routeInput,
id,
provider: provider ?? routeInput.provider,
transport: (transport as Transport<Body, Prepared, Frame> | undefined) ?? routeInput.transport,
defaults: mergeRouteDefaults(routeInput.defaults, defaults),
})
},
model: (input: RouteModelInput): ModelRef => modelWithDefaults<RouteModelInput>(route, {}, {})(input),
prepareTransport: routeInput.transport.prepare,
streamPrepared: (prepared: Prepared, request: LLMRequest, runtime: TransportRuntime) => {
const route = `${request.model.provider}/${request.model.route}`
const events = routeInput.transport
.frames(prepared, request, runtime)
.pipe(
Stream.mapEffect(decodeEvent(route)),
protocol.stream.terminal ? Stream.takeUntil(protocol.stream.terminal) : (stream) => stream,
)
return events.pipe(
Stream.mapAccumEffect(
protocol.stream.initial,
protocol.stream.step,
protocol.stream.onHalt ? { onHalt: protocol.stream.onHalt } : undefined,
),
Stream.catchCause((cause) => Stream.fail(streamError(route, `Failed to read ${route} stream`, cause))),
)
},
} satisfies Route<Body, Prepared>
return register(route)
}
return build(input)
}
export function make<Body, Prepared, Frame, Event, State>(
input: MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared>
/**
* Build a `Route` by composing the four orthogonal pieces of a deployment:
*
* - `Protocol` — what is the API I'm speaking?
* - `Endpoint` — where do I send the request?
* - `Auth` — how do I authenticate it?
* - `Framing` — how do I cut the response stream into protocol frames?
*
* Plus optional `headers` for cross-cutting deployment concerns (provider
* version pins, per-deployment quirks).
*
* This is the canonical route constructor. If a new route does not fit
* this four-axis model, add a purpose-built constructor rather than widening
* the public surface preemptively.
*/
export function make<Body, Frame, Event, State>(
input: MakeInput<Body, Frame, Event, State>,
): Route<Body, HttpTransport.HttpPrepared<Frame>>
export function make<Body, Prepared, Frame, Event, State>(
input: MakeInput<Body, Frame, Event, State> | MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared> | Route<Body, HttpTransport.HttpPrepared<Frame>> {
if ("transport" in input) return makeFromTransport(input)
const protocol = input.protocol
const encodeBody = Schema.encodeSync(Schema.fromJsonString(protocol.body.schema))
return makeFromTransport({
id: input.id,
provider: input.provider,
protocol,
transport: HttpTransport.httpJson({
endpoint: input.endpoint,
auth: input.auth,
framing: input.framing,
encodeBody,
headers: input.headers,
}),
defaults: input.defaults,
})
}
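// Example (hypothetical sketch): composing a route from the four axes. The
// protocol value is a stand-in; Endpoint.path and Framing.sse are the real
// helpers used by routes elsewhere in this package.
//
//   const acmeRoute = make({
//     id: "acme-chat",
//     protocol: AcmeChat.protocol, // hypothetical protocol module
//     endpoint: Endpoint.path("/v1/chat"),
//     framing: Framing.sse,
//     headers: () => ({ "acme-version": "2026-01-01" }),
//   })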
// `compile` is the important boundary: it turns a common `LLMRequest` into a
// validated provider body plus transport-private prepared data, but does not
// execute the transport.
const compile = Effect.fn("LLM.compile")(function* (request: LLMRequest) {
const resolved = resolveRequestOptions(request)
const route = registeredRoute(resolved.model.route)
if (!route) return yield* noRoute(resolved.model)
const body = yield* route.body
.from(resolved)
.pipe(Effect.flatMap(ProviderShared.validateWith(Schema.decodeUnknownEffect(route.body.schema))))
const prepared = yield* route.prepareTransport(body, resolved)
return {
request: resolved,
route,
body,
prepared,
}
})
const prepareWith = Effect.fn("LLMClient.prepare")(function* (request: LLMRequest) {
const compiled = yield* compile(request)
return new PreparedRequest({
id: compiled.request.id ?? "request",
route: compiled.route.id,
protocol: compiled.route.protocol,
model: compiled.request.model,
body: compiled.body,
metadata: { transport: compiled.route.transport.id },
})
})
const streamRequestWith = (runtime: TransportRuntime) => (request: LLMRequest) =>
Stream.unwrap(
Effect.gen(function* () {
const compiled = yield* compile(request)
return compiled.route.streamPrepared(compiled.prepared, compiled.request, runtime)
}),
)
const isToolRunOptions = (input: LLMRequest | ToolRuntime.RunOptions<Tools>): input is ToolRuntime.RunOptions<Tools> =>
"request" in input && "tools" in input
const streamWith = (streamRequest: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError>): StreamMethod =>
((input: LLMRequest | ToolRuntime.RunOptions<Tools>) => {
if (isToolRunOptions(input)) return ToolRuntime.stream({ ...input, stream: streamRequest })
return streamRequest(input)
}) as StreamMethod
const generateWith = (stream: Interface["stream"]) =>
Effect.fn("LLM.generate")(function* (input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
return new LLMResponse(
yield* stream(input as never).pipe(
Stream.runFold(
() => ({ events: [] as LLMEvent[], usage: undefined as LLMResponse["usage"] }),
(acc, event) => {
acc.events.push(event)
if ("usage" in event && event.usage !== undefined) acc.usage = event.usage
return acc
},
),
),
)
})
export const prepare = <Body = unknown>(request: LLMRequest) =>
prepareWith(request) as Effect.Effect<PreparedRequestOf<Body>, LLMError>
export function stream(request: LLMRequest): Stream.Stream<LLMEvent, LLMError>
export function stream<T extends Tools>(options: ToolRuntime.RunOptions<T>): Stream.Stream<LLMEvent, LLMError>
export function stream(input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
return Stream.unwrap(
Effect.gen(function* () {
return (yield* Service).stream(input as never)
}),
)
}
export function generate(request: LLMRequest): Effect.Effect<LLMResponse, LLMError>
export function generate<T extends Tools>(options: ToolRuntime.RunOptions<T>): Effect.Effect<LLMResponse, LLMError>
export function generate(input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
return Effect.gen(function* () {
return yield* (yield* Service).generate(input as never)
})
}
export const streamRequest = (request: LLMRequest) =>
Stream.unwrap(
Effect.gen(function* () {
return (yield* Service).stream(request)
}),
)
export const layer: Layer.Layer<Service, never, RequestExecutor.Service> = Layer.effect(
Service,
Effect.gen(function* () {
const stream = streamWith(streamRequestWith({ http: yield* RequestExecutor.Service }))
return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) })
}),
)
export const layerWithWebSocket: Layer.Layer<Service, never, RequestExecutor.Service | WebSocketExecutorService> =
Layer.effect(
Service,
Effect.gen(function* () {
const stream = streamWith(
streamRequestWith({
http: yield* RequestExecutor.Service,
webSocket: yield* WebSocketExecutor.Service,
}),
)
return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) })
}),
)
export const Route = { make, model } as const
export const LLMClient = {
Service,
layer,
layerWithWebSocket,
prepare,
stream,
generate,
stepCountIs: ToolRuntime.stepCountIs,
} as const
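// Usage sketch (illustrative): `model` stands for some `ModelRef`, and the
// request fields shown are assumptions about the `LLMRequest` shape rather
// than a contract this file defines.
//
//   const program = LLMClient.generate({ model, messages: [Message.user("hi")] }).pipe(
//     Effect.provide(LLMClient.layer),
//     Effect.provide(RequestExecutor.defaultLayer),
//   )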

View File

@@ -0,0 +1,41 @@
import type { LLMRequest } from "../schema"
import * as ProviderShared from "../protocols/shared"
export interface EndpointInput<Body> {
readonly request: LLMRequest
readonly body: Body
}
export type EndpointPart<Body> = string | ((input: EndpointInput<Body>) => string)
/**
* Declarative URL construction for one route.
*
* `Endpoint` carries only the path. The host always lives on `model.baseURL`,
* supplied by the provider helper that constructs the model. `render(...)`
* just appends the path (and any `model.queryParams`) to that host.
*
* `path` may be a string or a function of `EndpointInput`, for routes whose
* URL embeds the model id, region, or another body field (e.g. Bedrock,
* Gemini).
*/
export interface Endpoint<Body> {
readonly path: EndpointPart<Body>
}
/** Construct an `Endpoint` from a path string or path function. */
export const path = <Body>(value: EndpointPart<Body>): Endpoint<Body> => ({ path: value })
const renderPart = <Body>(part: EndpointPart<Body>, input: EndpointInput<Body>) =>
typeof part === "function" ? part(input) : part
export const render = <Body>(endpoint: Endpoint<Body>, input: EndpointInput<Body>) => {
const url = new URL(
`${ProviderShared.trimBaseUrl(input.request.model.baseURL)}${renderPart(endpoint.path, input)}`,
)
const params = input.request.model.queryParams
if (params) for (const [key, value] of Object.entries(params)) url.searchParams.set(key, value)
return url
}
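// Illustrative: a path computed from the body, in the Bedrock/Gemini style the
// doc comment above mentions. The `modelId` body field is a hypothetical shape
// assumed for the example:
//
//   const bedrockPath = path<{ readonly modelId: string }>(
//     ({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`,
//   )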
export * as Endpoint from "./endpoint"

View File

@@ -0,0 +1,374 @@
import { Cause, Context, Effect, Layer, Random } from "effect"
import {
FetchHttpClient,
Headers,
HttpClient,
HttpClientError,
HttpClientRequest,
HttpClientResponse,
} from "effect/unstable/http"
import {
AuthenticationReason,
ContentPolicyReason,
HttpContext,
HttpRateLimitDetails,
HttpRequestDetails,
HttpResponseDetails,
InvalidRequestReason,
LLMError,
ProviderInternalReason,
QuotaExceededReason,
RateLimitReason,
TransportReason,
UnknownProviderReason,
} from "../schema"
export interface Interface {
readonly execute: (
request: HttpClientRequest.HttpClientRequest,
) => Effect.Effect<HttpClientResponse.HttpClientResponse, LLMError>
}
export class Service extends Context.Service<Service, Interface>()("@opencode/LLM/RequestExecutor") {}
const BODY_LIMIT = 16_384
const MAX_RETRIES = 2
const BASE_DELAY_MS = 500
const MAX_DELAY_MS = 10_000
const REDACTED = "<redacted>"
// One source of truth for what counts as a sensitive name across headers,
// URL query keys, and field names embedded inside request/response bodies.
//
// `SENSITIVE_NAME` is used as both a substring matcher (for free-form header
// names like `Authorization` / `X-API-Key`) and as the body-field alternation
// list. `SHORT_QUERY_NAME` covers anchored short keys like `?key=…` / `?sig=…`
// that are too generic to redact substring-style without false positives.
const SENSITIVE_NAME_SOURCE =
"authorization|api[-_]?key|access[-_]?token|refresh[-_]?token|id[-_]?token|token|secret|credential|signature|x-amz-signature"
const SENSITIVE_NAME = new RegExp(SENSITIVE_NAME_SOURCE, "i")
const SHORT_QUERY_NAME = /^(key|sig)$/i
const SENSITIVE_BODY_FIELD = new RegExp(`(?:${SENSITIVE_NAME_SOURCE}|key)`, "i")
const REDACT_JSON_FIELD = new RegExp(`("(?:${SENSITIVE_BODY_FIELD.source})"\\s*:\\s*)"[^"]*"`, "gi")
const REDACT_QUERY_FIELD = new RegExp(`((?:${SENSITIVE_BODY_FIELD.source})=)[^&\\s"]+`, "gi")
const isSensitiveHeaderName = (name: string) => SENSITIVE_NAME.test(name)
const isSensitiveQueryName = (name: string) => isSensitiveHeaderName(name) || SHORT_QUERY_NAME.test(name)
const redactHeaders = (headers: Headers.Headers, redactedNames: ReadonlyArray<string | RegExp>) =>
Object.fromEntries(
Object.entries(Headers.redact(headers, [...redactedNames, SENSITIVE_NAME])).map(([name, value]) => [
name,
String(value),
]),
)
const redactUrl = (value: string) => {
if (!URL.canParse(value)) return REDACTED
const url = new URL(value)
url.searchParams.forEach((_, key) => {
if (isSensitiveQueryName(key)) url.searchParams.set(key, REDACTED)
})
return url.toString()
}
const normalizedHeaders = (headers: Headers.Headers) =>
Object.fromEntries(Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value]))
const requestId = (headers: Record<string, string>) => {
return (
headers["x-request-id"] ??
headers["request-id"] ??
headers["x-amzn-requestid"] ??
headers["x-amz-request-id"] ??
headers["x-goog-request-id"] ??
headers["cf-ray"]
)
}
const retryableStatus = (status: number) => status === 429 || status === 503 || status === 504 || status === 529
const retryAfterMs = (headers: Record<string, string>) => {
const millis = Number(headers["retry-after-ms"])
if (Number.isFinite(millis)) return Math.max(0, millis)
const value = headers["retry-after"]
if (!value) return undefined
const seconds = Number(value)
if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000)
const date = Date.parse(value)
if (!Number.isNaN(date)) return Math.max(0, date - Date.now())
return undefined
}
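// Worked examples of the parse order above:
//   { "retry-after-ms": "1500" }      -> 1500
//   { "retry-after": "2" }            -> 2000 (seconds scaled to milliseconds)
//   { "retry-after": "<HTTP-date>" }  -> max(0, date - now)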
const addRateLimitValue = (target: Record<string, string>, key: string, value: string) => {
if (key.length > 0) target[key] = value
}
const rateLimitDetails = (headers: Record<string, string>, retryAfter: number | undefined) => {
const limit: Record<string, string> = {}
const remaining: Record<string, string> = {}
const reset: Record<string, string> = {}
Object.entries(headers).forEach(([name, value]) => {
const openaiLimit = /^x-ratelimit-limit-(.+)$/.exec(name)?.[1]
if (openaiLimit) return addRateLimitValue(limit, openaiLimit, value)
const openaiRemaining = /^x-ratelimit-remaining-(.+)$/.exec(name)?.[1]
if (openaiRemaining) return addRateLimitValue(remaining, openaiRemaining, value)
const openaiReset = /^x-ratelimit-reset-(.+)$/.exec(name)?.[1]
if (openaiReset) return addRateLimitValue(reset, openaiReset, value)
const anthropic = /^anthropic-ratelimit-(.+)-(limit|remaining|reset)$/.exec(name)
if (!anthropic) return
if (anthropic[2] === "limit") return addRateLimitValue(limit, anthropic[1], value)
if (anthropic[2] === "remaining") return addRateLimitValue(remaining, anthropic[1], value)
return addRateLimitValue(reset, anthropic[1], value)
})
if (
retryAfter === undefined &&
Object.keys(limit).length === 0 &&
Object.keys(remaining).length === 0 &&
Object.keys(reset).length === 0
)
return undefined
return new HttpRateLimitDetails({
retryAfterMs: retryAfter,
limit: Object.keys(limit).length === 0 ? undefined : limit,
remaining: Object.keys(remaining).length === 0 ? undefined : remaining,
reset: Object.keys(reset).length === 0 ? undefined : reset,
})
}
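// For example, { "x-ratelimit-remaining-tokens": "99" } lands in
// `remaining.tokens`, and { "anthropic-ratelimit-requests-reset": "..." }
// lands in `reset.requests`; with no rate-limit signal at all the result is
// undefined so callers can omit the detail entirely.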
const requestDetails = (request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray<string | RegExp>) =>
new HttpRequestDetails({
method: request.method,
url: redactUrl(request.url),
headers: redactHeaders(request.headers, redactedNames),
})
const responseDetails = (
response: HttpClientResponse.HttpClientResponse,
redactedNames: ReadonlyArray<string | RegExp>,
) =>
new HttpResponseDetails({
status: response.status,
headers: redactHeaders(response.headers, redactedNames),
})
const secretValues = (request: HttpClientRequest.HttpClientRequest) => {
const values = new Set<string>()
const add = (value: string) => {
if (value.length < 4) return
values.add(value)
values.add(encodeURIComponent(value))
}
Object.entries(request.headers).forEach(([name, value]) => {
if (!isSensitiveHeaderName(name)) return
add(value)
const bearer = /^Bearer\s+(.+)$/i.exec(value)?.[1]
if (bearer) add(bearer)
})
if (!URL.canParse(request.url)) return values
new URL(request.url).searchParams.forEach((value, key) => {
if (isSensitiveQueryName(key)) add(value)
})
return values
}
// Two passes: structural (redact `"name": "value"` and `name=value` patterns
// for any field name that looks sensitive) plus literal (replace any actual
// secret values we sent in the request, in case the response echoes one back).
const redactBody = (body: string, request: HttpClientRequest.HttpClientRequest) =>
Array.from(secretValues(request)).reduce(
(text, secret) => text.split(secret).join(REDACTED),
body.replace(REDACT_JSON_FIELD, `$1"${REDACTED}"`).replace(REDACT_QUERY_FIELD, `$1${REDACTED}`),
)
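// e.g. a body of '{"api_key":"sk-abc","q":"x"}' on a request that sent
// `Authorization: Bearer sk-abc` becomes '{"api_key":"<redacted>","q":"x"}',
// and any literal "sk-abc" echoed elsewhere in the body is replaced too.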
const responseBody = (body: string | void, request: HttpClientRequest.HttpClientRequest) => {
if (body === undefined) return {}
const redacted = redactBody(body, request)
if (redacted.length <= BODY_LIMIT) return { body: redacted }
return { body: redacted.slice(0, BODY_LIMIT), bodyTruncated: true }
}
const providerMessage = (status: number, body: { readonly body?: string }) => {
if (body.body && body.body.length <= 500) return `Provider request failed with HTTP ${status}: ${body.body}`
return `Provider request failed with HTTP ${status}`
}
const responseHttp = (input: {
readonly request: HttpClientRequest.HttpClientRequest
readonly response: HttpClientResponse.HttpClientResponse
readonly redactedNames: ReadonlyArray<string | RegExp>
readonly body: ReturnType<typeof responseBody>
readonly requestId?: string | undefined
readonly rateLimit?: HttpRateLimitDetails | undefined
}) =>
new HttpContext({
request: requestDetails(input.request, input.redactedNames),
response: responseDetails(input.response, input.redactedNames),
...input.body,
requestId: input.requestId,
rateLimit: input.rateLimit,
})
const statusReason = (input: {
readonly status: number
readonly message: string
readonly retryAfterMs?: number | undefined
readonly rateLimit?: HttpRateLimitDetails | undefined
readonly http: HttpContext
}) => {
const body = input.http.body ?? ""
if (/content[-_\s]?policy|content_filter|safety/i.test(body)) {
return new ContentPolicyReason({ message: input.message, http: input.http })
}
if (input.status === 401) {
return new AuthenticationReason({ message: input.message, kind: "invalid", http: input.http })
}
if (input.status === 403) {
return new AuthenticationReason({ message: input.message, kind: "insufficient-permissions", http: input.http })
}
if (input.status === 429) {
if (/insufficient[-_\s]?quota|quota[-_\s]?exceeded/i.test(body)) {
return new QuotaExceededReason({ message: input.message, http: input.http })
}
return new RateLimitReason({
message: input.message,
retryAfterMs: input.retryAfterMs,
rateLimit: input.rateLimit,
http: input.http,
})
}
if (input.status === 400 || input.status === 404 || input.status === 409 || input.status === 422) {
return new InvalidRequestReason({ message: input.message, http: input.http })
}
if (input.status >= 500 || retryableStatus(input.status)) {
return new ProviderInternalReason({
message: input.message,
status: input.status,
retryAfterMs: input.retryAfterMs,
http: input.http,
})
}
return new UnknownProviderReason({ message: input.message, status: input.status, http: input.http })
}
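// e.g. a 429 whose body mentions "insufficient_quota" maps to QuotaExceeded
// (not retryable), a plain 429 maps to RateLimit (retryable), and 5xx plus the
// retryable statuses (503/504/529) map to ProviderInternal (retryable).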
const statusError =
(request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray<string | RegExp>) =>
(response: HttpClientResponse.HttpClientResponse) =>
Effect.gen(function* () {
if (response.status < 400) return response
const body = yield* response.text.pipe(Effect.catch(() => Effect.void))
const headers = normalizedHeaders(response.headers)
const retryAfter = retryAfterMs(headers)
const rateLimit = rateLimitDetails(headers, retryAfter)
const details = responseBody(body, request)
return yield* new LLMError({
module: "RequestExecutor",
method: "execute",
reason: statusReason({
status: response.status,
message: providerMessage(response.status, details),
retryAfterMs: retryAfter,
rateLimit,
http: responseHttp({
request,
response,
redactedNames,
body: details,
requestId: requestId(headers),
rateLimit,
}),
}),
})
})
const toHttpError = (redactedNames: ReadonlyArray<string | RegExp>) => (error: unknown) => {
const transportError = (input: {
readonly message: string
readonly kind?: string | undefined
readonly request?: HttpClientRequest.HttpClientRequest | undefined
}) =>
new LLMError({
module: "RequestExecutor",
method: "execute",
reason: new TransportReason({
message: input.message,
kind: input.kind,
url: input.request ? redactUrl(input.request.url) : undefined,
http: input.request ? new HttpContext({ request: requestDetails(input.request, redactedNames) }) : undefined,
}),
})
if (Cause.isTimeoutError(error)) {
return transportError({ message: error.message, kind: "Timeout" })
}
if (!HttpClientError.isHttpClientError(error)) {
return transportError({ message: "HTTP transport failed" })
}
const request = "request" in error ? error.request : undefined
if (error.reason._tag === "TransportError") {
return transportError({
message: error.reason.description ?? "HTTP transport failed",
kind: error.reason._tag,
request,
})
}
return transportError({
message: `HTTP transport failed: ${error.reason._tag}`,
kind: error.reason._tag,
request,
})
}
const retryDelay = (error: LLMError, attempt: number) => {
if (error.retryAfterMs !== undefined) return Effect.succeed(Math.min(error.retryAfterMs, MAX_DELAY_MS))
return Random.nextBetween(
Math.min(BASE_DELAY_MS * 2 ** attempt * 0.8, MAX_DELAY_MS),
Math.min(BASE_DELAY_MS * 2 ** attempt * 1.2, MAX_DELAY_MS),
).pipe(Effect.map((delay) => Math.round(delay)))
}
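// Worked example of the jittered backoff: attempt 0 draws uniformly from
// [400, 600] ms, attempt 1 from [800, 1200] ms, attempt 2 from [1600, 2400] ms,
// with each bound capped at MAX_DELAY_MS; a server-provided retryAfterMs wins
// (also capped).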
const retryStatusFailures = <A, R>(
effect: Effect.Effect<A, LLMError, R>,
retries = MAX_RETRIES,
attempt = 0,
): Effect.Effect<A, LLMError, R> =>
Effect.catchTag(effect, "LLM.Error", (error): Effect.Effect<A, LLMError, R> => {
if (!error.retryable || retries <= 0) return Effect.fail(error)
return retryDelay(error, attempt).pipe(
Effect.flatMap((delay) => Effect.sleep(delay)),
Effect.flatMap(() => retryStatusFailures(effect, retries - 1, attempt + 1)),
)
})
export const layer: Layer.Layer<Service, never, HttpClient.HttpClient> = Layer.effect(
Service,
Effect.gen(function* () {
const http = yield* HttpClient.HttpClient
const executeOnce = (request: HttpClientRequest.HttpClientRequest) =>
Effect.gen(function* () {
const redactedNames = yield* Headers.CurrentRedactedNames
return yield* http
.execute(request)
.pipe(Effect.mapError(toHttpError(redactedNames)), Effect.flatMap(statusError(request, redactedNames)))
})
return Service.of({
execute: (request) => retryStatusFailures(executeOnce(request)),
})
}),
)
export const defaultLayer = layer.pipe(Layer.provide(FetchHttpClient.layer))
export * as RequestExecutor from "./executor"

View File

@@ -0,0 +1,27 @@
import type { Stream } from "effect"
import * as ProviderShared from "../protocols/shared"
import type { LLMError } from "../schema"
/**
* Decode a streaming HTTP response body into provider-protocol frames.
*
* `Framing` is the byte-stream-shaped seam between transport and protocol:
*
* - SSE (`Framing.sse`) — UTF-8 decode the body, run the SSE channel decoder,
* drop empty / `[DONE]` keep-alives. Each emitted frame is the JSON `data:`
* payload of one event.
* - AWS event stream — length-prefixed binary frames with CRC checksums.
* Each emitted frame is one parsed binary event record.
*
* The frame type is opaque to this layer; the protocol's `decode` step turns
* a frame into a typed chunk.
*/
export interface Framing<Frame> {
readonly id: string
readonly frame: (bytes: Stream.Stream<Uint8Array, LLMError>) => Stream.Stream<Frame, LLMError>
}
/** Server-Sent Events framing. Used by every JSON-streaming HTTP provider. */
export const sse: Framing<string> = { id: "sse", frame: ProviderShared.sseFraming }
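// A custom framing only needs an id and a byte-stream decoder. Illustrative
// sketch for newline-delimited JSON (assumes value-level `Stream.decodeText`
// and `Stream.splitLines` helpers, which would need a runtime import here):
//
//   const ndjson: Framing<string> = {
//     id: "ndjson",
//     frame: (bytes) => bytes.pipe(Stream.decodeText(), Stream.splitLines),
//   }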
export * as Framing from "./framing"

View File

@@ -0,0 +1,26 @@
export { Route, LLMClient, modelLimits, modelRef } from "./client"
export type {
Route as RouteShape,
RouteModelDefaults,
RouteModelInput,
RouteRoutedModelDefaults,
RouteRoutedModelInput,
AnyRoute,
Interface as LLMClientShape,
Service as LLMClientService,
ModelRefInput,
} from "./client"
export * from "./executor"
export { Auth } from "./auth"
export { AuthOptions } from "./auth-options"
export { Endpoint } from "./endpoint"
export { Framing } from "./framing"
export { Protocol } from "./protocol"
export { HttpTransport, WebSocketExecutor, WebSocketTransport } from "./transport"
export * as Transport from "./transport"
export type { Auth as AuthShape, AuthInput, Credential, CredentialError } from "./auth"
export type { ApiKeyMode, AuthOverride, ProviderAuthOption } from "./auth-options"
export type { Endpoint as EndpointFn, EndpointInput } from "./endpoint"
export type { Framing as FramingDef } from "./framing"
export type { Protocol as ProtocolDef } from "./protocol"
export type { Transport as TransportDef, TransportRuntime } from "./transport"

View File

@@ -0,0 +1,84 @@
import { Schema, type Effect } from "effect"
import type { LLMError, LLMEvent, LLMRequest, ProtocolID } from "../schema"
/**
* The semantic API contract of one model server family.
*
* A `Protocol` owns the parts of a route that are intrinsic to "what does
* this API look like": how a common `LLMRequest` becomes a provider-native
* body, what schema that body must satisfy before it is JSON-encoded, and
* how the streaming response decodes back into common `LLMEvent`s.
*
* Examples:
*
* - `OpenAIChat.protocol` — chat completions style
* - `OpenAIResponses.protocol` — responses API
* - `AnthropicMessages.protocol` — messages API with content blocks
* - `Gemini.protocol` — generateContent
* - `BedrockConverse.protocol` — Converse with binary event-stream framing
*
* A `Protocol` is **not** a deployment. It does not know which URL, which
* headers, or which auth scheme to use. Those are deployment concerns owned
* by `Route.make(...)` along with the chosen `Endpoint`, `Auth`,
* and `Framing`. This separation is what lets DeepSeek, TogetherAI, Cerebras,
* etc. all reuse `OpenAIChat.protocol` without forking 300 lines per provider.
*
* The four type parameters reflect the pipeline:
*
* - `Body` — provider-native request body candidate. `Route.make(...)`
* validates and JSON-encodes it with `body.schema`.
* - `Frame` — one unit of the framed response stream. SSE: a JSON data
* string. AWS event stream: a parsed binary frame.
* - `Event` — schema-decoded provider event produced from one frame.
* - `State` — accumulator threaded through `stream.step` to translate event
* sequences into `LLMEvent` sequences.
*/
export interface Protocol<Body, Frame, Event, State> {
/** Stable id for the wire protocol implementation. */
readonly id: ProtocolID
/** Request side: schema for the provider-native body and how to build it. */
readonly body: ProtocolBody<Body>
/** Response side: streaming state machine. */
readonly stream: ProtocolStream<Frame, Event, State>
}
export interface ProtocolBody<Body> {
/** Schema for the validated provider-native body sent as the JSON request. */
readonly schema: Schema.Codec<Body, unknown>
/** Build the provider-native body from a common `LLMRequest`. */
readonly from: (request: LLMRequest) => Effect.Effect<Body, LLMError>
}
export interface ProtocolStream<Frame, Event, State> {
/** Schema for one decoded streaming event, decoded from a transport frame. */
readonly event: Schema.Codec<Event, Frame>
/** Initial parser state. Called once per response. */
readonly initial: () => State
/** Translate one event into emitted `LLMEvent`s plus the next state. */
readonly step: (state: State, event: Event) => Effect.Effect<readonly [State, ReadonlyArray<LLMEvent>], LLMError>
/** Optional request-completion signal for transports that do not end naturally. */
readonly terminal?: (event: Event) => boolean
/** Optional flush emitted when the framed stream ends. */
readonly onHalt?: (state: State) => ReadonlyArray<LLMEvent>
}
/**
* Construct a `Protocol` from its body and stream pieces:
*
* - `body.schema` infers the provider-native request body shape.
* - `body.from` ties the common `LLMRequest` to the provider body.
* - `stream.event` infers the decoded streaming event and the wire frame.
* - `stream.initial`, `stream.step`, and `stream.onHalt` infer the parser state.
*
* Provider implementations should usually call `Protocol.make({ ... })`
* without explicit type arguments; the schemas and parser functions are the
* source of truth. The constructor remains as the public seam for future
* cross-cutting concerns such as tracing or instrumentation.
*/
export const make = <Body, Frame, Event, State>(
input: Protocol<Body, Frame, Event, State>,
): Protocol<Body, Frame, Event, State> => input
export const jsonEvent = <const S extends Schema.Top>(schema: S) => Schema.fromJsonString(schema)
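// Illustrative skeleton of a protocol definition. `BodySchema`, `EventSchema`,
// `buildBody`, and `translate` are placeholders, and a real definition would
// import `Effect` at the value level (it is type-only above):
//
//   const exampleProtocol = make({
//     id: "example-chat",
//     body: { schema: BodySchema, from: (request) => buildBody(request) },
//     stream: {
//       event: jsonEvent(EventSchema),
//       initial: () => ({ openTextId: undefined as string | undefined }),
//       step: (state, event) => Effect.succeed([state, translate(event)] as const),
//     },
//   })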
export * as Protocol from "./protocol"

View File

@@ -0,0 +1,122 @@
import { Effect, Stream } from "effect"
import { Headers, HttpClientRequest } from "effect/unstable/http"
import { Auth, type Auth as AuthDef } from "../auth"
import { type Endpoint, render as renderEndpoint } from "../endpoint"
import type { Framing } from "../framing"
import type { Transport } from "./index"
import * as ProviderShared from "../../protocols/shared"
import { mergeJsonRecords, type LLMRequest } from "../../schema"
export interface JsonRequestInput<Body> {
readonly body: Body
readonly request: LLMRequest
readonly endpoint: Endpoint<Body>
readonly auth: AuthDef
readonly encodeBody: (body: Body) => string
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
}
export interface JsonRequestParts<Body = unknown> {
readonly url: string
readonly jsonBody: Body | Record<string, unknown>
readonly bodyText: string
readonly headers: Headers.Headers
}
export interface HttpPrepared<Frame> {
readonly request: HttpClientRequest.HttpClientRequest
readonly framing: Framing<Frame>
}
const applyQuery = (url: string, query: Record<string, string> | undefined) => {
if (!query) return url
const next = new URL(url)
Object.entries(query).forEach(([key, value]) => next.searchParams.set(key, value))
return next.toString()
}
const bodyWithOverlay = <Body>(body: Body, request: LLMRequest, encodeBody: (body: Body) => string) =>
Effect.gen(function* () {
if (request.http?.body === undefined) return { jsonBody: body, bodyText: encodeBody(body) }
if (ProviderShared.isRecord(body)) {
const overlaid = mergeJsonRecords(body, request.http.body) ?? {}
return { jsonBody: overlaid, bodyText: ProviderShared.encodeJson(overlaid) }
}
return yield* ProviderShared.invalidRequest("http.body can only overlay JSON object request bodies")
})
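// e.g. with `http.body = { temperature: 0 }`, an object provider body
// { model, messages } is overlaid to { model, messages, temperature: 0 };
// a non-object body (string, array) rejects the overlay with an
// invalid-request error.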
export const jsonRequestParts = <Body>(input: JsonRequestInput<Body>) =>
Effect.gen(function* () {
const url = applyQuery(
renderEndpoint(input.endpoint, { request: input.request, body: input.body }).toString(),
input.request.http?.query,
)
const body = yield* bodyWithOverlay(input.body, input.request, input.encodeBody)
const headers = yield* Auth.toEffect(Auth.isAuth(input.request.model.auth) ? input.request.model.auth : input.auth)(
{
request: input.request,
method: "POST",
url,
body: body.bodyText,
headers: Headers.fromInput({
...(input.headers?.({ request: input.request }) ?? {}),
...input.request.model.headers,
...input.request.http?.headers,
}),
},
)
return { url, jsonBody: body.jsonBody, bodyText: body.bodyText, headers }
})
export interface HttpJsonInput<Body, Frame> {
readonly endpoint: Endpoint<Body>
readonly auth?: AuthDef
readonly framing: Framing<Frame>
readonly encodeBody: (body: Body) => string
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
}
export type HttpJsonPatch<Body, Frame> = Partial<HttpJsonInput<Body, Frame>>
export interface HttpJsonTransport<Body, Frame> extends Transport<Body, HttpPrepared<Frame>, Frame> {
readonly with: (patch: HttpJsonPatch<Body, Frame>) => HttpJsonTransport<Body, Frame>
}
export const httpJson = <Body, Frame>(input: HttpJsonInput<Body, Frame>): HttpJsonTransport<Body, Frame> => ({
id: "http-json",
with: (patch) => httpJson({ ...input, ...patch }),
prepare: (body, request) =>
jsonRequestParts({
body,
request,
endpoint: input.endpoint,
auth: input.auth ?? Auth.bearer(),
encodeBody: input.encodeBody,
headers: input.headers,
}).pipe(
Effect.map((parts) => ({
request: ProviderShared.jsonPost({ url: parts.url, body: parts.bodyText, headers: parts.headers }),
framing: input.framing,
})),
),
frames: (prepared, request, runtime) =>
Stream.unwrap(
runtime.http
.execute(prepared.request)
.pipe(
Effect.map((response) =>
prepared.framing.frame(
response.stream.pipe(
Stream.mapError((error) =>
ProviderShared.eventError(
`${request.model.provider}/${request.model.route}`,
`Failed to read ${request.model.provider}/${request.model.route} stream`,
ProviderShared.errorText(error),
),
),
),
),
),
),
),
})
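// Illustrative wiring (the endpoint path and `BodySchema` are assumptions;
// `auth` defaults to `Auth.bearer()` when omitted, as above):
//
//   const transport = httpJson({
//     endpoint: Endpoint.path("/v1/chat/completions"),
//     framing: Framing.sse,
//     encodeBody: Schema.encodeSync(Schema.fromJsonString(BodySchema)),
//   })
//   const pinned = transport.with({ headers: () => ({ "x-api-version": "2" }) })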

View File

@@ -0,0 +1,22 @@
import type { Effect, Stream } from "effect"
import type { Interface as RequestExecutorInterface } from "../executor"
import type { Interface as WebSocketExecutorInterface } from "./websocket"
import type { LLMError, LLMRequest } from "../../schema"
export interface TransportRuntime {
readonly http: RequestExecutorInterface
readonly webSocket?: WebSocketExecutorInterface
}
export interface Transport<Body, Prepared, Frame> {
readonly id: string
readonly prepare: (body: Body, request: LLMRequest) => Effect.Effect<Prepared, LLMError>
readonly frames: (
prepared: Prepared,
request: LLMRequest,
runtime: TransportRuntime,
) => Stream.Stream<Frame, LLMError>
}
export * as HttpTransport from "./http"
export { WebSocketExecutor, WebSocketTransport } from "./websocket"

View File

@@ -0,0 +1,282 @@
import { Cause, Context, Effect, Queue, Stream } from "effect"
import { Headers } from "effect/unstable/http"
import { Auth, type Auth as AuthDef } from "../auth"
import type { Endpoint } from "../endpoint"
import { LLMError, TransportReason, type LLMRequest } from "../../schema"
import * as HttpTransport from "./http"
import type { Transport } from "./index"
export interface WebSocketRequest {
readonly url: string
readonly headers: Headers.Headers
}
export interface WebSocketConnection {
readonly sendText: (message: string) => Effect.Effect<void, LLMError>
readonly messages: Stream.Stream<string | Uint8Array, LLMError>
readonly close: Effect.Effect<void, never>
}
export interface Interface {
readonly open: (input: WebSocketRequest) => Effect.Effect<WebSocketConnection, LLMError>
}
type WebSocketConstructorWithHeaders = new (
url: string,
options?: { readonly headers?: Headers.Headers },
) => globalThis.WebSocket
export class Service extends Context.Service<Service, Interface>()("@opencode/LLM/WebSocketExecutor") {}
const transportError = (
method: string,
message: string,
input: { readonly url?: string; readonly kind?: string } = {},
) =>
new LLMError({
module: "WebSocketExecutor",
method,
reason: new TransportReason({ message, url: input.url, kind: input.kind }),
})
const eventMessage = (event: Event) => {
if ("message" in event && typeof event.message === "string") return event.message
return event.type
}
const binaryMessage = (data: unknown) => {
if (data instanceof Uint8Array) return data
if (data instanceof ArrayBuffer) return new Uint8Array(data)
if (ArrayBuffer.isView(data)) return new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
return undefined
}
const waitOpen = (ws: globalThis.WebSocket, input: WebSocketRequest) => {
if (ws.readyState === globalThis.WebSocket.OPEN) return Effect.void
if (ws.readyState === globalThis.WebSocket.CLOSING || ws.readyState === globalThis.WebSocket.CLOSED) {
return Effect.fail(
transportError("open", `WebSocket closed before opening (state ${ws.readyState})`, {
url: input.url,
kind: "open",
}),
)
}
return Effect.callback<void, LLMError>((resume, signal) => {
const cleanup = () => {
ws.removeEventListener("open", onOpen)
ws.removeEventListener("error", onError)
ws.removeEventListener("close", onClose)
signal.removeEventListener("abort", onAbort)
}
const onAbort = () => {
cleanup()
if (ws.readyState !== globalThis.WebSocket.CLOSED && ws.readyState !== globalThis.WebSocket.CLOSING)
ws.close(1000)
}
const onOpen = () => {
cleanup()
resume(Effect.void)
}
const onError = (event: Event) => {
cleanup()
resume(
Effect.fail(
transportError("open", `Failed to open WebSocket: ${eventMessage(event)}`, { url: input.url, kind: "open" }),
),
)
}
const onClose = (event: CloseEvent) => {
cleanup()
resume(
Effect.fail(
transportError("open", `WebSocket closed before opening with code ${event.code}`, {
url: input.url,
kind: "open",
}),
),
)
}
ws.addEventListener("open", onOpen, { once: true })
ws.addEventListener("error", onError, { once: true })
ws.addEventListener("close", onClose, { once: true })
signal.addEventListener("abort", onAbort, { once: true })
})
}
const webSocketUrl = (value: string) =>
Effect.try({
try: () => {
const url = new URL(value)
if (url.protocol === "https:") {
url.protocol = "wss:"
return url.toString()
}
if (url.protocol === "http:") {
url.protocol = "ws:"
return url.toString()
}
throw new Error(`Unsupported WebSocket URL protocol ${url.protocol}`)
},
catch: (error) =>
transportError("prepare", error instanceof Error ? error.message : "Invalid WebSocket URL", {
url: value,
kind: "websocket",
}),
})
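// e.g. "https://api.example.com/v1/realtime" -> "wss://api.example.com/v1/realtime",
// "http://localhost:8080/ws" -> "ws://localhost:8080/ws"; any other scheme
// fails with a transport error.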
export const open = (input: WebSocketRequest) =>
Effect.try({
try: () =>
new (globalThis.WebSocket as unknown as WebSocketConstructorWithHeaders)(input.url, { headers: input.headers }),
catch: (error) =>
transportError("open", error instanceof Error ? error.message : "Failed to construct WebSocket", {
url: input.url,
kind: "open",
}),
}).pipe(Effect.flatMap((ws) => fromWebSocket(ws, input)))
export const fromWebSocket = (
ws: globalThis.WebSocket,
input: WebSocketRequest,
): Effect.Effect<WebSocketConnection, LLMError> =>
Effect.gen(function* () {
yield* waitOpen(ws, input)
const messages = yield* Queue.bounded<string | Uint8Array, LLMError | Cause.Done<void>>(128)
const onMessage = (event: MessageEvent) => {
if (typeof event.data === "string") return Queue.offerUnsafe(messages, event.data)
const binary = binaryMessage(event.data)
if (binary) return Queue.offerUnsafe(messages, binary)
Queue.failCauseUnsafe(
messages,
Cause.fail(
transportError("message", "Unsupported WebSocket message payload", { url: input.url, kind: "message" }),
),
)
}
const onError = (event: Event) => {
Queue.failCauseUnsafe(
messages,
Cause.fail(
transportError("message", `WebSocket error: ${eventMessage(event)}`, { url: input.url, kind: "message" }),
),
)
}
const onClose = (event: CloseEvent) => {
if (event.code === 1000 || event.code === 1005) return Queue.endUnsafe(messages)
Queue.failCauseUnsafe(
messages,
Cause.fail(
transportError("message", `WebSocket closed with code ${event.code}`, { url: input.url, kind: "close" }),
),
)
}
const cleanup = Effect.sync(() => {
ws.removeEventListener("message", onMessage)
ws.removeEventListener("error", onError)
ws.removeEventListener("close", onClose)
}).pipe(Effect.andThen(Queue.shutdown(messages)))
ws.addEventListener("message", onMessage)
ws.addEventListener("error", onError)
ws.addEventListener("close", onClose)
return {
sendText: (message) =>
Effect.try({
try: () => ws.send(message),
catch: (error) =>
transportError("sendText", error instanceof Error ? error.message : "Failed to send WebSocket message", {
url: input.url,
kind: "write",
}),
}),
messages: Stream.fromQueue(messages),
close: cleanup.pipe(
Effect.andThen(
Effect.sync(() => {
if (ws.readyState === globalThis.WebSocket.CLOSED || ws.readyState === globalThis.WebSocket.CLOSING) return
ws.close(1000)
}),
),
),
}
})
export const messageText = (message: string | Uint8Array, decoder: TextDecoder) =>
typeof message === "string" ? message : decoder.decode(message)
export interface JsonPrepared {
readonly url: string
readonly headers: Headers.Headers
readonly message: string
}
export interface JsonInput<Body, Message> {
readonly endpoint: Endpoint<Body>
readonly auth?: AuthDef
readonly encodeBody: (body: Body) => string
readonly toMessage: (body: Body | Record<string, unknown>) => Effect.Effect<Message, LLMError>
readonly encodeMessage: (message: Message) => string
readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
}
export type JsonPatch<Body, Message> = Partial<JsonInput<Body, Message>>
export interface JsonTransport<Body, Message> extends Transport<Body, JsonPrepared, string> {
readonly with: (patch: JsonPatch<Body, Message>) => JsonTransport<Body, Message>
}
export const json = <Body, Message>(input: JsonInput<Body, Message>): JsonTransport<Body, Message> => ({
id: "websocket-json",
with: (patch) => json({ ...input, ...patch }),
prepare: (body, request) =>
Effect.gen(function* () {
const parts = yield* HttpTransport.jsonRequestParts({
body,
request,
endpoint: input.endpoint,
auth: input.auth ?? Auth.bearer(),
encodeBody: input.encodeBody,
headers: input.headers,
})
return {
url: yield* webSocketUrl(parts.url),
headers: parts.headers,
message: input.encodeMessage(yield* input.toMessage(parts.jsonBody)),
}
}),
frames: (prepared, _request, runtime) => {
const webSocket = runtime.webSocket
if (!webSocket) {
return Stream.fail(
transportError("json", "WebSocket JSON transport requires WebSocketExecutor.Service", {
url: prepared.url,
kind: "websocket",
}),
)
}
const decoder = new TextDecoder()
return Stream.unwrap(
Effect.gen(function* () {
const connection = yield* Effect.acquireRelease(
webSocket.open({ url: prepared.url, headers: prepared.headers }),
(connection) => connection.close,
)
yield* connection.sendText(prepared.message)
return connection.messages.pipe(Stream.map((message) => messageText(message, decoder)))
}),
)
},
})
export const WebSocketExecutor = {
Service,
open,
fromWebSocket,
messageText,
} as const
export const WebSocketTransport = {
json,
} as const
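// Illustrative wiring of the JSON WebSocket transport (the endpoint, schema,
// and message envelope are assumptions for the sketch):
//
//   const transport = WebSocketTransport.json({
//     endpoint: Endpoint.path("/v1/realtime"),
//     encodeBody: Schema.encodeSync(Schema.fromJsonString(BodySchema)),
//     toMessage: (body) => Effect.succeed({ type: "session.create", body }),
//     encodeMessage: JSON.stringify,
//   })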

View File

@@ -0,0 +1,202 @@
import { Schema } from "effect"
import { ModelID, ProviderID, ProviderMetadata, RouteID } from "./ids"
export class HttpRequestDetails extends Schema.Class<HttpRequestDetails>("LLM.HttpRequestDetails")({
method: Schema.String,
url: Schema.String,
headers: Schema.Record(Schema.String, Schema.String),
}) {}
export class HttpResponseDetails extends Schema.Class<HttpResponseDetails>("LLM.HttpResponseDetails")({
status: Schema.Number,
headers: Schema.Record(Schema.String, Schema.String),
}) {}
export class HttpRateLimitDetails extends Schema.Class<HttpRateLimitDetails>("LLM.HttpRateLimitDetails")({
retryAfterMs: Schema.optional(Schema.Number),
limit: Schema.optional(Schema.Record(Schema.String, Schema.String)),
remaining: Schema.optional(Schema.Record(Schema.String, Schema.String)),
reset: Schema.optional(Schema.Record(Schema.String, Schema.String)),
}) {}
export class HttpContext extends Schema.Class<HttpContext>("LLM.HttpContext")({
request: HttpRequestDetails,
response: Schema.optional(HttpResponseDetails),
body: Schema.optional(Schema.String),
bodyTruncated: Schema.optional(Schema.Boolean),
requestId: Schema.optional(Schema.String),
rateLimit: Schema.optional(HttpRateLimitDetails),
}) {}
export class InvalidRequestReason extends Schema.Class<InvalidRequestReason>("LLM.Error.InvalidRequest")({
_tag: Schema.tag("InvalidRequest"),
message: Schema.String,
parameter: Schema.optional(Schema.String),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class NoRouteReason extends Schema.Class<NoRouteReason>("LLM.Error.NoRoute")({
_tag: Schema.tag("NoRoute"),
route: RouteID,
provider: ProviderID,
model: ModelID,
}) {
get retryable() {
return false
}
get message() {
return `No LLM route for ${this.provider}/${this.model} using ${this.route}`
}
}
export class AuthenticationReason extends Schema.Class<AuthenticationReason>("LLM.Error.Authentication")({
_tag: Schema.tag("Authentication"),
message: Schema.String,
kind: Schema.Literals(["missing", "invalid", "expired", "insufficient-permissions", "unknown"]),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class RateLimitReason extends Schema.Class<RateLimitReason>("LLM.Error.RateLimit")({
_tag: Schema.tag("RateLimit"),
message: Schema.String,
retryAfterMs: Schema.optional(Schema.Number),
rateLimit: Schema.optional(HttpRateLimitDetails),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return true
}
}
export class QuotaExceededReason extends Schema.Class<QuotaExceededReason>("LLM.Error.QuotaExceeded")({
_tag: Schema.tag("QuotaExceeded"),
message: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class ContentPolicyReason extends Schema.Class<ContentPolicyReason>("LLM.Error.ContentPolicy")({
_tag: Schema.tag("ContentPolicy"),
message: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class ProviderInternalReason extends Schema.Class<ProviderInternalReason>("LLM.Error.ProviderInternal")({
_tag: Schema.tag("ProviderInternal"),
message: Schema.String,
status: Schema.Number,
retryAfterMs: Schema.optional(Schema.Number),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return true
}
}
export class TransportReason extends Schema.Class<TransportReason>("LLM.Error.Transport")({
_tag: Schema.tag("Transport"),
message: Schema.String,
kind: Schema.optional(Schema.String),
url: Schema.optional(Schema.String),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export class InvalidProviderOutputReason extends Schema.Class<InvalidProviderOutputReason>(
"LLM.Error.InvalidProviderOutput",
)({
_tag: Schema.tag("InvalidProviderOutput"),
message: Schema.String,
route: Schema.optional(Schema.String),
raw: Schema.optional(Schema.String),
providerMetadata: Schema.optional(ProviderMetadata),
}) {
get retryable() {
return false
}
}
export class UnknownProviderReason extends Schema.Class<UnknownProviderReason>("LLM.Error.UnknownProvider")({
_tag: Schema.tag("UnknownProvider"),
message: Schema.String,
status: Schema.optional(Schema.Number),
providerMetadata: Schema.optional(ProviderMetadata),
http: Schema.optional(HttpContext),
}) {
get retryable() {
return false
}
}
export const LLMErrorReason = Schema.Union([
InvalidRequestReason,
NoRouteReason,
AuthenticationReason,
RateLimitReason,
QuotaExceededReason,
ContentPolicyReason,
ProviderInternalReason,
TransportReason,
InvalidProviderOutputReason,
UnknownProviderReason,
]).pipe(Schema.toTaggedUnion("_tag"))
export type LLMErrorReason = Schema.Schema.Type<typeof LLMErrorReason>
export class LLMError extends Schema.TaggedErrorClass<LLMError>()("LLM.Error", {
module: Schema.String,
method: Schema.String,
reason: LLMErrorReason,
}) {
override readonly cause = this.reason
get retryable() {
return this.reason.retryable
}
get retryAfterMs() {
return "retryAfterMs" in this.reason ? this.reason.retryAfterMs : undefined
}
override get message() {
return `${this.module}.${this.method}: ${this.reason.message}`
}
}
/**
* Failure type for tool execute handlers. Handlers must map their internal
* errors to this shape; the runtime catches `ToolFailure`s and surfaces them
* as `tool-error` events plus a `tool-result` of `type: "error"` so the model
* can self-correct.
*
* Anything thrown or yielded by a handler that is not a `ToolFailure` is
* treated as a defect and fails the stream.
*/
export class ToolFailure extends Schema.TaggedErrorClass<ToolFailure>()("LLM.ToolFailure", {
message: Schema.String,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
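// Illustrative failure from inside an Effect.gen tool handler (the handler
// shape is assumed):
//
//   return yield* new ToolFailure({
//     message: "file not found",
//     metadata: { path: "/tmp/missing.txt" },
//   })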

View File

@@ -0,0 +1,237 @@
import { Schema } from "effect"
import { FinishReason, ProtocolID, ProviderMetadata, RouteID } from "./ids"
import { ModelRef } from "./options"
import { ToolResultValue } from "./messages"
export class Usage extends Schema.Class<Usage>("LLM.Usage")({
inputTokens: Schema.optional(Schema.Number),
outputTokens: Schema.optional(Schema.Number),
reasoningTokens: Schema.optional(Schema.Number),
cacheReadInputTokens: Schema.optional(Schema.Number),
cacheWriteInputTokens: Schema.optional(Schema.Number),
totalTokens: Schema.optional(Schema.Number),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export const RequestStart = Schema.Struct({
type: Schema.Literal("request-start"),
id: Schema.String,
model: ModelRef,
}).annotate({ identifier: "LLM.Event.RequestStart" })
export type RequestStart = Schema.Schema.Type<typeof RequestStart>
export const StepStart = Schema.Struct({
type: Schema.Literal("step-start"),
index: Schema.Number,
}).annotate({ identifier: "LLM.Event.StepStart" })
export type StepStart = Schema.Schema.Type<typeof StepStart>
export const TextStart = Schema.Struct({
type: Schema.Literal("text-start"),
id: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.TextStart" })
export type TextStart = Schema.Schema.Type<typeof TextStart>
export const TextDelta = Schema.Struct({
type: Schema.Literal("text-delta"),
id: Schema.optional(Schema.String),
text: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.TextDelta" })
export type TextDelta = Schema.Schema.Type<typeof TextDelta>
export const TextEnd = Schema.Struct({
type: Schema.Literal("text-end"),
id: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.TextEnd" })
export type TextEnd = Schema.Schema.Type<typeof TextEnd>
export const ReasoningDelta = Schema.Struct({
type: Schema.Literal("reasoning-delta"),
id: Schema.optional(Schema.String),
text: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ReasoningDelta" })
export type ReasoningDelta = Schema.Schema.Type<typeof ReasoningDelta>
export const ToolInputDelta = Schema.Struct({
type: Schema.Literal("tool-input-delta"),
id: Schema.String,
name: Schema.String,
text: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolInputDelta" })
export type ToolInputDelta = Schema.Schema.Type<typeof ToolInputDelta>
export const ToolCall = Schema.Struct({
type: Schema.Literal("tool-call"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
providerExecuted: Schema.optional(Schema.Boolean),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolCall" })
export type ToolCall = Schema.Schema.Type<typeof ToolCall>
export const ToolResult = Schema.Struct({
type: Schema.Literal("tool-result"),
id: Schema.String,
name: Schema.String,
result: ToolResultValue,
providerExecuted: Schema.optional(Schema.Boolean),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolResult" })
export type ToolResult = Schema.Schema.Type<typeof ToolResult>
export const ToolError = Schema.Struct({
type: Schema.Literal("tool-error"),
id: Schema.String,
name: Schema.String,
message: Schema.String,
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ToolError" })
export type ToolError = Schema.Schema.Type<typeof ToolError>
export const StepFinish = Schema.Struct({
type: Schema.Literal("step-finish"),
index: Schema.Number,
reason: FinishReason,
usage: Schema.optional(Usage),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.StepFinish" })
export type StepFinish = Schema.Schema.Type<typeof StepFinish>
export const RequestFinish = Schema.Struct({
type: Schema.Literal("request-finish"),
reason: FinishReason,
usage: Schema.optional(Usage),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.RequestFinish" })
export type RequestFinish = Schema.Schema.Type<typeof RequestFinish>
export const ProviderErrorEvent = Schema.Struct({
type: Schema.Literal("provider-error"),
message: Schema.String,
retryable: Schema.optional(Schema.Boolean),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Event.ProviderError" })
export type ProviderErrorEvent = Schema.Schema.Type<typeof ProviderErrorEvent>
const llmEventTagged = Schema.Union([
RequestStart,
StepStart,
TextStart,
TextDelta,
TextEnd,
ReasoningDelta,
ToolInputDelta,
ToolCall,
ToolResult,
ToolError,
StepFinish,
RequestFinish,
ProviderErrorEvent,
]).pipe(Schema.toTaggedUnion("type"))
/**
* camelCase aliases for `LLMEvent.guards` (provided by `Schema.toTaggedUnion`).
* Lets consumers write `events.filter(LLMEvent.is.toolCall)` instead of
* `events.filter(LLMEvent.guards["tool-call"])`.
*/
export const LLMEvent = Object.assign(llmEventTagged, {
is: {
requestStart: llmEventTagged.guards["request-start"],
stepStart: llmEventTagged.guards["step-start"],
textStart: llmEventTagged.guards["text-start"],
textDelta: llmEventTagged.guards["text-delta"],
textEnd: llmEventTagged.guards["text-end"],
reasoningDelta: llmEventTagged.guards["reasoning-delta"],
toolInputDelta: llmEventTagged.guards["tool-input-delta"],
toolCall: llmEventTagged.guards["tool-call"],
toolResult: llmEventTagged.guards["tool-result"],
toolError: llmEventTagged.guards["tool-error"],
stepFinish: llmEventTagged.guards["step-finish"],
requestFinish: llmEventTagged.guards["request-finish"],
providerError: llmEventTagged.guards["provider-error"],
},
})
export type LLMEvent = Schema.Schema.Type<typeof llmEventTagged>
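// e.g. `events.filter(LLMEvent.is.toolCall)` narrows to tool-call events, and
// `LLMEvent.is.textDelta(event)` works as an ordinary type guard in an `if`.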
export class PreparedRequest extends Schema.Class<PreparedRequest>("LLM.PreparedRequest")({
id: Schema.String,
route: RouteID,
protocol: ProtocolID,
model: ModelRef,
body: Schema.Unknown,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
/**
* A `PreparedRequest` whose `body` is typed as `Body`. Use with the generic
* on `LLMClient.prepare<Body>(...)` when the caller knows which route their
* request will resolve to and wants its native shape statically exposed
* (debug UIs, request previews, plan rendering).
*
* The runtime body is identical — the route still emits `body: unknown` — so
* this is a type-level assertion the caller makes about what they expect to
* find. The prepare runtime does not validate the assertion.
*/
export type PreparedRequestOf<Body> = Omit<PreparedRequest, "body"> & {
readonly body: Body
}
const responseText = (events: ReadonlyArray<LLMEvent>) =>
events
.filter(LLMEvent.is.textDelta)
.map((event) => event.text)
.join("")
const responseReasoning = (events: ReadonlyArray<LLMEvent>) =>
events
.filter(LLMEvent.is.reasoningDelta)
.map((event) => event.text)
.join("")
const responseUsage = (events: ReadonlyArray<LLMEvent>) =>
events.reduce<Usage | undefined>(
(usage, event) => ("usage" in event && event.usage !== undefined ? event.usage : usage),
undefined,
)
export class LLMResponse extends Schema.Class<LLMResponse>("LLM.Response")({
events: Schema.Array(LLMEvent),
usage: Schema.optional(Usage),
}) {
/** Concatenated assistant text assembled from streamed `text-delta` events. */
get text() {
return responseText(this.events)
}
/** Concatenated reasoning text assembled from streamed `reasoning-delta` events. */
get reasoning() {
return responseReasoning(this.events)
}
/** Completed tool calls emitted by the provider. */
get toolCalls() {
return this.events.filter(LLMEvent.is.toolCall)
}
}
export namespace LLMResponse {
export type Output = LLMResponse | { readonly events: ReadonlyArray<LLMEvent>; readonly usage?: Usage }
/** Concatenate assistant text from a response or collected event list. */
export const text = (response: Output) => responseText(response.events)
/** Return response usage, falling back to the latest usage-bearing event. */
export const usage = (response: Output) => response.usage ?? responseUsage(response.events)
/** Return completed tool calls from a response or collected event list. */
export const toolCalls = (response: Output) => response.events.filter(LLMEvent.is.toolCall)
/** Concatenate reasoning text from a response or collected event list. */
export const reasoning = (response: Output) => responseReasoning(response.events)
}
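// The namespace helpers accept either an `LLMResponse` or a plain collected
// event list, e.g. (illustrative):
//
//   const text = LLMResponse.text({ events })
//   const usage = LLMResponse.usage({ events })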

View File

@@ -0,0 +1,34 @@
import { Schema } from "effect"
/** Stable string identifier for a protocol implementation. */
export const ProtocolID = Schema.String
export type ProtocolID = Schema.Schema.Type<typeof ProtocolID>
/** Stable string identifier for the runnable route. */
export const RouteID = Schema.String
export type RouteID = Schema.Schema.Type<typeof RouteID>
export const ModelID = Schema.String.pipe(Schema.brand("LLM.ModelID"))
export type ModelID = typeof ModelID.Type
export const ProviderID = Schema.String.pipe(Schema.brand("LLM.ProviderID"))
export type ProviderID = typeof ProviderID.Type
export const ReasoningEfforts = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
export const ReasoningEffort = Schema.Literals(ReasoningEfforts)
export type ReasoningEffort = Schema.Schema.Type<typeof ReasoningEffort>
export const TextVerbosity = Schema.Literals(["low", "medium", "high"])
export type TextVerbosity = Schema.Schema.Type<typeof TextVerbosity>
export const MessageRole = Schema.Literals(["user", "assistant", "tool"])
export type MessageRole = Schema.Schema.Type<typeof MessageRole>
export const FinishReason = Schema.Literals(["stop", "length", "tool-calls", "content-filter", "error", "unknown"])
export type FinishReason = Schema.Schema.Type<typeof FinishReason>
export const JsonSchema = Schema.Record(Schema.String, Schema.Unknown)
export type JsonSchema = Schema.Schema.Type<typeof JsonSchema>
export const ProviderMetadata = Schema.Record(Schema.String, Schema.Record(Schema.String, Schema.Unknown))
export type ProviderMetadata = Schema.Schema.Type<typeof ProviderMetadata>

View File

@@ -0,0 +1,5 @@
export * from "./ids"
export * from "./options"
export * from "./messages"
export * from "./events"
export * from "./errors"

View File

@@ -0,0 +1,234 @@
import { Schema } from "effect"
import { JsonSchema, MessageRole, ProviderMetadata } from "./ids"
import { CacheHint, GenerationOptions, HttpOptions, ModelRef, ProviderOptions } from "./options"
const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
const systemPartSchema = Schema.Struct({
type: Schema.Literal("text"),
text: Schema.String,
cache: Schema.optional(CacheHint),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.SystemPart" })
export type SystemPart = Schema.Schema.Type<typeof systemPartSchema>
const makeSystemPart = (text: string): SystemPart => ({ type: "text", text })
export const SystemPart = Object.assign(systemPartSchema, {
make: makeSystemPart,
content: (input?: string | SystemPart | ReadonlyArray<SystemPart>) => {
if (input === undefined) return []
return typeof input === "string" ? [makeSystemPart(input)] : Array.isArray(input) ? [...input] : [input]
},
})
export const TextPart = Schema.Struct({
type: Schema.Literal("text"),
text: Schema.String,
cache: Schema.optional(CacheHint),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.Text" })
export type TextPart = Schema.Schema.Type<typeof TextPart>
export const MediaPart = Schema.Struct({
type: Schema.Literal("media"),
mediaType: Schema.String,
data: Schema.Union([Schema.String, Schema.Uint8Array]),
filename: Schema.optional(Schema.String),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}).annotate({ identifier: "LLM.Content.Media" })
export type MediaPart = Schema.Schema.Type<typeof MediaPart>
const isToolResultValue = (value: unknown): value is ToolResultValue =>
isRecord(value) && (value.type === "text" || value.type === "json" || value.type === "error") && "value" in value
export const ToolResultValue = Object.assign(
Schema.Struct({
type: Schema.Literals(["json", "text", "error"]),
value: Schema.Unknown,
}).annotate({ identifier: "LLM.ToolResult" }),
{
make: (value: unknown, type: ToolResultValue["type"] = "json"): ToolResultValue =>
isToolResultValue(value) ? value : { type, value },
},
)
export type ToolResultValue = Schema.Schema.Type<typeof ToolResultValue>
export const ToolCallPart = Object.assign(
Schema.Struct({
type: Schema.Literal("tool-call"),
id: Schema.String,
name: Schema.String,
input: Schema.Unknown,
providerExecuted: Schema.optional(Schema.Boolean),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.ToolCall" }),
{
make: (input: Omit<ToolCallPart, "type">): ToolCallPart => ({ type: "tool-call", ...input }),
},
)
export type ToolCallPart = Schema.Schema.Type<typeof ToolCallPart>
export const ToolResultPart = Object.assign(
Schema.Struct({
type: Schema.Literal("tool-result"),
id: Schema.String,
name: Schema.String,
result: ToolResultValue,
providerExecuted: Schema.optional(Schema.Boolean),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.ToolResult" }),
{
make: (
input: Omit<ToolResultPart, "type" | "result"> & {
readonly result: unknown
readonly resultType?: ToolResultValue["type"]
},
): ToolResultPart => ({
type: "tool-result",
id: input.id,
name: input.name,
result: ToolResultValue.make(input.result, input.resultType),
providerExecuted: input.providerExecuted,
metadata: input.metadata,
providerMetadata: input.providerMetadata,
}),
},
)
export type ToolResultPart = Schema.Schema.Type<typeof ToolResultPart>
export const ReasoningPart = Schema.Struct({
type: Schema.Literal("reasoning"),
text: Schema.String,
encrypted: Schema.optional(Schema.String),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
providerMetadata: Schema.optional(ProviderMetadata),
}).annotate({ identifier: "LLM.Content.Reasoning" })
export type ReasoningPart = Schema.Schema.Type<typeof ReasoningPart>
export const ContentPart = Schema.Union([TextPart, MediaPart, ToolCallPart, ToolResultPart, ReasoningPart]).pipe(
Schema.toTaggedUnion("type"),
)
export type ContentPart = Schema.Schema.Type<typeof ContentPart>
export class Message extends Schema.Class<Message>("LLM.Message")({
id: Schema.optional(Schema.String),
role: MessageRole,
content: Schema.Array(ContentPart),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace Message {
export type ContentInput = string | ContentPart | ReadonlyArray<ContentPart>
export type Input = Omit<ConstructorParameters<typeof Message>[0], "content"> & {
readonly content: ContentInput
}
export const text = (value: string): ContentPart => ({ type: "text", text: value })
export const content = (input: ContentInput) =>
typeof input === "string" ? [text(input)] : Array.isArray(input) ? [...input] : [input]
export const make = (input: Message | Input) => {
if (input instanceof Message) return input
return new Message({ ...input, content: content(input.content) })
}
export const user = (content: ContentInput) => make({ role: "user", content })
export const assistant = (content: ContentInput) => make({ role: "assistant", content })
export const tool = (result: ToolResultPart | Parameters<typeof ToolResultPart.make>[0]) =>
make({ role: "tool", content: ["type" in result ? result : ToolResultPart.make(result)] })
}
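/**
* Example (illustrative): assembling a short tool-use exchange with the
* constructors above. The tool name and call id are made-up fixtures.
*
* ```ts
* const history = [
*   Message.user("What is the weather in Paris?"),
*   Message.assistant([
*     ToolCallPart.make({ id: "call_1", name: "get_weather", input: { city: "Paris" } }),
*   ]),
*   Message.tool({ id: "call_1", name: "get_weather", result: { temperature: 22 } }),
* ]
* ```
*/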
export class ToolDefinition extends Schema.Class<ToolDefinition>("LLM.ToolDefinition")({
name: Schema.String,
description: Schema.String,
inputSchema: JsonSchema,
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace ToolDefinition {
export type Input = ToolDefinition | ConstructorParameters<typeof ToolDefinition>[0]
/** Normalize tool definition input into the canonical `ToolDefinition` class. */
export const make = (input: Input) => (input instanceof ToolDefinition ? input : new ToolDefinition(input))
}
export class ToolChoice extends Schema.Class<ToolChoice>("LLM.ToolChoice")({
type: Schema.Literals(["auto", "none", "required", "tool"]),
name: Schema.optional(Schema.String),
}) {}
export namespace ToolChoice {
export type Mode = Exclude<ToolChoice["type"], "tool">
export type Input = ToolChoice | ConstructorParameters<typeof ToolChoice>[0] | ToolDefinition | string
const isMode = (value: string): value is Mode => value === "auto" || value === "none" || value === "required"
/** Select a specific named tool. */
export const named = (value: string) => new ToolChoice({ type: "tool", name: value })
/** Normalize ergonomic tool-choice inputs into the canonical `ToolChoice` class. */
export const make = (input: Input) => {
if (input instanceof ToolChoice) return input
if (input instanceof ToolDefinition) return named(input.name)
if (typeof input === "string") return isMode(input) ? new ToolChoice({ type: input }) : named(input)
return new ToolChoice(input)
}
}
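/**
* Example (illustrative): `ToolChoice.make` reads the three mode strings as
* modes and any other string as a tool name.
*
* ```ts
* ToolChoice.make("auto")        // ToolChoice { type: "auto" }
* ToolChoice.make("get_weather") // ToolChoice { type: "tool", name: "get_weather" }
* ```
*/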
export const ResponseFormat = Schema.Union([
Schema.Struct({ type: Schema.Literal("text") }),
Schema.Struct({ type: Schema.Literal("json"), schema: JsonSchema }),
Schema.Struct({ type: Schema.Literal("tool"), tool: ToolDefinition }),
]).pipe(Schema.toTaggedUnion("type"))
export type ResponseFormat = Schema.Schema.Type<typeof ResponseFormat>
export class LLMRequest extends Schema.Class<LLMRequest>("LLM.Request")({
id: Schema.optional(Schema.String),
model: ModelRef,
system: Schema.Array(SystemPart),
messages: Schema.Array(Message),
tools: Schema.Array(ToolDefinition),
toolChoice: Schema.optional(ToolChoice),
generation: Schema.optional(GenerationOptions),
providerOptions: Schema.optional(ProviderOptions),
http: Schema.optional(HttpOptions),
responseFormat: Schema.optional(ResponseFormat),
metadata: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace LLMRequest {
export type Input = ConstructorParameters<typeof LLMRequest>[0]
export const input = (request: LLMRequest): Input => ({
id: request.id,
model: request.model,
system: request.system,
messages: request.messages,
tools: request.tools,
toolChoice: request.toolChoice,
generation: request.generation,
providerOptions: request.providerOptions,
http: request.http,
responseFormat: request.responseFormat,
metadata: request.metadata,
})
export const update = (request: LLMRequest, patch: Partial<Input>) => {
if (Object.keys(patch).length === 0) return request
return new LLMRequest({
...input(request),
...patch,
model: patch.model ?? request.model,
})
}
}
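/**
* Example (illustrative): `LLMRequest.update` copies the request and applies a
* partial patch, so appending a message to some existing `request` never
* mutates it.
*
* ```ts
* const next = LLMRequest.update(request, {
*   messages: [...request.messages, Message.user("continue")],
* })
* ```
*/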

View File

@@ -0,0 +1,202 @@
import { Schema } from "effect"
import { JsonSchema, ModelID, ProviderID, RouteID } from "./ids"
const isRecord = (value: unknown): value is Record<string, unknown> =>
typeof value === "object" && value !== null && !Array.isArray(value)
export const mergeJsonRecords = (
...items: ReadonlyArray<Record<string, unknown> | undefined>
): Record<string, unknown> | undefined => {
const defined = items.filter((item): item is Record<string, unknown> => item !== undefined)
if (defined.length === 0) return undefined
if (defined.length === 1 && Object.values(defined[0]).every((value) => value !== undefined)) return defined[0]
const result: Record<string, unknown> = {}
for (const item of defined) {
for (const [key, value] of Object.entries(item)) {
if (value === undefined) continue
result[key] = isRecord(result[key]) && isRecord(value) ? mergeJsonRecords(result[key], value) : value
}
}
return Object.keys(result).length === 0 ? undefined : result
}
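/**
* Example (illustrative): later records win per key, nested records merge
* recursively, and `undefined` values are dropped.
*
* ```ts
* mergeJsonRecords({ a: 1, nested: { x: 1 } }, { nested: { y: 2 }, b: undefined })
* // => { a: 1, nested: { x: 1, y: 2 } }
* ```
*/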
const mergeStringRecords = (
...items: ReadonlyArray<Record<string, string> | undefined>
): Record<string, string> | undefined => {
const defined = items.filter((item): item is Record<string, string> => item !== undefined)
if (defined.length === 0) return undefined
if (defined.length === 1) return defined[0]
const result = Object.fromEntries(
defined.flatMap((item) =>
Object.entries(item).filter((entry): entry is [string, string] => entry[1] !== undefined),
),
)
return Object.keys(result).length === 0 ? undefined : result
}
export const ProviderOptions = Schema.Record(Schema.String, Schema.Record(Schema.String, Schema.Unknown))
export type ProviderOptions = Schema.Schema.Type<typeof ProviderOptions>
export const mergeProviderOptions = (
...items: ReadonlyArray<ProviderOptions | undefined>
): ProviderOptions | undefined => {
const result: Record<string, Record<string, unknown>> = {}
for (const item of items) {
if (!item) continue
for (const [provider, options] of Object.entries(item)) {
const merged = mergeJsonRecords(result[provider], options)
if (merged) result[provider] = merged
}
}
return Object.keys(result).length === 0 ? undefined : result
}
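/**
* Example (illustrative): provider options merge per provider namespace.
*
* ```ts
* mergeProviderOptions({ openai: { store: true } }, { openai: { user: "u1" } })
* // => { openai: { store: true, user: "u1" } }
* ```
*/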
export class HttpOptions extends Schema.Class<HttpOptions>("LLM.HttpOptions")({
body: Schema.optional(JsonSchema),
headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
query: Schema.optional(Schema.Record(Schema.String, Schema.String)),
}) {}
export namespace HttpOptions {
export type Input = HttpOptions | ConstructorParameters<typeof HttpOptions>[0]
/** Normalize HTTP option input into the canonical `HttpOptions` class. */
export const make = (input: Input) => (input instanceof HttpOptions ? input : new HttpOptions(input))
}
export const mergeHttpOptions = (...items: ReadonlyArray<HttpOptions | undefined>): HttpOptions | undefined => {
const body = mergeJsonRecords(...items.map((item) => item?.body))
const headers = mergeStringRecords(...items.map((item) => item?.headers))
const query = mergeStringRecords(...items.map((item) => item?.query))
if (!body && !headers && !query) return undefined
return new HttpOptions({ body, headers, query })
}
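/**
* Example (illustrative): `body` records deep-merge while `headers` and
* `query` merge shallowly, with later values winning.
*
* ```ts
* mergeHttpOptions(
*   new HttpOptions({ headers: { "x-a": "1" } }),
*   new HttpOptions({ headers: { "x-a": "2" }, query: { beta: "true" } }),
* )
* // => HttpOptions { headers: { "x-a": "2" }, query: { beta: "true" } }
* ```
*/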
export class GenerationOptions extends Schema.Class<GenerationOptions>("LLM.GenerationOptions")({
maxTokens: Schema.optional(Schema.Number),
temperature: Schema.optional(Schema.Number),
topP: Schema.optional(Schema.Number),
topK: Schema.optional(Schema.Number),
frequencyPenalty: Schema.optional(Schema.Number),
presencePenalty: Schema.optional(Schema.Number),
seed: Schema.optional(Schema.Number),
stop: Schema.optional(Schema.Array(Schema.String)),
}) {}
export namespace GenerationOptions {
export type Input = GenerationOptions | ConstructorParameters<typeof GenerationOptions>[0]
/** Normalize generation option input into the canonical `GenerationOptions` class. */
export const make = (input: Input = {}) => (input instanceof GenerationOptions ? input : new GenerationOptions(input))
}
export type GenerationOptionsFields = {
readonly maxTokens?: number
readonly temperature?: number
readonly topP?: number
readonly topK?: number
readonly frequencyPenalty?: number
readonly presencePenalty?: number
readonly seed?: number
readonly stop?: ReadonlyArray<string>
}
export type GenerationOptionsInput = GenerationOptions | GenerationOptionsFields
const latestGeneration = <Key extends keyof GenerationOptionsFields>(
items: ReadonlyArray<GenerationOptionsInput | undefined>,
key: Key,
) => items.findLast((item) => item?.[key] !== undefined)?.[key]
export const mergeGenerationOptions = (...items: ReadonlyArray<GenerationOptionsInput | undefined>) => {
const result = new GenerationOptions({
maxTokens: latestGeneration(items, "maxTokens"),
temperature: latestGeneration(items, "temperature"),
topP: latestGeneration(items, "topP"),
topK: latestGeneration(items, "topK"),
frequencyPenalty: latestGeneration(items, "frequencyPenalty"),
presencePenalty: latestGeneration(items, "presencePenalty"),
seed: latestGeneration(items, "seed"),
stop: latestGeneration(items, "stop"),
})
return Object.values(result).some((value) => value !== undefined) ? result : undefined
}
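/**
* Example (illustrative): each field independently takes its last defined
* value, which is how request-level options override model defaults.
*
* ```ts
* mergeGenerationOptions({ temperature: 0, maxTokens: 256 }, { maxTokens: 1024 })
* // => GenerationOptions { temperature: 0, maxTokens: 1024 }
* ```
*/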
export class ModelLimits extends Schema.Class<ModelLimits>("LLM.ModelLimits")({
context: Schema.optional(Schema.Number),
output: Schema.optional(Schema.Number),
}) {}
export namespace ModelLimits {
export type Input = ModelLimits | ConstructorParameters<typeof ModelLimits>[0]
/** Normalize model limit input into the canonical `ModelLimits` class. */
export const make = (input: Input | undefined) =>
input instanceof ModelLimits ? input : new ModelLimits(input ?? {})
}
export class ModelRef extends Schema.Class<ModelRef>("LLM.ModelRef")({
id: ModelID,
provider: ProviderID,
route: RouteID,
baseURL: Schema.String,
/** Provider-specific API key convenience. Provider helpers normalize this into `auth`. */
apiKey: Schema.optional(Schema.String),
/** Optional transport auth policy. Opaque because it may contain functions. */
auth: Schema.optional(Schema.Any),
headers: Schema.optional(Schema.Record(Schema.String, Schema.String)),
/**
* Query params appended to the request URL by `Endpoint.baseURL`. Used for
* URL-scoped, deployment-level settings such as Azure's `api-version`, or for
* any provider that requires a per-request key in the URL. This is a generic
* concern, so it lives as a typed first-class field instead of in `native`.
*/
queryParams: Schema.optional(Schema.Record(Schema.String, Schema.String)),
limits: ModelLimits,
/** Provider-neutral generation defaults. Request-level values override them. */
generation: Schema.optional(GenerationOptions),
/** Provider-owned options, typed at the facade, for non-portable knobs. */
providerOptions: Schema.optional(ProviderOptions),
/** Serializable raw HTTP overlays applied to the final outgoing request. */
http: Schema.optional(HttpOptions),
/**
* Provider-specific opaque options. Reach for this only when the value is
* genuinely provider-private and does not fit a typed axis (e.g. Bedrock's
* `aws_credentials` / `aws_region` for SigV4). Anything used by more than
* one route should grow into a typed field instead.
*/
native: Schema.optional(Schema.Record(Schema.String, Schema.Unknown)),
}) {}
export namespace ModelRef {
export type Input = ConstructorParameters<typeof ModelRef>[0]
export const input = (model: ModelRef): Input => ({
id: model.id,
provider: model.provider,
route: model.route,
baseURL: model.baseURL,
apiKey: model.apiKey,
auth: model.auth,
headers: model.headers,
queryParams: model.queryParams,
limits: model.limits,
generation: model.generation,
providerOptions: model.providerOptions,
http: model.http,
native: model.native,
})
export const update = (model: ModelRef, patch: Partial<Input>) => {
if (Object.keys(patch).length === 0) return model
return new ModelRef({
...input(model),
...patch,
})
}
}
export class CacheHint extends Schema.Class<CacheHint>("LLM.CacheHint")({
type: Schema.Literals(["ephemeral", "persistent"]),
ttlSeconds: Schema.optional(Schema.Number),
}) {}

View File

@@ -0,0 +1,240 @@
import { Effect, Stream } from "effect"
import type { Concurrency } from "effect/Types"
import {
type ContentPart,
type FinishReason,
type LLMError,
type LLMEvent,
LLMRequest,
Message,
type ProviderMetadata,
ToolCallPart,
ToolFailure,
ToolResultPart,
type ToolResultValue,
} from "./schema"
import { type AnyTool, type ExecutableTools, type Tools, toDefinitions } from "./tool"
export interface RuntimeState {
readonly step: number
readonly request: LLMRequest
}
export type StopCondition = (state: RuntimeState) => boolean
export type ToolExecution = "auto" | "none"
interface RunOptionsBase {
readonly request: LLMRequest
readonly concurrency?: Concurrency
readonly stopWhen?: StopCondition
}
export type RunOptions<T extends Tools> = RunOptionsAuto<T & ExecutableTools> | RunOptionsNone<T>
export interface RunOptionsAuto<T extends ExecutableTools> extends RunOptionsBase {
readonly request: LLMRequest
readonly tools: T
readonly toolExecution?: "auto"
}
export interface RunOptionsNone<T extends Tools> extends RunOptionsBase {
readonly request: LLMRequest
readonly tools: T
/** Advertise tool schemas but leave model-emitted tool calls for the caller. */
readonly toolExecution: "none"
}
export type StreamOptions<T extends Tools> = RunOptions<T> & {
readonly stream: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError>
}
export const stepCountIs =
(count: number): StopCondition =>
(state) =>
state.step + 1 >= count
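/**
* Steps are zero-indexed and the condition is checked after each model round,
* so `stepCountIs(1)` stops after the first round and `stepCountIs(2)` allows
* one tool-driven follow-up round.
*/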
/**
* Run a model with typed tools. This helper owns tool orchestration, while the
* caller supplies the actual model stream function. It can advertise schemas
* only (`toolExecution: "none"`), execute one step, or continue model rounds
* when `stopWhen` is provided.
*/
export const stream = <T extends Tools>(options: StreamOptions<T>): Stream.Stream<LLMEvent, LLMError> => {
const concurrency = options.concurrency ?? 10
const tools = options.tools as Tools
const runtimeTools = toDefinitions(tools)
const runtimeToolNames = new Set(runtimeTools.map((tool) => tool.name))
const initialRequest =
runtimeTools.length === 0
? options.request
: LLMRequest.update(options.request, {
tools: [...options.request.tools.filter((tool) => !runtimeToolNames.has(tool.name)), ...runtimeTools],
})
const loop = (request: LLMRequest, step: number): Stream.Stream<LLMEvent, LLMError> =>
Stream.unwrap(
Effect.gen(function* () {
const state: StepState = { assistantContent: [], toolCalls: [], finishReason: undefined }
const modelStream = options
.stream(request)
.pipe(Stream.tap((event) => Effect.sync(() => accumulate(state, event))))
const continuation = Stream.unwrap(
Effect.gen(function* () {
if (state.finishReason !== "tool-calls" || state.toolCalls.length === 0) return Stream.empty
if (options.toolExecution === "none") return Stream.empty
const dispatched = yield* Effect.forEach(
state.toolCalls,
(call) => dispatch(tools, call).pipe(Effect.map((result) => [call, result] as const)),
{ concurrency },
)
const resultStream = Stream.fromIterable(dispatched.flatMap(([call, result]) => emitEvents(call, result)))
if (!options.stopWhen) return resultStream
if (options.stopWhen({ step, request })) return resultStream
return resultStream.pipe(Stream.concat(loop(followUpRequest(request, state, dispatched), step + 1)))
}),
)
return modelStream.pipe(Stream.concat(continuation))
}),
)
return loop(initialRequest, 0)
}
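/**
* Example (illustrative): wiring the runtime to a model stream. `llm.stream`
* stands in for whatever route-level stream function the caller supplies, and
* `weatherTool` is a made-up tool fixture.
*
* ```ts
* const events = stream({
*   request,
*   tools: { get_weather: weatherTool },
*   stream: (req) => llm.stream(req),
*   stopWhen: stepCountIs(3),
* })
* ```
*/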
interface StepState {
assistantContent: ContentPart[]
toolCalls: ToolCallPart[]
finishReason: FinishReason | undefined
}
const accumulate = (state: StepState, event: LLMEvent) => {
if (event.type === "text-delta") {
appendStreamingText(state, "text", event.text, event.providerMetadata)
return
}
if (event.type === "reasoning-delta") {
appendStreamingText(state, "reasoning", event.text, event.providerMetadata)
return
}
if (event.type === "tool-call") {
const part = ToolCallPart.make({
id: event.id,
name: event.name,
input: event.input,
providerExecuted: event.providerExecuted,
providerMetadata: event.providerMetadata,
})
state.assistantContent.push(part)
if (!event.providerExecuted) state.toolCalls.push(part)
return
}
if (event.type === "tool-result" && event.providerExecuted) {
state.assistantContent.push(
ToolResultPart.make({
id: event.id,
name: event.name,
result: event.result,
providerExecuted: true,
providerMetadata: event.providerMetadata,
}),
)
return
}
if (event.type === "request-finish") {
state.finishReason = event.reason
}
}
const sameProviderMetadata = (left: ProviderMetadata | undefined, right: ProviderMetadata | undefined) =>
left === right || JSON.stringify(left) === JSON.stringify(right)
const mergeProviderMetadata = (left: ProviderMetadata | undefined, right: ProviderMetadata | undefined) => {
if (!left) return right
if (!right) return left
return Object.fromEntries(
Array.from(new Set([...Object.keys(left), ...Object.keys(right)])).map((provider) => [
provider,
{ ...left[provider], ...right[provider] },
]),
)
}
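/**
* Coalesce consecutive deltas of the same part type. Empty deltas only merge
* metadata; a metadata change starts a new part so per-part provider metadata
* stays accurate.
*/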
const appendStreamingText = (
state: StepState,
type: "text" | "reasoning",
text: string,
providerMetadata: ProviderMetadata | undefined,
) => {
const last = state.assistantContent.at(-1)
if (last?.type === type && text.length === 0) {
state.assistantContent[state.assistantContent.length - 1] = {
...last,
providerMetadata: mergeProviderMetadata(last.providerMetadata, providerMetadata),
}
return
}
if (last?.type === type && sameProviderMetadata(last.providerMetadata, providerMetadata)) {
state.assistantContent[state.assistantContent.length - 1] = { ...last, text: `${last.text}${text}` }
return
}
state.assistantContent.push({ type, text, providerMetadata })
}
const dispatch = (tools: Tools, call: ToolCallPart): Effect.Effect<ToolResultValue> => {
const tool = tools[call.name]
if (!tool) return Effect.succeed({ type: "error" as const, value: `Unknown tool: ${call.name}` })
if (!tool.execute)
return Effect.succeed({ type: "error" as const, value: `Tool has no execute handler: ${call.name}` })
return decodeAndExecute(tool, call.input).pipe(
Effect.catchTag("LLM.ToolFailure", (failure) =>
Effect.succeed({ type: "error" as const, value: failure.message } satisfies ToolResultValue),
),
)
}
const decodeAndExecute = (tool: AnyTool, input: unknown): Effect.Effect<ToolResultValue, ToolFailure> =>
tool._decode(input).pipe(
Effect.mapError((error) => new ToolFailure({ message: `Invalid tool input: ${error.message}` })),
Effect.flatMap((decoded) => tool.execute!(decoded)),
Effect.flatMap((value) =>
tool._encode(value).pipe(
Effect.mapError(
(error) =>
new ToolFailure({
message: `Tool returned an invalid value for its success schema: ${error.message}`,
}),
),
),
),
Effect.map((encoded): ToolResultValue => ({ type: "json", value: encoded })),
)
const emitEvents = (call: ToolCallPart, result: ToolResultValue): ReadonlyArray<LLMEvent> =>
result.type === "error"
? [
{ type: "tool-error", id: call.id, name: call.name, message: String(result.value) },
{ type: "tool-result", id: call.id, name: call.name, result },
]
: [{ type: "tool-result", id: call.id, name: call.name, result }]
const followUpRequest = (
request: LLMRequest,
state: StepState,
dispatched: ReadonlyArray<readonly [ToolCallPart, ToolResultValue]>,
) =>
LLMRequest.update(request, {
messages: [
...request.messages,
Message.assistant(state.assistantContent),
...dispatched.map(([call, result]) => Message.tool({ id: call.id, name: call.name, result })),
],
})
export const ToolRuntime = { stream, stepCountIs } as const

185
packages/llm/src/tool.ts Normal file
View File

@@ -0,0 +1,185 @@
import { Effect, JsonSchema, Schema } from "effect"
import type { ToolDefinition as ToolDefinitionClass } from "./schema"
import { ToolDefinition, ToolFailure } from "./schema"
/**
* Schema constraint for tool parameters / success values: no decoding or
* encoding services are allowed. Tools should be self-contained — anything
* beyond pure data conversion belongs in the handler closure.
*/
export type ToolSchema<T> = Schema.Codec<T, any, never, never>
export type ToolExecute<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> = (
params: Schema.Schema.Type<Parameters>,
) => Effect.Effect<Schema.Schema.Type<Success>, ToolFailure>
/**
* A type-safe LLM tool. Each tool bundles its own description, parameter
* Schema and success Schema. The execute handler is optional: omit it when you
* only want to expose a tool schema to the model and handle tool calls outside
* this package.
*
* Errors must be expressed as `ToolFailure`. Unmapped errors and defects fail
* the stream.
*
* Internally each tool also carries memoized codecs and a precomputed
* `ToolDefinition` so the runtime doesn't rebuild them per invocation.
*/
export interface Tool<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute?: ToolExecute<Parameters, Success>
/** @internal */
readonly _decode: (input: unknown) => Effect.Effect<Schema.Schema.Type<Parameters>, Schema.SchemaError>
/** @internal */
readonly _encode: (value: Schema.Schema.Type<Success>) => Effect.Effect<unknown, Schema.SchemaError>
/** @internal */
readonly _definition: ToolDefinitionClass
}
export type AnyTool = Tool<ToolSchema<any>, ToolSchema<any>>
export type ExecutableTool<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>> = Tool<
Parameters,
Success
> & {
readonly execute: ToolExecute<Parameters, Success>
}
export type AnyExecutableTool = ExecutableTool<ToolSchema<any>, ToolSchema<any>>
export type ExecutableTools = Record<string, AnyExecutableTool>
type TypedToolConfig = {
readonly description: string
readonly parameters: ToolSchema<any>
readonly success: ToolSchema<any>
readonly execute?: ToolExecute<ToolSchema<any>, ToolSchema<any>>
}
type DynamicToolConfig = {
readonly description: string
readonly jsonSchema: JsonSchema.JsonSchema
readonly execute?: (params: unknown) => Effect.Effect<unknown, ToolFailure>
}
/**
* Constructs a tool. Two input modes:
*
* 1. **Typed** — pass Effect `parameters` and `success` Schemas; inputs and
* outputs are statically typed and decoded/encoded automatically.
*
* ```ts
* Tool.make({
* description: "Get current weather",
* parameters: Schema.Struct({ city: Schema.String }),
* success: Schema.Struct({ temperature: Schema.Number }),
* execute: ({ city }) => Effect.succeed({ temperature: 22 }),
* })
* ```
*
* 2. **Dynamic** — pass raw JSON Schema as `jsonSchema`. Use this when the
* schema comes from an external source (MCP server, plugin manifest,
* dynamic config) and is not known at compile time. Inputs are typed as
* `unknown`; the handler is responsible for any validation it needs.
*
* ```ts
* Tool.make({
* description: "Look something up",
* jsonSchema: { type: "object", properties: { ... } },
* execute: (params) => Effect.succeed(...),
* })
* ```
*
* In both modes the produced tool flows through `toDefinitions(...)` and the
* runtime identically.
*/
export function make<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>>(config: {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute: ToolExecute<Parameters, Success>
}): ExecutableTool<Parameters, Success>
export function make<Parameters extends ToolSchema<any>, Success extends ToolSchema<any>>(config: {
readonly description: string
readonly parameters: Parameters
readonly success: Success
readonly execute?: undefined
}): Tool<Parameters, Success>
export function make(config: {
readonly description: string
readonly jsonSchema: JsonSchema.JsonSchema
readonly execute: (params: unknown) => Effect.Effect<unknown, ToolFailure>
}): AnyExecutableTool
export function make(config: {
readonly description: string
readonly jsonSchema: JsonSchema.JsonSchema
readonly execute?: undefined
}): AnyTool
export function make(config: TypedToolConfig | DynamicToolConfig): AnyTool {
if ("jsonSchema" in config) {
return {
description: config.description,
parameters: Schema.Unknown as ToolSchema<unknown>,
success: Schema.Unknown as ToolSchema<unknown>,
execute: config.execute,
_decode: Effect.succeed,
_encode: Effect.succeed,
_definition: new ToolDefinition({
name: "",
description: config.description,
inputSchema: config.jsonSchema,
}),
}
}
return {
description: config.description,
parameters: config.parameters,
success: config.success,
execute: config.execute,
_decode: Schema.decodeUnknownEffect(config.parameters),
_encode: Schema.encodeEffect(config.success),
_definition: new ToolDefinition({
name: "",
description: config.description,
inputSchema: toJsonSchema(config.parameters),
}),
}
}
export const tool = make
/**
* A record of named tools. The record key becomes the tool name on the wire.
*/
export type Tools = Record<string, AnyTool>
/**
* Convert a tools record into the `ToolDefinition[]` shape that
* `LLMRequest.tools` expects. The runtime calls this internally; consumers
* that build `LLMRequest` themselves can use it too.
*
* Tool names come from the record keys, so the per-tool cached
* `_definition` is rebuilt with the correct name here. The JSON Schema body
* is reused.
*/
export const toDefinitions = (tools: Tools): ReadonlyArray<ToolDefinitionClass> =>
Object.entries(tools).map(
([name, item]) =>
new ToolDefinition({
name,
description: item._definition.description,
inputSchema: item._definition.inputSchema,
}),
)
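/**
* Example (illustrative): the record key, not anything stored on the tool,
* becomes the wire name.
*
* ```ts
* toDefinitions({ get_weather: weatherTool })[0].name // "get_weather"
* ```
*/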
const toJsonSchema = (schema: Schema.Top): JsonSchema.JsonSchema => {
const document = Schema.toJsonSchemaDocument(schema)
if (Object.keys(document.definitions).length === 0) return document.schema
return { ...document.schema, $defs: document.definitions }
}
export { ToolFailure }
export * as Tool from "./tool"

View File

@@ -0,0 +1,175 @@
import { describe, expect } from "bun:test"
import { Effect, Schema, Stream } from "effect"
import { LLM } from "../src"
import { Route, Endpoint, LLMClient, Protocol, type RouteModelInput, type FramingDef } from "../src/route"
import { ModelRef } from "../src/schema"
import { testEffect } from "./lib/effect"
import { dynamicResponse } from "./lib/http"
const updateModel = (model: ModelRef, patch: Partial<ModelRef.Input>) => ModelRef.update(model, patch)
const Json = Schema.fromJsonString(Schema.Unknown)
const encodeJson = Schema.encodeSync(Json)
type FakeBody = {
readonly body: string
}
const FakeEvent = Schema.Union([
Schema.Struct({ type: Schema.Literal("text"), text: Schema.String }),
Schema.Struct({ type: Schema.Literal("finish"), reason: Schema.Literal("stop") }),
])
type FakeEvent = Schema.Schema.Type<typeof FakeEvent>
const decodeFakeEvents = Schema.decodeUnknownEffect(Schema.fromJsonString(Schema.Array(FakeEvent)))
const fakeFraming: FramingDef<FakeEvent> = {
id: "fake-json-array",
frame: (bytes) =>
Stream.fromEffect(
bytes.pipe(
Stream.decodeText(),
Stream.runFold(
() => "",
(text, event) => text + event,
),
Effect.flatMap(decodeFakeEvents),
Effect.orDie,
),
).pipe(Stream.flatMap(Stream.fromIterable)),
}
const request = LLM.request({
id: "req_1",
model: LLM.model({
id: "fake-model",
provider: "fake-provider",
route: "fake",
baseURL: "https://fake.local",
}),
prompt: "hello",
})
const raiseEvent = (event: FakeEvent): import("../src/schema").LLMEvent =>
event.type === "finish" ? { type: "request-finish", reason: event.reason } : { type: "text-delta", text: event.text }
const fakeProtocol = Protocol.make<FakeBody, FakeEvent, FakeEvent, void>({
id: "fake",
body: {
schema: Schema.Struct({
body: Schema.String,
}),
from: (request) =>
Effect.succeed({
body: [
...request.messages
.flatMap((message) => message.content)
.filter((part) => part.type === "text")
.map((part) => part.text),
...request.tools.map((tool) => `tool:${tool.name}:${tool.description}`),
].join("\n"),
}),
},
stream: {
event: FakeEvent,
initial: () => undefined,
step: (state, event) => Effect.succeed([state, [raiseEvent(event)]] as const),
},
})
const fake = Route.make({
id: "fake",
protocol: fakeProtocol,
endpoint: Endpoint.path("/chat"),
framing: fakeFraming,
})
const gemini = Route.make({
id: "gemini-fake",
protocol: fakeProtocol,
endpoint: Endpoint.path("/chat"),
framing: fakeFraming,
})
const echoLayer = dynamicResponse(({ text, respond }) =>
Effect.succeed(
respond(
encodeJson([
{ type: "text", text: `echo:${text}` },
{ type: "finish", reason: "stop" },
]),
),
),
)
const it = testEffect(echoLayer)
describe("llm route", () => {
it.effect("stream and generate use the route pipeline", () =>
Effect.gen(function* () {
const llm = yield* LLMClient.Service
const events = Array.from(yield* llm.stream(request).pipe(Stream.runCollect))
const response = yield* llm.generate(request)
expect(events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
expect(response.events.map((event) => event.type)).toEqual(["text-delta", "request-finish"])
}),
)
it.effect("selects routes by request route", () =>
Effect.gen(function* () {
const llm = yield* LLMClient.Service
const prepared = yield* llm.prepare(
LLM.updateRequest(request, { model: updateModel(request.model, { route: "gemini-fake" }) }),
)
expect(prepared.route).toBe("gemini-fake")
}),
)
it.effect("maps model input before building refs", () =>
Effect.gen(function* () {
const mapped = Route.model<RouteModelInput & { readonly region?: string }>(
fake,
{ provider: "fake-provider", baseURL: "https://fake.local" },
{
mapInput: (input) => {
const { region, ...rest } = input
return { ...rest, native: { region } }
},
},
)
expect(mapped({ id: "fake-model", region: "us-east-1" }).native).toEqual({ region: "us-east-1" })
}),
)
it.effect("rejects duplicate route ids", () =>
Effect.gen(function* () {
expect(() =>
Route.make({
id: "fake",
protocol: Protocol.make({
...fakeProtocol,
body: {
...fakeProtocol.body,
from: () => Effect.succeed({ body: "late-default" }),
},
}),
endpoint: Endpoint.path("/chat"),
framing: fakeFraming,
}),
).toThrow('Duplicate LLM route id "fake"')
}),
)
it.effect("rejects missing route", () =>
Effect.gen(function* () {
const llm = yield* LLMClient.Service
const error = yield* llm
.prepare(LLM.updateRequest(request, { model: updateModel(request.model, { route: "missing" }) }))
.pipe(Effect.flip)
expect(error.message).toContain("No LLM route")
}),
)
})

View File

@@ -0,0 +1,100 @@
import { Config } from "effect"
import type { Auth } from "../src/route/auth"
import type { ModelFactory } from "../src/route/auth-options"
import { Auth as RuntimeAuth } from "../src/route/auth"
import * as Azure from "../src/providers/azure"
import * as OpenAI from "../src/providers/openai"
type BaseOptions = {
readonly baseURL?: string
readonly headers?: Record<string, string>
}
type Model = {
readonly id: string
}
declare const auth: Auth
declare const optionalAuthModel: ModelFactory<BaseOptions, "optional", Model>
declare const requiredAuthModel: ModelFactory<BaseOptions, "required", Model>
const configApiKey = Config.redacted("OPENAI_API_KEY")
optionalAuthModel("gpt-4.1-mini")
optionalAuthModel("gpt-4.1-mini", {})
optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test" })
optionalAuthModel("gpt-4.1-mini", { apiKey: configApiKey })
optionalAuthModel("gpt-4.1-mini", { auth })
optionalAuthModel("gpt-4.1-mini", { auth, baseURL: "https://gateway.example.com/v1" })
optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test", headers: { "x-source": "test" } })
// @ts-expect-error auth is an override, so apiKey cannot be supplied with it.
optionalAuthModel("gpt-4.1-mini", { apiKey: "sk-test", auth })
requiredAuthModel("custom-model", { apiKey: "key" })
requiredAuthModel("custom-model", { apiKey: configApiKey })
requiredAuthModel("custom-model", { auth })
requiredAuthModel("custom-model", { auth, headers: { "x-tenant-id": "tenant" } })
// @ts-expect-error providers without config fallback need apiKey or auth.
requiredAuthModel("custom-model")
// @ts-expect-error providers without config fallback need apiKey or auth.
requiredAuthModel("custom-model", {})
// @ts-expect-error auth is an override, so apiKey cannot be supplied with it.
requiredAuthModel("custom-model", { apiKey: "key", auth })
OpenAI.responses("gpt-4.1-mini")
OpenAI.responses("gpt-4.1-mini", {})
OpenAI.responses("gpt-4.1-mini", { apiKey: "sk-test" })
OpenAI.responses("gpt-4.1-mini", { apiKey: configApiKey })
OpenAI.responses("gpt-4.1-mini", { auth: RuntimeAuth.bearer("oauth-token") })
OpenAI.responses("gpt-4.1-mini", {
auth: RuntimeAuth.headers({ authorization: "Bearer gateway" }),
baseURL: "https://gateway.example.com/v1",
})
OpenAI.responses("gpt-4.1-mini", {
generation: { maxTokens: 100 },
providerOptions: { openai: { store: false } },
})
// @ts-expect-error apiKey only accepts string, Redacted<string>, or Config<string | Redacted<string>>.
OpenAI.responses("gpt-4.1-mini", { apiKey: 123 })
// @ts-expect-error provider helpers reject unknown top-level options.
OpenAI.responses("gpt-4.1-mini", { bogus: true })
// @ts-expect-error common generation options remain typed.
OpenAI.responses("gpt-4.1-mini", { generation: { maxTokens: "many" } })
// @ts-expect-error provider-native options remain typed.
OpenAI.responses("gpt-4.1-mini", { providerOptions: { openai: { store: "false" } } })
// @ts-expect-error auth is an override, so OpenAI rejects apiKey with auth.
OpenAI.responses("gpt-4.1-mini", { apiKey: "sk-test", auth: RuntimeAuth.bearer("oauth-token") })
OpenAI.chat("gpt-4.1-mini")
OpenAI.chat("gpt-4.1-mini", { apiKey: "sk-test" })
OpenAI.chat("gpt-4.1-mini", { apiKey: configApiKey })
OpenAI.chat("gpt-4.1-mini", { auth: RuntimeAuth.bearer("oauth-token") })
// @ts-expect-error auth is an override, so OpenAI Chat rejects apiKey with auth.
OpenAI.chat("gpt-4.1-mini", { apiKey: "sk-test", auth: RuntimeAuth.bearer("oauth-token") })
// @ts-expect-error Azure requires at least one of `resourceName` or `baseURL`.
Azure.responses("deployment")
Azure.responses("deployment", { apiKey: "azure-key", resourceName: "resource" })
Azure.responses("deployment", { apiKey: configApiKey, resourceName: "resource" })
Azure.responses("deployment", { auth: RuntimeAuth.header("api-key", "azure-key"), resourceName: "resource" })
// @ts-expect-error auth is an override, so Azure rejects apiKey with auth.
Azure.responses("deployment", { apiKey: "azure-key", auth: RuntimeAuth.header("api-key", "override") })
// @ts-expect-error Azure requires at least one of `resourceName` or `baseURL`.
Azure.chat("deployment")
Azure.chat("deployment", { apiKey: "azure-key", resourceName: "resource" })
Azure.chat("deployment", { apiKey: configApiKey, resourceName: "resource" })
Azure.chat("deployment", { auth: RuntimeAuth.header("api-key", "azure-key"), resourceName: "resource" })
// @ts-expect-error auth is an override, so Azure Chat rejects apiKey with auth.
Azure.chat("deployment", { apiKey: "azure-key", auth: RuntimeAuth.header("api-key", "override") })

View File

@@ -0,0 +1,101 @@
import { describe, expect } from "bun:test"
import { ConfigProvider, Effect } from "effect"
import { Headers } from "effect/unstable/http"
import { LLM } from "../src"
import { Auth } from "../src/route/auth"
import { it } from "./lib/effect"
const request = LLM.request({
id: "req_auth",
model: LLM.model({ id: "fake-model", provider: "fake", route: "fake", baseURL: "https://fake.local" }),
prompt: "hello",
})
const input = {
request,
method: "POST" as const,
url: "https://example.test/v1/chat",
body: "{}",
headers: Headers.fromInput({ "x-existing": "yes" }),
}
const withEnv = (env: Record<string, string>) => Effect.provide(ConfigProvider.layer(ConfigProvider.fromEnv({ env })))
describe("Auth", () => {
it.effect("renders a config credential as bearer auth", () =>
Effect.gen(function* () {
const headers = yield* Auth.config("OPENAI_API_KEY")
.bearer()
.apply(input)
.pipe(withEnv({ OPENAI_API_KEY: "sk-test" }))
expect(headers.authorization).toBe("Bearer sk-test")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("falls back between credential sources before rendering", () =>
Effect.gen(function* () {
const headers = yield* Auth.config("PRIMARY_KEY")
.orElse(Auth.value("fallback-key"))
.pipe(Auth.header("x-api-key"))
.apply(input)
.pipe(withEnv({}))
expect(headers["x-api-key"]).toBe("fallback-key")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("composes header auth in sequence", () =>
Effect.gen(function* () {
const headers = yield* Auth.headers({ "x-tenant-id": "tenant-1" })
.andThen(Auth.bearer("gateway-token"))
.apply(input)
expect(headers["x-tenant-id"]).toBe("tenant-1")
expect(headers.authorization).toBe("Bearer gateway-token")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("renders a direct secret as a custom header", () =>
Effect.gen(function* () {
const headers = yield* Auth.header("api-key", "direct-key").apply(input)
expect(headers["api-key"]).toBe("direct-key")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("renders bearer auth into a custom header", () =>
Effect.gen(function* () {
const headers = yield* Auth.bearerHeader("cf-aig-authorization", "gateway-token").apply(input)
expect(headers["cf-aig-authorization"]).toBe("Bearer gateway-token")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("falls back between full auth values", () =>
Effect.gen(function* () {
const headers = yield* Auth.config("OPENAI_API_KEY")
.bearer()
.orElse(Auth.headers({ authorization: "Bearer supplied" }))
.apply(input)
.pipe(withEnv({}))
expect(headers.authorization).toBe("Bearer supplied")
expect(headers["x-existing"]).toBe("yes")
}),
)
it.effect("can intentionally leave auth untouched", () =>
Effect.gen(function* () {
const headers = yield* Auth.none.apply(input)
expect(headers.authorization).toBeUndefined()
expect(headers["x-existing"]).toBe("yes")
}),
)
})

View File

@@ -0,0 +1,57 @@
import { describe, expect, test } from "bun:test"
import { LLM } from "../src"
import { Endpoint } from "../src/route"
const request = (
input: {
readonly baseURL: string
readonly queryParams?: Record<string, string>
},
) =>
LLM.request({
model: LLM.model({
id: "model-1",
provider: "test",
route: "test-route",
baseURL: input.baseURL,
queryParams: input.queryParams,
}),
prompt: "hello",
})
describe("Endpoint", () => {
test("appends a static path to the model's baseURL", () => {
const url = Endpoint.render(Endpoint.path("/chat"), {
request: request({ baseURL: "https://api.example.test/v1/" }),
body: {},
})
expect(url.toString()).toBe("https://api.example.test/v1/chat")
})
test("model query params are appended to the rendered URL", () => {
const url = Endpoint.render(Endpoint.path("/chat?alt=sse"), {
request: request({
baseURL: "https://custom.example.test/root/",
queryParams: { "api-version": "2026-01-01", alt: "json" },
}),
body: {},
})
expect(url.toString()).toBe("https://custom.example.test/root/chat?alt=json&api-version=2026-01-01")
})
test("path may be a function of the validated body", () => {
const url = Endpoint.render(
Endpoint.path<{ readonly modelId: string }>(({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`),
{
request: request({ baseURL: "https://bedrock-runtime.us-east-1.amazonaws.com" }),
body: { modelId: "us.amazon.nova-micro-v1:0" },
},
)
expect(url.toString()).toBe(
"https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
)
})
})

View File

@@ -0,0 +1,416 @@
import { describe, expect } from "bun:test"
import { Effect, Fiber, Layer, Random, Ref } from "effect"
import * as TestClock from "effect/testing/TestClock"
import { Headers, HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"
import { LLM, LLMError } from "../src"
import { LLMClient, RequestExecutor } from "../src/route"
import * as OpenAIChat from "../src/protocols/openai-chat"
import { dynamicResponse } from "./lib/http"
import { deltaChunk } from "./lib/openai-chunks"
import { sseRaw } from "./lib/sse"
import { it } from "./lib/effect"
const request = HttpClientRequest.post("https://provider.test/v1/chat?api_key=secret&key=secret&debug=1").pipe(
HttpClientRequest.setHeaders(Headers.fromInput({ authorization: "Bearer secret", "x-safe": "visible" })),
)
const secretRequest = HttpClientRequest.post("https://provider.test/v1/chat?api_key=query-secret-123&debug=1").pipe(
HttpClientRequest.setHeaders(Headers.fromInput({ authorization: "Bearer header-secret-456" })),
)
const responsesLayer = (responses: ReadonlyArray<Response>) =>
RequestExecutor.layer.pipe(
Layer.provide(
Layer.unwrap(
Effect.gen(function* () {
const cursor = yield* Ref.make(0)
return Layer.succeed(
HttpClient.HttpClient,
HttpClient.make((request) =>
Effect.gen(function* () {
const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1)
return HttpClientResponse.fromWeb(request, responses[index] ?? responses[responses.length - 1])
}),
),
)
}),
),
),
)
const countedResponsesLayer = (attempts: Ref.Ref<number>, responses: ReadonlyArray<Response>) =>
RequestExecutor.layer.pipe(
Layer.provide(
Layer.unwrap(
Effect.gen(function* () {
const cursor = yield* Ref.make(0)
return Layer.succeed(
HttpClient.HttpClient,
HttpClient.make((request) =>
Effect.gen(function* () {
yield* Ref.update(attempts, (value) => value + 1)
const index = yield* Ref.getAndUpdate(cursor, (value) => value + 1)
return HttpClientResponse.fromWeb(request, responses[index] ?? responses[responses.length - 1])
}),
),
)
}),
),
),
)
const randomMidpoint = {
nextDoubleUnsafe: () => 0.5,
nextIntUnsafe: () => 0,
}
const expectLLMError = (error: unknown) => {
expect(error).toBeInstanceOf(LLMError)
if (!(error instanceof LLMError)) throw new Error("expected LLMError")
return error
}
const errorHttp = (error: LLMError) => ("http" in error.reason ? error.reason.http : undefined)
describe("RequestExecutor", () => {
it.effect("returns redacted diagnostics for retryable rate limits", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error).toMatchObject({
retryable: true,
retryAfterMs: 0,
reason: {
_tag: "RateLimit",
rateLimit: { retryAfterMs: 0 },
http: {
requestId: "req_123",
request: {
method: "POST",
url: "https://provider.test/v1/chat?api_key=%3Credacted%3E&key=%3Credacted%3E&debug=1",
headers: { authorization: "<redacted>", "x-safe": "visible" },
},
response: {
status: 429,
headers: {
"retry-after-ms": "0",
"x-request-id": "req_123",
"x-api-key": "<redacted>",
},
},
},
},
})
expect(errorHttp(error)?.body).toBe("rate limited")
}).pipe(
Effect.provide(
responsesLayer([
...Array.from(
{ length: 3 },
() =>
new Response("rate limited", {
status: 429,
headers: { "retry-after-ms": "0", "x-request-id": "req_123", "x-api-key": "secret" },
}),
),
]),
),
),
)
it.effect("honors current redacted header names in diagnostics", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(errorHttp(error)?.request.headers["x-safe"]).toBe("<redacted>")
expect(errorHttp(error)?.response?.headers["x-safe"]).toBe("<redacted>")
}).pipe(
Effect.provide(responsesLayer([new Response("bad", { status: 400, headers: { "x-safe": "response-secret" } })])),
Effect.provideService(Headers.CurrentRedactedNames, ["x-safe"]),
),
)
it.effect("extracts OpenAI-style rate-limit diagnostics", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "RateLimit" })
expect(error.reason._tag === "RateLimit" ? error.reason.rateLimit : undefined).toEqual({
retryAfterMs: 0,
limit: { requests: "500", tokens: "30000" },
remaining: { requests: "499", tokens: "29900" },
reset: { requests: "1s", tokens: "10s" },
})
}).pipe(
Effect.provide(
responsesLayer(
Array.from(
{ length: 3 },
() =>
new Response("rate limited", {
status: 429,
headers: {
"retry-after-ms": "0",
"x-ratelimit-limit-requests": "500",
"x-ratelimit-limit-tokens": "30000",
"x-ratelimit-remaining-requests": "499",
"x-ratelimit-remaining-tokens": "29900",
"x-ratelimit-reset-requests": "1s",
"x-ratelimit-reset-tokens": "10s",
},
}),
),
),
),
),
)
it.effect("extracts Anthropic-style rate-limit diagnostics", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "ProviderInternal" })
expect(errorHttp(error)?.rateLimit).toEqual({
retryAfterMs: 0,
limit: { requests: "100", "input-tokens": "10000" },
remaining: { requests: "12", "input-tokens": "9000" },
reset: { requests: "2026-05-06T12:00:00Z", "input-tokens": "2026-05-06T12:00:10Z" },
})
}).pipe(
Effect.provide(
responsesLayer(
Array.from(
{ length: 3 },
() =>
new Response("overloaded", {
status: 529,
headers: {
"retry-after-ms": "0",
"anthropic-ratelimit-requests-limit": "100",
"anthropic-ratelimit-requests-remaining": "12",
"anthropic-ratelimit-requests-reset": "2026-05-06T12:00:00Z",
"anthropic-ratelimit-input-tokens-limit": "10000",
"anthropic-ratelimit-input-tokens-remaining": "9000",
"anthropic-ratelimit-input-tokens-reset": "2026-05-06T12:00:10Z",
},
}),
),
),
),
),
)
it.effect("retries retryable status responses before returning the stream", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const response = yield* executor.execute(request)
expect(response.status).toBe(200)
expect(yield* response.text).toBe("ok")
}).pipe(
Effect.provide(
responsesLayer([
new Response("busy", { status: 503, headers: { "retry-after-ms": "0" } }),
new Response("ok", { status: 200 }),
]),
),
),
)
it.effect("marks 504 and 529 status responses retryable", () =>
Effect.gen(function* () {
const failWith = (status: number) =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "ProviderInternal", status })
expect(error.retryable).toBe(true)
}).pipe(
Effect.provide(
responsesLayer(
Array.from(
{ length: 3 },
() =>
new Response("retry", {
status,
headers: { "retry-after-ms": "0" },
}),
),
),
),
)
yield* failWith(504)
yield* failWith(529)
}),
)
it.effect("does not retry non-retryable status responses and truncates large bodies", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "Authentication" })
expect(error.retryable).toBe(false)
expect(errorHttp(error)?.bodyTruncated).toBe(true)
expect(errorHttp(error)?.body).toHaveLength(16_384)
}).pipe(
Effect.provide(
responsesLayer([
new Response("x".repeat(20_000), { status: 401 }),
new Response("should not retry", { status: 200 }),
]),
),
),
)
it.effect("redacts common secret fields in response bodies", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(request).pipe(Effect.flip)
expectLLMError(error)
expect(errorHttp(error)?.body).toContain('"key":"<redacted>"')
expect(errorHttp(error)?.body).toContain("api_key=<redacted>")
expect(errorHttp(error)?.body).not.toContain("body-secret")
expect(errorHttp(error)?.body).not.toContain("query-secret")
}).pipe(
Effect.provide(
responsesLayer([
new Response('{"error":{"message":"bad","key":"body-secret","detail":"api_key=query-secret"}}', {
status: 400,
}),
]),
),
),
)
it.effect("redacts echoed request secret values in response bodies", () =>
Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const error = yield* executor.execute(secretRequest).pipe(Effect.flip)
expectLLMError(error)
expect(errorHttp(error)?.body).toContain("provider echoed <redacted>")
expect(errorHttp(error)?.body).toContain("authorization <redacted>")
expect(errorHttp(error)?.body).not.toContain("query-secret-123")
expect(errorHttp(error)?.body).not.toContain("header-secret-456")
}).pipe(
Effect.provide(
responsesLayer([
new Response("provider echoed query-secret-123 and authorization header-secret-456", { status: 400 }),
]),
),
),
)
it.effect("honors Retry-After delta seconds before retrying", () =>
Effect.gen(function* () {
const attempts = yield* Ref.make(0)
return yield* Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const fiber = yield* executor.execute(request).pipe(Effect.forkChild)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(1_999)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(1)
const response = yield* Fiber.join(fiber)
expect(response.status).toBe(200)
expect(yield* Ref.get(attempts)).toBe(2)
}).pipe(
Effect.provide(
countedResponsesLayer(attempts, [
new Response("busy", { status: 503, headers: { "retry-after": "2" } }),
new Response("ok", { status: 200 }),
]),
),
)
}),
)
it.effect("uses exponential jittered delay when retry-after is absent", () =>
Effect.gen(function* () {
const attempts = yield* Ref.make(0)
return yield* Effect.gen(function* () {
const executor = yield* RequestExecutor.Service
const fiber = yield* executor.execute(request).pipe(Effect.flip, Effect.forkChild)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(499)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(1)
yield* TestClock.adjust(1)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(2)
yield* TestClock.adjust(999)
yield* Effect.yieldNow
expect(yield* Ref.get(attempts)).toBe(2)
yield* TestClock.adjust(1)
const error = yield* Fiber.join(fiber)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "ProviderInternal" })
expect(yield* Ref.get(attempts)).toBe(3)
}).pipe(
Effect.provide(
countedResponsesLayer(attempts, [
new Response("busy", { status: 503 }),
new Response("still busy", { status: 503 }),
new Response("done retrying", { status: 503 }),
]),
),
)
}).pipe(Effect.provideService(Random.Random, randomMidpoint)),
)
it.effect("does not retry after a successful response reaches stream parsing", () =>
Effect.gen(function* () {
const attempts = yield* Ref.make(0)
const model = OpenAIChat.model({ id: "gpt-4o-mini", baseURL: "https://api.openai.test/v1" })
const error = yield* LLMClient.generate(LLM.request({ model, prompt: "Say hello." })).pipe(
Effect.provide(
dynamicResponse((input) =>
Ref.update(attempts, (value) => value + 1).pipe(
Effect.as(
input.respond(
sseRaw(
`data: ${JSON.stringify(deltaChunk({ role: "assistant", content: "Hello" }))}`,
"data: not-json",
),
{ headers: { "content-type": "text/event-stream" } },
),
),
),
),
),
Effect.flip,
)
expectLLMError(error)
expect(error.reason).toMatchObject({ _tag: "InvalidProviderOutput" })
expect(yield* Ref.get(attempts)).toBe(1)
}),
)
})

View File

@@ -0,0 +1,56 @@
import { describe, expect, test } from "bun:test"
import { LLM, LLMClient, Provider } from "@opencode-ai/llm"
import { Route, Protocol } from "@opencode-ai/llm/route"
import { Provider as ProviderSubpath } from "@opencode-ai/llm/provider"
import { Cloudflare, OpenAI, OpenAICompatible, OpenRouter, XAI } from "@opencode-ai/llm/providers"
import * as GitHubCopilot from "@opencode-ai/llm/providers/github-copilot"
import { OpenAIChat, OpenAICompatibleChat, OpenAIResponses } from "@opencode-ai/llm/protocols"
import * as AnthropicMessages from "@opencode-ai/llm/protocols/anthropic-messages"
describe("public exports", () => {
test("root exposes app-facing runtime APIs", () => {
expect(LLM.request).toBeFunction()
expect(LLMClient.Service).toBeFunction()
expect(LLMClient.layer).toBeDefined()
expect(Provider.make).toBeFunction()
expect(ProviderSubpath.make).toBe(Provider.make)
})
test("route barrel exposes route-authoring APIs", () => {
expect(Route.make).toBeFunction()
expect(Protocol.make).toBeFunction()
})
test("provider barrels expose user-facing facades", () => {
expect(OpenAI.model).toBeFunction()
expect(OpenAI.provider.model).toBe(OpenAI.model)
expect(OpenAI.apis.responses).toBe(OpenAI.responses)
expect(OpenAI.apis.responsesWebSocket).toBe(OpenAI.responsesWebSocket)
expect(OpenAICompatible.deepseek.model).toBeFunction()
expect(Cloudflare.model).toBeFunction()
expect(Cloudflare.provider.model).toBe(Cloudflare.model)
expect(Cloudflare.aiGateway).toBeFunction()
expect(Cloudflare.workersAI).toBeFunction()
expect(OpenRouter.model).toBeFunction()
expect(OpenRouter.provider.model).toBe(OpenRouter.model)
expect(XAI.model).toBeFunction()
expect(XAI.provider.model).toBe(XAI.model)
expect(XAI.apis.responses).toBe(XAI.responses)
expect(XAI.apis.chat).toBe(XAI.chat)
expect(XAI.responses("grok-4.3", { apiKey: "fixture" })).toMatchObject({
route: "openai-responses",
})
expect(XAI.chat("grok-4.3", { apiKey: "fixture" })).toMatchObject({
route: "openai-compatible-chat",
})
expect(GitHubCopilot.model).toBeFunction()
})
test("protocol barrels expose supported low-level routes", () => {
expect(OpenAIChat.route.id).toBe("openai-chat")
expect(OpenAICompatibleChat.route.id).toBe("openai-compatible-chat")
expect(OpenAIResponses.route.id).toBe("openai-responses")
expect(OpenAIResponses.webSocketRoute.id).toBe("openai-responses-websocket")
expect(AnthropicMessages.route.id).toBe("anthropic-messages")
})
})

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/accepts-malformed-assistant-tool-order-with-default-patch",
"recordedAt": "2026-05-05T20:09:16.245Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"messages\":[{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"I will check the weather.\"}]},{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"call_1\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"call_1\",\"content\":\"{\\\"temperature\\\":\\\"72F\\\"}\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Use that result to answer briefly.\",\"cache_control\":{\"type\":\"ephemeral\"}}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{}}}],\"stream\":true,\"max_tokens\":4096}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01SikJVFaMR1XLMtavUhvuog\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":638,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":1,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"The\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\" weather in Paris is currently 72°F.\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":638,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":14} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}
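
All of the recordings in this diff share the shape below. The field names are taken directly from the JSON above; this is a reading of the data, not the recorder's actual type definitions:

// Sketch of the fixture schema as it appears in these files.
interface RecordedFixture {
  version: number
  metadata: {
    name: string
    recordedAt: string
    tags: string[]
    // Newer recordings (see the cloudflare fixtures further down) also carry
    // provider, route, transport, and model at this level.
    [extra: string]: unknown
  }
  interactions: Array<{
    transport: "http"
    request: { method: string; url: string; headers: Record<string, string>; body?: string }
    response: {
      status: number
      headers: Record<string, string>
      body: string
      bodyEncoding?: "base64" // present when the body is binary, e.g. AWS event streams
    }
  }>
}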

View File

@@ -0,0 +1,56 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/claude-opus-4-7-drives-a-tool-loop",
"recordedAt": "2026-05-03T19:59:44.186Z",
"tags": [
"prefix:anthropic-messages",
"provider:anthropic",
"protocol:anthropic-messages",
"tool",
"tool-loop",
"golden",
"flagship"
]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-opus-4-7\",\"system\":[{\"type\":\"text\",\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is the weather in Paris?\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_tokens\":80}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-opus-4-7\",\"id\":\"msg_01DgAEgLgB1ZhavZon4qGE1t\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":798,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":0,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\": \"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\\\"Pa\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"ris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":798,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":66} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
},
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-opus-4-7\",\"system\":[{\"type\":\"text\",\"text\":\"Use the get_weather tool, then answer in one short sentence.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is the weather in Paris?\"}]},{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"toolu_01M8nJQQMxqpv1VaPYuJKT4j\",\"content\":\"{\\\"temperature\\\":22,\\\"condition\\\":\\\"sunny\\\"}\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"stream\":true,\"max_tokens\":80}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-opus-4-7\",\"id\":\"msg_011KJqj32QjkrUAiBFxhmEoG\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":895,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":5,\"service_tier\":\"standard\",\"inference_geo\":\"global\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Paris is curr\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"ently sunny at 22°C.\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":895,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":19}}\n\nevent: message_stop\ndata: {\"type\":\"message_stop\"}\n\n"
}
}
]
}
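
This recording captures a full tool loop: the first response stops with stop_reason "tool_use", the caller executes the tool, and the second request replays the transcript with the tool_result appended. It also shows why clients must buffer input_json_delta fragments, since the tool input arrives as split JSON text ("{\"city\": ", "\"Pa", "ris\"}") that only parses after content_block_stop. A minimal accumulation sketch over already-parsed events (the event type below is illustrative, not the package's own):

type ToolStreamEvent =
  | { type: "content_block_start"; index: number; content_block: { type: "tool_use"; id: string; name: string } }
  | { type: "content_block_delta"; index: number; delta: { type: "input_json_delta"; partial_json: string } }
  | { type: "content_block_stop"; index: number }

function collectToolCall(events: ToolStreamEvent[]): { id: string; name: string; input: unknown } {
  let id = ""
  let name = ""
  let json = ""
  for (const event of events) {
    if (event.type === "content_block_start") ({ id, name } = event.content_block)
    if (event.type === "content_block_delta") json += event.delta.partial_json
  }
  // First interaction above: id "toolu_01M8nJQQMxqpv1VaPYuJKT4j", name "get_weather", input { city: "Paris" }
  return { id, name, input: JSON.parse(json) }
}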

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/rejects-malformed-assistant-tool-order-without-patch",
"recordedAt": "2026-05-05T20:08:42.597Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool", "sad-path"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"messages\":[{\"role\":\"assistant\",\"content\":[{\"type\":\"tool_use\",\"id\":\"call_1\",\"name\":\"get_weather\",\"input\":{\"city\":\"Paris\"}},{\"type\":\"text\",\"text\":\"I will check the weather.\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"call_1\",\"content\":\"{\\\"temperature\\\":\\\"72F\\\"}\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Use that result to answer briefly.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get weather\",\"input_schema\":{\"type\":\"object\",\"properties\":{}}}],\"stream\":true,\"max_tokens\":4096}"
},
"response": {
"status": 400,
"headers": {
"content-type": "application/json"
},
"body": "{\"type\":\"error\",\"error\":{\"type\":\"invalid_request_error\",\"message\":\"messages.1: `tool_use` ids were found without `tool_result` blocks immediately after: call_1. Each `tool_use` block must have a corresponding `tool_result` block in the next message.\"},\"request_id\":\"req_011Cak2XdJgnzxKCY2BC2Beh\"}"
}
}
]
}
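
Read against the accepts-malformed-assistant-tool-order-with-default-patch recording above: here the raw request keeps a text block after the tool_use inside one assistant message and the API returns 400, while the patched request sends the same content as a text-only assistant message followed by a tool_use-only one, which Anthropic accepts. One plausible reading of that patch as a transform, inferred from the two request bodies rather than taken from the package's code:

type AssistantBlock =
  | { type: "text"; text: string }
  | { type: "tool_use"; id: string; name: string; input: unknown }

// Split a single malformed assistant message into text-first, tool_use-last
// messages, so the next user message can carry the tool_result directly.
function patchAssistantToolOrder(content: AssistantBlock[]): AssistantBlock[][] {
  const text = content.filter((block) => block.type === "text")
  const tools = content.filter((block) => block.type === "tool_use")
  return [text, tools].filter((group) => group.length > 0)
}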

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-text",
"recordedAt": "2026-04-28T21:18:45.535Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"You are concise.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Reply with exactly: Hello!\"}]}],\"stream\":true,\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01UodR8c3ezAK8rAfi8HAs8g\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":2,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"Hello!\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":18,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":5} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}
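
These Anthropic bodies all use two-line SSE framing: "event: <name>", then "data: <json>", then a blank line. A parser covering exactly what these recordings emit, run over the decoded response.body string (real SSE also allows multi-line data, comments, and id/retry fields, which the fixtures don't exercise):

function* parseAnthropicSse(body: string): Generator<{ event: string; data: unknown }> {
  for (const frame of body.split("\n\n")) {
    const event = /^event: (.+)$/m.exec(frame)?.[1]
    const data = /^data: (.+)$/m.exec(frame)?.[1]
    if (event && data) yield { event, data: JSON.parse(data) }
  }
}

// For the streams-text recording this yields message_start, content_block_start,
// ping, one text_delta ("Hello!"), content_block_stop, message_delta, message_stop.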

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "anthropic-messages/streams-tool-call",
"recordedAt": "2026-04-28T21:18:46.878Z",
"tags": ["prefix:anthropic-messages", "provider:anthropic", "protocol:anthropic-messages", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.anthropic.com/v1/messages",
"headers": {
"anthropic-version": "2023-06-01",
"content-type": "application/json"
},
"body": "{\"model\":\"claude-haiku-4-5-20251001\",\"system\":[{\"type\":\"text\",\"text\":\"Call tools exactly as requested.\"}],\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"tools\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}],\"tool_choice\":{\"type\":\"tool\",\"name\":\"get_weather\"},\"stream\":true,\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-haiku-4-5-20251001\",\"id\":\"msg_01RYgU7NUPMK4B9v8S7gVpCS\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"stop_details\":null,\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":16,\"service_tier\":\"standard\",\"inference_geo\":\"not_available\"}} }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"tool_use\",\"id\":\"toolu_012rmAruviySvUXSjgCPWVRu\",\"name\":\"get_weather\",\"input\":{},\"caller\":{\"type\":\"direct\"}} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\"{\\\"city\\\":\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"input_json_delta\",\"partial_json\":\" \\\"Paris\\\"}\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0 }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"tool_use\",\"stop_sequence\":null,\"stop_details\":null},\"usage\":{\"input_tokens\":677,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":33} }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
}
}
]
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-a-tool-call",
"recordedAt": "2026-04-28T21:18:46.929Z",
"tags": ["prefix:bedrock-converse", "provider:amazon-bedrock", "protocol:bedrock-converse", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"system\":[{\"text\":\"Call tools exactly as requested.\"}],\"inferenceConfig\":{\"maxTokens\":80,\"temperature\":0},\"toolConfig\":{\"tools\":[{\"toolSpec\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"inputSchema\":{\"json\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}}],\"toolChoice\":{\"tool\":{\"name\":\"get_weather\"}}}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAuQAAAFL9kIXUCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2NyIsInJvbGUiOiJhc3Npc3RhbnQifWf51EkAAAEMAAAAV56BJZoLOmV2ZW50LXR5cGUHABFjb250ZW50QmxvY2tTdGFydA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVFUiLCJzdGFydCI6eyJ0b29sVXNlIjp7Im5hbWUiOiJnZXRfd2VhdGhlciIsInRvb2xVc2VJZCI6InRvb2x1c2VfNmExcFB2bmM5OUdMS08zS0drVUEyTiJ9fX2LR7PFAAAA4gAAAFfCOY+BCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidG9vbFVzZSI6eyJpbnB1dCI6IntcImNpdHlcIjpcIlBhcmlzXCJ9In19LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTiJ9RkW+2gAAAIcAAABW5OxHKgs6ZXZlbnQtdHlwZQcAEGNvbnRlbnRCbG9ja1N0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwicCI6ImFiYyJ9y6nrtwAAAK4AAABRtlmf/As6ZXZlbnQtdHlwZQcAC21lc3NhZ2VTdG9wDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUJDREVGR0hJSktMTU5PUFFSUyIsInN0b3BSZWFzb24iOiJ0b29sX3VzZSJ9MTlQawAAAOIAAABOplInQQs6ZXZlbnQtdHlwZQcACG1ldGFkYXRhDTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsibWV0cmljcyI6eyJsYXRlbmN5TXMiOjM1NX0sInAiOiJhYmNkZWZnaGlqayIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjo0MTksIm91dHB1dFRva2VucyI6MTYsInNlcnZlclRvb2xVc2FnZSI6e30sInRvdGFsVG9rZW5zIjo0MzV9fU1tVJc=",
"bodyEncoding": "base64"
}
}
]
}
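
Unlike the SSE recordings, the Bedrock bodies are base64 because application/vnd.amazon.eventstream is a binary framing: each message starts with a 4-byte big-endian total length, then headers (including :event-type) and a JSON payload, bracketed by CRC32 checks. A decoding sketch using the published @smithy/eventstream-codec and @smithy/util-utf8 packages:

import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"

const codec = new EventStreamCodec(toUtf8, fromUtf8)

function decodeEventStream(base64Body: string): { eventType: string; payload: unknown }[] {
  const bytes = Uint8Array.from(atob(base64Body), (c) => c.charCodeAt(0))
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)
  const events: { eventType: string; payload: unknown }[] = []
  let offset = 0
  while (offset < bytes.byteLength) {
    const length = view.getUint32(offset) // total message length from the big-endian prelude
    const message = codec.decode(bytes.subarray(offset, offset + length))
    events.push({
      eventType: String(message.headers[":event-type"]?.value),
      payload: JSON.parse(toUtf8(message.body)),
    })
    offset += length
  }
  return events
}

// For the recording above: messageStart, contentBlockStart, contentBlockDelta
// (carrying the full {"city":"Paris"} input), contentBlockStop, messageStop, metadata.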

View File

@@ -0,0 +1,29 @@
{
"version": 1,
"metadata": {
"name": "bedrock-converse/streams-text",
"recordedAt": "2026-04-28T21:18:46.553Z",
"tags": ["prefix:bedrock-converse", "provider:amazon-bedrock", "protocol:bedrock-converse"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/us.amazon.nova-micro-v1%3A0/converse-stream",
"headers": {
"content-type": "application/json"
},
"body": "{\"modelId\":\"us.amazon.nova-micro-v1:0\",\"messages\":[{\"role\":\"user\",\"content\":[{\"text\":\"Say hello.\"}]}],\"system\":[{\"text\":\"Reply with the single word 'Hello'.\"}],\"inferenceConfig\":{\"maxTokens\":16,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "application/vnd.amazon.eventstream"
},
"body": "AAAAmQAAAFI8UarQCzpldmVudC10eXBlBwAMbWVzc2FnZVN0YXJ0DTpjb250ZW50LXR5cGUHABBhcHBsaWNhdGlvbi9qc29uDTptZXNzYWdlLXR5cGUHAAVldmVudHsicCI6ImFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6QUIiLCJyb2xlIjoiYXNzaXN0YW50In3SL1jNAAAAvQAAAFd4etebCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IkhlbGxvIn0sInAiOiJhYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ekFCQ0RFIn2B0NR6AAAAxgAAAFf2eAZFCzpldmVudC10eXBlBwARY29udGVudEJsb2NrRGVsdGENOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJjb250ZW50QmxvY2tJbmRleCI6MCwiZGVsdGEiOnsidGV4dCI6IiJ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTIn3XaHMvAAAAhwAAAFbk7EcqCzpldmVudC10eXBlBwAQY29udGVudEJsb2NrU3RvcA06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7ImNvbnRlbnRCbG9ja0luZGV4IjowLCJwIjoiYWJjIn3Lqeu3AAAAjwAAAFFK+JlICzpldmVudC10eXBlBwALbWVzc2FnZVN0b3ANOmNvbnRlbnQtdHlwZQcAEGFwcGxpY2F0aW9uL2pzb24NOm1lc3NhZ2UtdHlwZQcABWV2ZW50eyJwIjoiYWJjZGVmZ2hpamtsbW4iLCJzdG9wUmVhc29uIjoiZW5kX3R1cm4ifZ+RQqEAAAECAAAATkXaMzsLOmV2ZW50LXR5cGUHAAhtZXRhZGF0YQ06Y29udGVudC10eXBlBwAQYXBwbGljYXRpb24vanNvbg06bWVzc2FnZS10eXBlBwAFZXZlbnR7Im1ldHJpY3MiOnsibGF0ZW5jeU1zIjozMDZ9LCJwIjoiYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNERUZHSElKS0xNTk9QUVJTVCIsInVzYWdlIjp7ImlucHV0VG9rZW5zIjoxMiwib3V0cHV0VG9rZW5zIjoyLCJzZXJ2ZXJUb29sVXNhZ2UiOnt9LCJ0b3RhbFRva2VucyI6MTR9fSnnkUk=",
"bodyEncoding": "base64"
}
}
]
}

View File

@@ -0,0 +1,37 @@
{
"version": 1,
"metadata": {
"name": "cloudflare-ai-gateway/cloudflare-ai-gateway-workers-ai-llama-3-1-8b-text",
"recordedAt": "2026-05-08T15:55:48.952Z",
"provider": "cloudflare-ai-gateway",
"route": "cloudflare-ai-gateway",
"transport": "http",
"model": "workers-ai/@cf/meta/llama-3.1-8b-instruct",
"tags": [
"prefix:cloudflare-ai-gateway",
"provider:cloudflare-ai-gateway",
"text",
"golden"
]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://gateway.ai.cloudflare.com/v1/{account}/{gateway}/compat/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"workers-ai/@cf/meta/llama-3.1-8b-instruct\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply exactly with: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":40,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"id\":\"id-1778255748911\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"}}]}\n\ndata: {\"id\":\"id-1778255748911\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"}}]}\n\ndata: {\"id\":\"id-1778255748911\",\"object\":\"chat.completion.chunk\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":2,\"total_tokens\":47}}\n\ndata: {\"id\":\"id-1778255748911\",\"object\":\"chat.completion.chunk\",\"created\":1778255748,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n"
}
}
]
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,37 @@
{
"version": 1,
"metadata": {
"name": "cloudflare-workers-ai/cloudflare-workers-ai-llama-3-1-8b-text",
"recordedAt": "2026-05-08T15:56:18.284Z",
"provider": "cloudflare-workers-ai",
"route": "cloudflare-workers-ai",
"transport": "http",
"model": "@cf/meta/llama-3.1-8b-instruct",
"tags": [
"prefix:cloudflare-workers-ai",
"provider:cloudflare-workers-ai",
"text",
"golden"
]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.cloudflare.com/client/v4/accounts/{account}/ai/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Reply exactly with: Hello!\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":40,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"id\":\"id-1778255778230\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"}}]}\n\ndata: {\"id\":\"id-1778255778230\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"}}]}\n\ndata: {\"id\":\"id-1778255778230\",\"object\":\"chat.completion.chunk\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":45,\"completion_tokens\":2,\"total_tokens\":47}}\n\ndata: {\"id\":\"id-1778255778230\",\"object\":\"chat.completion.chunk\",\"created\":1778255778,\"model\":\"@cf/meta/llama-3.1-8b-instruct\",\"choices\":[{\"index\":0,\"delta\":{},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":0,\"completion_tokens\":0,\"total_tokens\":0,\"prompt_tokens_details\":{\"cached_tokens\":0}}}\n\ndata: [DONE]\n\n"
}
}
]
}
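
Both Cloudflare recordings exercise the same OpenAI-compatible chat shape against different endpoints: the AI Gateway compat route and the account-scoped Workers AI route. The {account} and {gateway} URL segments read as recorder placeholders standing in for account-specific values, so replays do not depend on real identifiers; that is an inference from the recorded URLs, not something these files state.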

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-text",
"recordedAt": "2026-04-28T21:18:47.483Z",
"tags": ["prefix:gemini", "provider:google", "protocol:gemini"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Reply with exactly: Hello!\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"You are concise.\"}]},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"text\": \"Hello!\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0}],\"usageMetadata\": {\"promptTokenCount\": 11,\"candidatesTokenCount\": 2,\"totalTokenCount\": 29,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 11}],\"thoughtsTokenCount\": 16},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaczMAZ-b_uMP6u--iQg\"}\r\n\r\n"
}
}
]
}

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "gemini/streams-tool-call",
"recordedAt": "2026-04-28T21:18:48.285Z",
"tags": ["prefix:gemini", "provider:google", "protocol:gemini", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:streamGenerateContent?alt=sse",
"headers": {
"content-type": "application/json"
},
"body": "{\"contents\":[{\"role\":\"user\",\"parts\":[{\"text\":\"Call get_weather with city exactly Paris.\"}]}],\"systemInstruction\":{\"parts\":[{\"text\":\"Call tools exactly as requested.\"}]},\"tools\":[{\"functionDeclarations\":[{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"required\":[\"city\"],\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}}}}]}],\"toolConfig\":{\"functionCallingConfig\":{\"mode\":\"ANY\",\"allowedFunctionNames\":[\"get_weather\"]}},\"generationConfig\":{\"maxOutputTokens\":80,\"temperature\":0}}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream"
},
"body": "data: {\"candidates\": [{\"content\": {\"parts\": [{\"functionCall\": {\"name\": \"get_weather\",\"args\": {\"city\": \"Paris\"}},\"thoughtSignature\": \"CiQBDDnWx5RcSsS1UMbykQ5HWlrMu6wrxXGUhmZ0uRKLaMhDZaEKXwEMOdbHVoJAlfbOQyKB378pDZ/gkjWr3HP+dWw1us1kMG22g4G3oJvuTq/SrWS+7KYtSlvOxCKhW2l/2/TczpyGyGmANmsusDcxF1SKOYA5/8Hg0nI24MAlT3+91V/MCoUBAQw51seClFLy3E71v2H44F1kpmjgz8FeTRZofrjbaazfrT+w8Yxgdr3UgGagLMY4OadZemQTWckq9IAqRum78hrBg6NGtQvn15SbtfTNqI4PcxX/+qPo4/g4/ZT5kVORDhVqO8BVP/RA5GQ3ce3sRK8hSkvQlXSoXIPpHh6x7hBezIGXzw==\"}],\"role\": \"model\"},\"finishReason\": \"STOP\",\"index\": 0,\"finishMessage\": \"Model generated function call(s).\"}],\"usageMetadata\": {\"promptTokenCount\": 55,\"candidatesTokenCount\": 15,\"totalTokenCount\": 115,\"promptTokensDetails\": [{\"modality\": \"TEXT\",\"tokenCount\": 55}],\"thoughtsTokenCount\": 45},\"modelVersion\": \"gemini-2.5-flash\",\"responseId\": \"NyTxaYuTJ_OW_uMPgIPKgAg\"}\r\n\r\n"
}
}
]
}
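
Gemini's stream is bare "data:" frames with CRLF separators and no "event:" line, and tool calls arrive as one complete functionCall part (plus an opaque thoughtSignature) rather than incremental JSON. A sketch pulling the useful parts out of the two Gemini recordings above, with types trimmed to the fields actually present:

interface GeminiChunk {
  candidates?: Array<{
    content?: { parts?: Array<{ text?: string; functionCall?: { name: string; args: unknown } }> }
    finishReason?: string
  }>
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; totalTokenCount?: number }
}

function extractGeminiParts(body: string): { text: string; calls: { name: string; args: unknown }[] } {
  const text: string[] = []
  const calls: { name: string; args: unknown }[] = []
  for (const line of body.split(/\r?\n/)) {
    if (!line.startsWith("data: ")) continue
    const chunk: GeminiChunk = JSON.parse(line.slice("data: ".length))
    for (const part of chunk.candidates?.[0]?.content?.parts ?? []) {
      if (part.text) text.push(part.text)
      if (part.functionCall) calls.push(part.functionCall)
    }
  }
  return { text: text.join(""), calls }
}

// streams-text      -> { text: "Hello!", calls: [] }
// streams-tool-call -> { text: "", calls: [{ name: "get_weather", args: { city: "Paris" } }] }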

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-text",
"recordedAt": "2026-05-06T01:33:30.542Z",
"tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"You are concise.\"},{\"role\":\"user\",\"content\":\"Say hello in one short sentence.\"}],\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":20,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\",\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"g9SWm2h6J\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hello\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"lVzwlh\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"!\"},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"onzhziaLGv\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"LzUj1\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgbFetadY4JFl0fHK0g7OYsCOL\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_57133166c6\",\"choices\":[],\"usage\":{\"prompt_tokens\":22,\"completion_tokens\":2,\"total_tokens\":24,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"emMuPcvvOkI\"}\n\ndata: [DONE]\n\n"
}
}
]
}
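
The OpenAI chunks interleave a role-priming delta, content deltas, a finish chunk, and a final usage-only chunk whose choices array is empty (plus a non-standard obfuscation padding field), terminated by "data: [DONE]". A sketch folding the decoded body back into text plus usage:

interface ChatChunk {
  choices: Array<{ delta: { content?: string }; finish_reason?: string | null }>
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } | null
}

function foldChatStream(body: string): { text: string; usage?: ChatChunk["usage"] } {
  let text = ""
  let usage: ChatChunk["usage"] = undefined
  for (const line of body.split("\n")) {
    if (!line.startsWith("data: ") || line === "data: [DONE]") continue
    const chunk = JSON.parse(line.slice("data: ".length)) as ChatChunk
    text += chunk.choices[0]?.delta.content ?? ""
    usage = chunk.usage ?? usage // usage only appears on the final chunk
  }
  return { text, usage }
}

// For the recording above: { text: "Hello!", usage: { prompt_tokens: 22, completion_tokens: 2, total_tokens: 24, ... } }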

View File

@@ -0,0 +1,28 @@
{
"version": 1,
"metadata": {
"name": "openai-chat/streams-tool-call",
"recordedAt": "2026-05-06T01:33:31.127Z",
"tags": ["prefix:openai-chat", "provider:openai", "protocol:openai-chat", "tool"]
},
"interactions": [
{
"transport": "http",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/chat/completions",
"headers": {
"content-type": "application/json"
},
"body": "{\"model\":\"gpt-4o-mini\",\"messages\":[{\"role\":\"system\",\"content\":\"Call tools exactly as requested.\"},{\"role\":\"user\",\"content\":\"Call get_weather with city exactly Paris.\"}],\"tools\":[{\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"description\":\"Get current weather for a city.\",\"parameters\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\"}},\"required\":[\"city\"],\"additionalProperties\":false}}}],\"tool_choice\":{\"type\":\"function\",\"function\":{\"name\":\"get_weather\"}},\"stream\":true,\"stream_options\":{\"include_usage\":true},\"max_tokens\":80,\"temperature\":0}"
},
"response": {
"status": 200,
"headers": {
"content-type": "text/event-stream; charset=utf-8"
},
"body": "data: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":null,\"tool_calls\":[{\"index\":0,\"id\":\"call_5wBV98AvGPwOyC6a2HtKh85w\",\"type\":\"function\",\"function\":{\"name\":\"get_weather\",\"arguments\":\"\"}}],\"refusal\":null},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"hrw8\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"{\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"MzOlaTohF20Sbb\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"city\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"QuYBQ5vYEUVxR\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\":\\\"\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"spyXlsV2hl6l\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"Paris\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"Db1cjFKa6YAI\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"}\"}}]},\"logprobs\":null,\"finish_reason\":null}],\"usage\":null,\"obfuscation\":\"oPu35nrhXcjTL5\"}\n\ndata: {\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":null,\"obfuscation\":\"63TVy\"}\n\ndata: 
{\"id\":\"chatcmpl-DcLQgGuIIwnMHqZMRCOwZMLir5SkK\",\"object\":\"chat.completion.chunk\",\"created\":1778031210,\"model\":\"gpt-4o-mini-2024-07-18\",\"service_tier\":\"default\",\"system_fingerprint\":\"fp_d0a1738203\",\"choices\":[],\"usage\":{\"prompt_tokens\":67,\"completion_tokens\":5,\"total_tokens\":72,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"audio_tokens\":0,\"accepted_prediction_tokens\":0,\"rejected_prediction_tokens\":0}},\"obfuscation\":\"NxJjur40z4H\"}\n\ndata: [DONE]\n\n"
}
}
]
}
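
Tool-call arguments stream the same way text does, as string fragments, but keyed by tool_calls[].index so parallel calls can interleave; only the first fragment carries the call id and function name. A sketch of that accumulation, with field names read off the chunks above:

interface ToolCallDelta {
  index: number
  id?: string
  function?: { name?: string; arguments?: string }
}

function foldToolCalls(body: string): { id: string; name: string; args: string }[] {
  const calls: { id: string; name: string; args: string }[] = []
  for (const line of body.split("\n")) {
    if (!line.startsWith("data: ") || line === "data: [DONE]") continue
    const chunk = JSON.parse(line.slice("data: ".length))
    const deltas: ToolCallDelta[] = chunk.choices?.[0]?.delta?.tool_calls ?? []
    for (const delta of deltas) {
      calls[delta.index] ??= { id: "", name: "", args: "" }
      if (delta.id) calls[delta.index].id = delta.id
      if (delta.function?.name) calls[delta.index].name = delta.function.name
      calls[delta.index].args += delta.function?.arguments ?? ""
    }
  }
  // For the recording above: [{ id: "call_5wBV98AvGPwOyC6a2HtKh85w", name: "get_weather", args: "{\"city\":\"Paris\"}" }]
  return calls
}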

Some files were not shown because too many files have changed in this diff.