refactor(llm): use runtime flag for native preview

This commit is contained in:
Kit Langton
2026-05-13 10:13:02 -04:00
parent 1f450ad704
commit 8b3a19fc5a
6 changed files with 164 additions and 210 deletions

View File

@@ -23,6 +23,11 @@ export class Service extends ConfigService.Service<Service>()("@opencode/Runtime
experimentalLspTool: enabledByExperimental("OPENCODE_EXPERIMENTAL_LSP_TOOL"), experimentalLspTool: enabledByExperimental("OPENCODE_EXPERIMENTAL_LSP_TOOL"),
experimentalPlanMode: enabledByExperimental("OPENCODE_EXPERIMENTAL_PLAN_MODE"), experimentalPlanMode: enabledByExperimental("OPENCODE_EXPERIMENTAL_PLAN_MODE"),
experimentalEventSystem: enabledByExperimental("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"), experimentalEventSystem: enabledByExperimental("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"),
// Resolves to true when any of three inputs opts in to the native LLM preview:
// the shared `experimental` config, OPENCODE_EXPERIMENTAL_NATIVE_LLM, or the
// legacy OPENCODE_LLM_RUNTIME variable being exactly "native".
// NOTE(review): `experimental` is defined outside this view — presumably the
// global experimental switch; confirm that it should also enable this flag.
experimentalNativeLlm: Config.all({
experimental,
enabled: bool("OPENCODE_EXPERIMENTAL_NATIVE_LLM"),
// Defaults to "" so an unset legacy variable can never compare equal to "native".
legacy: Config.string("OPENCODE_LLM_RUNTIME").pipe(Config.withDefault("")),
}).pipe(Config.map((flags) => flags.experimental || flags.enabled || flags.legacy === "native")),
client: Config.string("OPENCODE_CLIENT").pipe(Config.withDefault("cli")), client: Config.string("OPENCODE_CLIENT").pipe(Config.withDefault("cli")),
}) {} }) {}

View File

@@ -29,6 +29,7 @@ import * as Option from "effect/Option"
import * as OtelTracer from "@effect/opentelemetry/Tracer" import * as OtelTracer from "@effect/opentelemetry/Tracer"
import { LLMAISDK } from "./llm/ai-sdk" import { LLMAISDK } from "./llm/ai-sdk"
import { LLMNativeRuntime } from "./llm/native-runtime" import { LLMNativeRuntime } from "./llm/native-runtime"
import { RuntimeFlags } from "@/effect/runtime-flags"
const log = Log.create({ service: "llm" }) const log = Log.create({ service: "llm" })
export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
@@ -37,8 +38,6 @@ export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
const mergeOptions = (target: Record<string, any>, source: Record<string, any> | undefined): Record<string, any> => const mergeOptions = (target: Record<string, any>, source: Record<string, any> | undefined): Record<string, any> =>
mergeDeep(target, source ?? {}) as Record<string, any> mergeDeep(target, source ?? {}) as Record<string, any>
/**
 * Selects the LLM runtime from the process environment.
 *
 * Reads `OPENCODE_LLM_RUNTIME` on every call; only the exact value "native"
 * selects the native runtime, anything else (including unset) yields "ai-sdk".
 */
const runtime = () => {
  if (process.env.OPENCODE_LLM_RUNTIME === "native") return "native"
  return "ai-sdk"
}
export type StreamInput = { export type StreamInput = {
user: MessageV2.User user: MessageV2.User
sessionID: string sessionID: string
@@ -67,7 +66,13 @@ export class Service extends Context.Service<Service, Interface>()("@opencode/LL
const live: Layer.Layer< const live: Layer.Layer<
Service, Service,
never, never,
Auth.Service | Config.Service | Provider.Service | Plugin.Service | Permission.Service | LLMClientService | Auth.Service
| Config.Service
| Provider.Service
| Plugin.Service
| Permission.Service
| LLMClientService
| RuntimeFlags.Service
> = Layer.effect( > = Layer.effect(
Service, Service,
Effect.gen(function* () { Effect.gen(function* () {
@@ -77,6 +82,7 @@ const live: Layer.Layer<
const plugin = yield* Plugin.Service const plugin = yield* Plugin.Service
const perm = yield* Permission.Service const perm = yield* Permission.Service
const llmClient = yield* LLMClient.Service const llmClient = yield* LLMClient.Service
const flags = yield* RuntimeFlags.Service
const run = Effect.fn("LLM.run")(function* (input: StreamRequest) { const run = Effect.fn("LLM.run")(function* (input: StreamRequest) {
const l = log const l = log
@@ -357,7 +363,7 @@ const live: Layer.Layer<
...headers, ...headers,
} }
if (runtime() === "native") { if (flags.experimentalNativeLlm) {
const native = LLMNativeRuntime.stream({ const native = LLMNativeRuntime.stream({
model: input.model, model: input.model,
provider: item, provider: item,
@@ -491,6 +497,7 @@ export const defaultLayer = Layer.suspend(() =>
Layer.provide(Provider.defaultLayer), Layer.provide(Provider.defaultLayer),
Layer.provide(Plugin.defaultLayer), Layer.provide(Plugin.defaultLayer),
Layer.provide(LLMClient.layer.pipe(Layer.provide(RequestExecutor.defaultLayer))), Layer.provide(LLMClient.layer.pipe(Layer.provide(RequestExecutor.defaultLayer))),
Layer.provide(RuntimeFlags.defaultLayer),
), ),
) )

View File

@@ -11,6 +11,6 @@ This folder contains adapters behind that service boundary:
Safety boundary: Safety boundary:
- AI SDK remains the default. - AI SDK remains the default.
- `OPENCODE_LLM_RUNTIME=native` is an opt-in hint, not a global replacement. - `OPENCODE_EXPERIMENTAL_NATIVE_LLM=true` is an opt-in hint, not a global replacement. The legacy `OPENCODE_LLM_RUNTIME=native` env var is still accepted by `RuntimeFlags` for local testing.
- Native execution currently runs only for OpenAI-compatible Responses models exposed through `@ai-sdk/openai`: direct `openai` API-key auth and console-managed `opencode`/Zen API-key config. - Native execution currently runs only for OpenAI-compatible Responses models exposed through `@ai-sdk/openai`: direct `openai` API-key auth and console-managed `opencode`/Zen API-key config.
- Unsupported providers, OpenAI OAuth, and missing API-key cases fall back to AI SDK. - Unsupported providers, OpenAI OAuth, and missing API-key cases fall back to AI SDK.

View File

@@ -1629,20 +1629,12 @@ describe("SessionNs.getUsage", () => {
}) })
const result = SessionNs.getUsage({ const result = SessionNs.getUsage({
model, model,
usage: { usage: usage({
inputTokens: 650_000, inputTokens: 650_000,
outputTokens: 100_000, outputTokens: 100_000,
totalTokens: 750_000, totalTokens: 750_000,
inputTokenDetails: { cacheReadInputTokens: 100_000,
noCacheTokens: undefined, }),
cacheReadTokens: 100_000,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
}) })
expect(result.tokens.input).toBe(550_000) expect(result.tokens.input).toBe(550_000)
@@ -1674,20 +1666,7 @@ describe("SessionNs.getUsage", () => {
}) })
const result = SessionNs.getUsage({ const result = SessionNs.getUsage({
model, model,
usage: { usage: usage({ inputTokens: 300_000, outputTokens: 100_000, totalTokens: 400_000 }),
inputTokens: 300_000,
outputTokens: 100_000,
totalTokens: 400_000,
inputTokenDetails: {
noCacheTokens: undefined,
cacheReadTokens: undefined,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
}) })
expect(result.cost).toBe(0.9 + 0.4) expect(result.cost).toBe(0.9 + 0.4)

View File

@@ -13,6 +13,7 @@ import { Provider } from "@/provider/provider"
import { ModelID, ProviderID } from "@/provider/schema" import { ModelID, ProviderID } from "@/provider/schema"
import { Filesystem } from "@/util/filesystem" import { Filesystem } from "@/util/filesystem"
import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route" import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route"
import { RuntimeFlags } from "@/effect/runtime-flags"
import type { Agent } from "../../src/agent/agent" import type { Agent } from "../../src/agent/agent"
import { LLM } from "../../src/session/llm" import { LLM } from "../../src/session/llm"
import { MessageV2 } from "../../src/session/message-v2" import { MessageV2 } from "../../src/session/message-v2"
@@ -127,6 +128,7 @@ function recordedNativeLLMLayer(cassette: string, metadata: Record<string, unkno
Layer.provide(Plugin.defaultLayer), Layer.provide(Plugin.defaultLayer),
Layer.provide(client), Layer.provide(client),
Layer.provide(cassetteService), Layer.provide(cassetteService),
Layer.provide(RuntimeFlags.layer({ experimentalNativeLlm: true })),
) )
return Layer.mergeAll(providerLayer, llmLayer) return Layer.mergeAll(providerLayer, llmLayer)
@@ -175,22 +177,6 @@ const collect = (input: LLM.StreamInput) =>
return Array.from(yield* llm.stream(input).pipe(Stream.runCollect)) return Array.from(yield* llm.stream(input).pipe(Stream.runCollect))
}) })
/**
 * Wraps `effect` so that `OPENCODE_LLM_RUNTIME` is set to "native" while it runs.
 *
 * The acquire step remembers the variable's previous value and overwrites it;
 * the release step puts the previous value back, or deletes the variable when
 * it was not set before.
 */
const nativeRuntime = <A, E, R>(effect: Effect.Effect<A, E, R>) => {
  // Acquire: capture the current value, then force the native runtime.
  const forceNative = Effect.sync(() => {
    const saved = process.env.OPENCODE_LLM_RUNTIME
    process.env.OPENCODE_LLM_RUNTIME = "native"
    return saved
  })

  // Release: undo the override using the value captured during acquire.
  const restore = (saved: string | undefined) =>
    Effect.sync(() => {
      if (saved !== undefined) {
        process.env.OPENCODE_LLM_RUNTIME = saved
        return
      }
      delete process.env.OPENCODE_LLM_RUNTIME
    })

  return Effect.acquireUseRelease(forceNative, () => effect, restore)
}
describe("session.llm native recorded", () => { describe("session.llm native recorded", () => {
recordedOpenAIInstance("uses real RequestExecutor with HTTP recorder for native OpenAI tools", () => recordedOpenAIInstance("uses real RequestExecutor with HTTP recorder for native OpenAI tools", () =>
Effect.gen(function* () { Effect.gen(function* () {
@@ -210,34 +196,32 @@ describe("session.llm native recorded", () => {
const resolved = yield* getModel(ProviderID.openai, ModelID.make(model.id)) const resolved = yield* getModel(ProviderID.openai, ModelID.make(model.id))
let executed: unknown let executed: unknown
const events = yield* nativeRuntime( const events = yield* collect({
collect({ user: {
user: { id: MessageID.make("msg_user-recorded-native-tool"),
id: MessageID.make("msg_user-recorded-native-tool"),
sessionID,
role: "user",
time: { created: 0 },
agent: agent.name,
model: { providerID: ProviderID.make("openai"), modelID: ModelID.make(model.id) },
} satisfies MessageV2.User,
sessionID, sessionID,
model: resolved, role: "user",
agent, time: { created: 0 },
system: ["You must call the lookup tool exactly once with query weather. Do not answer in text."], agent: agent.name,
messages: [{ role: "user", content: "Use lookup." }], model: { providerID: ProviderID.make("openai"), modelID: ModelID.make(model.id) },
toolChoice: "required", } satisfies MessageV2.User,
tools: { sessionID,
lookup: tool({ model: resolved,
description: "Lookup data.", agent,
inputSchema: z.object({ query: z.string() }), system: ["You must call the lookup tool exactly once with query weather. Do not answer in text."],
execute: async (args, options) => { messages: [{ role: "user", content: "Use lookup." }],
executed = { args, toolCallId: options.toolCallId } toolChoice: "required",
return { output: "looked up" } tools: {
}, lookup: tool({
}), description: "Lookup data.",
}, inputSchema: z.object({ query: z.string() }),
}), execute: async (args, options) => {
) executed = { args, toolCallId: options.toolCallId }
return { output: "looked up" }
},
}),
},
})
expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1) expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1)
expect(events.filter((event) => event.type === "finish")).toHaveLength(1) expect(events.filter((event) => event.type === "finish")).toHaveLength(1)
@@ -263,34 +247,32 @@ describe("session.llm native recorded", () => {
const resolved = yield* getModel(ProviderID.opencode, ModelID.make(model.id)) const resolved = yield* getModel(ProviderID.opencode, ModelID.make(model.id))
let executed: unknown let executed: unknown
const events = yield* nativeRuntime( const events = yield* collect({
collect({ user: {
user: { id: MessageID.make("msg_user-recorded-native-zen-tool"),
id: MessageID.make("msg_user-recorded-native-zen-tool"),
sessionID,
role: "user",
time: { created: 0 },
agent: agent.name,
model: { providerID: ProviderID.opencode, modelID: ModelID.make(model.id) },
} satisfies MessageV2.User,
sessionID, sessionID,
model: resolved, role: "user",
agent, time: { created: 0 },
system: ["You must call the lookup tool exactly once with query weather. Do not answer in text."], agent: agent.name,
messages: [{ role: "user", content: "Use lookup." }], model: { providerID: ProviderID.opencode, modelID: ModelID.make(model.id) },
toolChoice: "required", } satisfies MessageV2.User,
tools: { sessionID,
lookup: tool({ model: resolved,
description: "Lookup data.", agent,
inputSchema: z.object({ query: z.string() }), system: ["You must call the lookup tool exactly once with query weather. Do not answer in text."],
execute: async (args, options) => { messages: [{ role: "user", content: "Use lookup." }],
executed = { args, toolCallId: options.toolCallId } toolChoice: "required",
return { output: "looked up" } tools: {
}, lookup: tool({
}), description: "Lookup data.",
}, inputSchema: z.object({ query: z.string() }),
}), execute: async (args, options) => {
) executed = { args, toolCallId: options.toolCallId }
return { output: "looked up" }
},
}),
},
})
expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1) expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1)
expect(events.filter((event) => event.type === "finish")).toHaveLength(1) expect(events.filter((event) => event.type === "finish")).toHaveLength(1)

View File

@@ -21,6 +21,7 @@ import type { Agent } from "../../src/agent/agent"
import { MessageV2 } from "../../src/session/message-v2" import { MessageV2 } from "../../src/session/message-v2"
import { SessionID, MessageID } from "../../src/session/schema" import { SessionID, MessageID } from "../../src/session/schema"
import { AppRuntime } from "../../src/effect/app-runtime" import { AppRuntime } from "../../src/effect/app-runtime"
import { RuntimeFlags } from "@/effect/runtime-flags"
const openAIConfig = (model: ModelsDev.Provider["models"][string], baseURL: string): Partial<Config.Info> => { const openAIConfig = (model: ModelsDev.Provider["models"][string], baseURL: string): Partial<Config.Info> => {
const { experimental: _experimental, ...configModel } = model const { experimental: _experimental, ...configModel } = model
@@ -66,13 +67,14 @@ async function drainWith(layer: Layer.Layer<LLM.Service>, input: LLM.StreamInput
) )
} }
function llmLayerWithExecutor(executor: Layer.Layer<RequestExecutor.Service>) { function llmLayerWithExecutor(executor: Layer.Layer<RequestExecutor.Service>, flags: Partial<RuntimeFlags.Info> = {}) {
return LLM.layer.pipe( return LLM.layer.pipe(
Layer.provide(Auth.defaultLayer), Layer.provide(Auth.defaultLayer),
Layer.provide(Config.defaultLayer), Layer.provide(Config.defaultLayer),
Layer.provide(Provider.defaultLayer), Layer.provide(Provider.defaultLayer),
Layer.provide(Plugin.defaultLayer), Layer.provide(Plugin.defaultLayer),
Layer.provide(LLMClient.layer.pipe(Layer.provide(executor))), Layer.provide(LLMClient.layer.pipe(Layer.provide(executor))),
Layer.provide(RuntimeFlags.layer(flags)),
) )
} }
@@ -769,39 +771,32 @@ describe("session.llm.stream", () => {
await WithInstance.provide({ await WithInstance.provide({
directory: tmp.path, directory: tmp.path,
fn: async () => { fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
process.env.OPENCODE_LLM_RUNTIME = "native" const sessionID = SessionID.make("session-test-native")
try { const agent = {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id)) name: "test",
const sessionID = SessionID.make("session-test-native") mode: "primary",
const agent = { options: {},
name: "test", permission: [{ permission: "*", pattern: "*", action: "allow" }],
mode: "primary", temperature: 0.2,
options: {}, } satisfies Agent.Info
permission: [{ permission: "*", pattern: "*", action: "allow" }],
temperature: 0.2,
} satisfies Agent.Info
await drain({ await drainWith(llmLayerWithExecutor(RequestExecutor.defaultLayer, { experimentalNativeLlm: true }), {
user: { user: {
id: MessageID.make("msg_user-native"), id: MessageID.make("msg_user-native"),
sessionID,
role: "user",
time: { created: Date.now() },
agent: agent.name,
model: { providerID: ProviderID.make("openai"), modelID: resolved.id, variant: "high" },
} satisfies MessageV2.User,
sessionID, sessionID,
model: resolved, role: "user",
agent, time: { created: Date.now() },
system: ["You are a helpful assistant."], agent: agent.name,
messages: [{ role: "user", content: "Hello" }], model: { providerID: ProviderID.make("openai"), modelID: resolved.id, variant: "high" },
tools: {}, } satisfies MessageV2.User,
}) sessionID,
} finally { model: resolved,
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME agent,
else process.env.OPENCODE_LLM_RUNTIME = previous system: ["You are a helpful assistant."],
} messages: [{ role: "user", content: "Hello" }],
tools: {},
})
const capture = await request const capture = await request
expect(capture.url.pathname.endsWith("/responses")).toBe(true) expect(capture.url.pathname.endsWith("/responses")).toBe(true)
@@ -862,47 +857,40 @@ describe("session.llm.stream", () => {
await WithInstance.provide({ await WithInstance.provide({
directory: tmp.path, directory: tmp.path,
fn: async () => { fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
process.env.OPENCODE_LLM_RUNTIME = "native" const sessionID = SessionID.make("session-test-native-injected-tool")
try { const agent = {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id)) name: "test",
const sessionID = SessionID.make("session-test-native-injected-tool") mode: "primary",
const agent = { options: {},
name: "test", permission: [{ permission: "*", pattern: "*", action: "allow" }],
mode: "primary", } satisfies Agent.Info
options: {},
permission: [{ permission: "*", pattern: "*", action: "allow" }],
} satisfies Agent.Info
await drainWith(llmLayerWithExecutor(executor), { await drainWith(llmLayerWithExecutor(executor, { experimentalNativeLlm: true }), {
user: { user: {
id: MessageID.make("msg_user-native-injected-tool"), id: MessageID.make("msg_user-native-injected-tool"),
sessionID,
role: "user",
time: { created: Date.now() },
agent: agent.name,
model: { providerID: ProviderID.make("openai"), modelID: resolved.id },
} satisfies MessageV2.User,
sessionID, sessionID,
model: resolved, role: "user",
agent, time: { created: Date.now() },
system: [], agent: agent.name,
messages: [{ role: "user", content: "Use lookup" }], model: { providerID: ProviderID.make("openai"), modelID: resolved.id },
tools: { } satisfies MessageV2.User,
lookup: tool({ sessionID,
description: "Lookup data", model: resolved,
inputSchema: z.object({ query: z.string() }), agent,
execute: async (args, options) => { system: [],
executed = { args, toolCallId: options.toolCallId } messages: [{ role: "user", content: "Use lookup" }],
return { output: "looked up" } tools: {
}, lookup: tool({
}), description: "Lookup data",
}, inputSchema: z.object({ query: z.string() }),
}) execute: async (args, options) => {
} finally { executed = { args, toolCallId: options.toolCallId }
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME return { output: "looked up" }
else process.env.OPENCODE_LLM_RUNTIME = previous },
} }),
},
})
expect(captured?.model).toBe(model.id) expect(captured?.model).toBe(model.id)
expect(captured?.tools).toEqual([ expect(captured?.tools).toEqual([
@@ -990,47 +978,40 @@ describe("session.llm.stream", () => {
await WithInstance.provide({ await WithInstance.provide({
directory: tmp.path, directory: tmp.path,
fn: async () => { fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
process.env.OPENCODE_LLM_RUNTIME = "native" const sessionID = SessionID.make("session-test-native-tool")
try { const agent = {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id)) name: "test",
const sessionID = SessionID.make("session-test-native-tool") mode: "primary",
const agent = { options: {},
name: "test", permission: [{ permission: "*", pattern: "*", action: "allow" }],
mode: "primary", } satisfies Agent.Info
options: {},
permission: [{ permission: "*", pattern: "*", action: "allow" }],
} satisfies Agent.Info
await drain({ await drainWith(llmLayerWithExecutor(RequestExecutor.defaultLayer, { experimentalNativeLlm: true }), {
user: { user: {
id: MessageID.make("msg_user-native-tool"), id: MessageID.make("msg_user-native-tool"),
sessionID,
role: "user",
time: { created: Date.now() },
agent: agent.name,
model: { providerID: ProviderID.make("openai"), modelID: resolved.id },
} satisfies MessageV2.User,
sessionID, sessionID,
model: resolved, role: "user",
agent, time: { created: Date.now() },
system: [], agent: agent.name,
messages: [{ role: "user", content: "Use lookup" }], model: { providerID: ProviderID.make("openai"), modelID: resolved.id },
tools: { } satisfies MessageV2.User,
lookup: tool({ sessionID,
description: "Lookup data", model: resolved,
inputSchema: z.object({ query: z.string() }), agent,
execute: async (args, options) => { system: [],
executed = { args, toolCallId: options.toolCallId } messages: [{ role: "user", content: "Use lookup" }],
return { output: "looked up" } tools: {
}, lookup: tool({
}), description: "Lookup data",
}, inputSchema: z.object({ query: z.string() }),
}) execute: async (args, options) => {
} finally { executed = { args, toolCallId: options.toolCallId }
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME return { output: "looked up" }
else process.env.OPENCODE_LLM_RUNTIME = previous },
} }),
},
})
const capture = await request const capture = await request
expect(capture.body.tools).toEqual([ expect(capture.body.tools).toEqual([