refactor(llm): use runtime flag for native preview

Kit Langton
2026-05-13 10:13:02 -04:00
parent 1f450ad704
commit 8b3a19fc5a
6 changed files with 164 additions and 210 deletions

View File

@@ -23,6 +23,11 @@ export class Service extends ConfigService.Service<Service>()("@opencode/Runtime
experimentalLspTool: enabledByExperimental("OPENCODE_EXPERIMENTAL_LSP_TOOL"),
experimentalPlanMode: enabledByExperimental("OPENCODE_EXPERIMENTAL_PLAN_MODE"),
experimentalEventSystem: enabledByExperimental("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"),
experimentalNativeLlm: Config.all({
experimental,
enabled: bool("OPENCODE_EXPERIMENTAL_NATIVE_LLM"),
legacy: Config.string("OPENCODE_LLM_RUNTIME").pipe(Config.withDefault("")),
}).pipe(Config.map((flags) => flags.experimental || flags.enabled || flags.legacy === "native")),
client: Config.string("OPENCODE_CLIENT").pipe(Config.withDefault("cli")),
}) {}
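For readers skimming the hunk above: the new `experimentalNativeLlm` flag resolves to true when the global experimental switch is on, when the dedicated env var is set, or when the legacy runtime var still asks for native. A minimal TypeScript sketch of that predicate (illustrative only; the env-var name behind `experimental` and the exact truthiness rules of `bool(...)` are assumptions, see the helpers defined earlier in this file):

```ts
// Sketch of the resolution logic in the Config.map above, not the actual implementation.
const isNativeLlmEnabled = (env: NodeJS.ProcessEnv): boolean => {
  const truthy = (value?: string) => value === "true" || value === "1" // assumed bool() semantics
  return (
    truthy(env.OPENCODE_EXPERIMENTAL) ||            // global experimental switch (assumed var name)
    truthy(env.OPENCODE_EXPERIMENTAL_NATIVE_LLM) || // new dedicated opt-in
    (env.OPENCODE_LLM_RUNTIME ?? "") === "native"   // legacy opt-in, still honored
  )
}
```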

View File

@@ -29,6 +29,7 @@ import * as Option from "effect/Option"
import * as OtelTracer from "@effect/opentelemetry/Tracer"
import { LLMAISDK } from "./llm/ai-sdk"
import { LLMNativeRuntime } from "./llm/native-runtime"
import { RuntimeFlags } from "@/effect/runtime-flags"
const log = Log.create({ service: "llm" })
export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
@@ -37,8 +38,6 @@ export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
const mergeOptions = (target: Record<string, any>, source: Record<string, any> | undefined): Record<string, any> =>
mergeDeep(target, source ?? {}) as Record<string, any>
const runtime = () => (process.env.OPENCODE_LLM_RUNTIME === "native" ? "native" : "ai-sdk")
export type StreamInput = {
user: MessageV2.User
sessionID: string
@@ -67,7 +66,13 @@ export class Service extends Context.Service<Service, Interface>()("@opencode/LL
const live: Layer.Layer<
Service,
never,
Auth.Service | Config.Service | Provider.Service | Plugin.Service | Permission.Service | LLMClientService
| Auth.Service
| Config.Service
| Provider.Service
| Plugin.Service
| Permission.Service
| LLMClientService
| RuntimeFlags.Service
> = Layer.effect(
Service,
Effect.gen(function* () {
@@ -77,6 +82,7 @@ const live: Layer.Layer<
const plugin = yield* Plugin.Service
const perm = yield* Permission.Service
const llmClient = yield* LLMClient.Service
const flags = yield* RuntimeFlags.Service
const run = Effect.fn("LLM.run")(function* (input: StreamRequest) {
const l = log
@@ -357,7 +363,7 @@ const live: Layer.Layer<
...headers,
}
if (runtime() === "native") {
if (flags.experimentalNativeLlm) {
const native = LLMNativeRuntime.stream({
model: input.model,
provider: item,
@@ -491,6 +497,7 @@ export const defaultLayer = Layer.suspend(() =>
Layer.provide(Provider.defaultLayer),
Layer.provide(Plugin.defaultLayer),
Layer.provide(LLMClient.layer.pipe(Layer.provide(RequestExecutor.defaultLayer))),
Layer.provide(RuntimeFlags.defaultLayer),
),
)

View File

@@ -11,6 +11,6 @@ This folder contains adapters behind that service boundary:
Safety boundary:
- AI SDK remains the default.
- `OPENCODE_LLM_RUNTIME=native` is an opt-in hint, not a global replacement.
- `OPENCODE_EXPERIMENTAL_NATIVE_LLM=true` is an opt-in hint, not a global replacement. The legacy `OPENCODE_LLM_RUNTIME=native` env var is still accepted by `RuntimeFlags` for local testing.
- Native execution currently runs only for OpenAI-compatible Responses models exposed through `@ai-sdk/openai`: direct `openai` API-key auth and console-managed `opencode`/Zen API-key config.
- Unsupported providers, OpenAI OAuth, and missing API-key cases fall back to AI SDK.
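The tests in this commit opt in without touching `process.env` by providing the flag as a layer; a minimal sketch of that pattern (the elided layers are the ones `LLM.layer` already requires, provided exactly as the updated tests below do):

```ts
import * as Layer from "effect/Layer"
import { RuntimeFlags } from "@/effect/runtime-flags"
import { LLM } from "../../src/session/llm"

// Sketch only: inject the preview flag per test instead of mutating the environment.
const llmWithNativePreview = LLM.layer.pipe(
  // ...provide the Auth, Config, Provider, Plugin and LLMClient layers as the tests already do,
  Layer.provide(RuntimeFlags.layer({ experimentalNativeLlm: true })),
)
```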

View File

@@ -1629,20 +1629,12 @@ describe("SessionNs.getUsage", () => {
})
const result = SessionNs.getUsage({
model,
usage: {
usage: usage({
inputTokens: 650_000,
outputTokens: 100_000,
totalTokens: 750_000,
inputTokenDetails: {
noCacheTokens: undefined,
cacheReadTokens: 100_000,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
cacheReadInputTokens: 100_000,
}),
})
expect(result.tokens.input).toBe(550_000)
@@ -1674,20 +1666,7 @@ describe("SessionNs.getUsage", () => {
})
const result = SessionNs.getUsage({
model,
usage: {
inputTokens: 300_000,
outputTokens: 100_000,
totalTokens: 400_000,
inputTokenDetails: {
noCacheTokens: undefined,
cacheReadTokens: undefined,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
usage: usage({ inputTokens: 300_000, outputTokens: 100_000, totalTokens: 400_000 }),
})
expect(result.cost).toBe(0.9 + 0.4)

View File

@@ -13,6 +13,7 @@ import { Provider } from "@/provider/provider"
import { ModelID, ProviderID } from "@/provider/schema"
import { Filesystem } from "@/util/filesystem"
import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route"
import { RuntimeFlags } from "@/effect/runtime-flags"
import type { Agent } from "../../src/agent/agent"
import { LLM } from "../../src/session/llm"
import { MessageV2 } from "../../src/session/message-v2"
@@ -127,6 +128,7 @@ function recordedNativeLLMLayer(cassette: string, metadata: Record<string, unkno
Layer.provide(Plugin.defaultLayer),
Layer.provide(client),
Layer.provide(cassetteService),
Layer.provide(RuntimeFlags.layer({ experimentalNativeLlm: true })),
)
return Layer.mergeAll(providerLayer, llmLayer)
@@ -175,22 +177,6 @@ const collect = (input: LLM.StreamInput) =>
return Array.from(yield* llm.stream(input).pipe(Stream.runCollect))
})
const nativeRuntime = <A, E, R>(effect: Effect.Effect<A, E, R>) => {
return Effect.acquireUseRelease(
Effect.sync(() => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
return previous
}),
() => effect,
(previous) =>
Effect.sync(() => {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}),
)
}
describe("session.llm native recorded", () => {
recordedOpenAIInstance("uses real RequestExecutor with HTTP recorder for native OpenAI tools", () =>
Effect.gen(function* () {
@@ -210,8 +196,7 @@ describe("session.llm native recorded", () => {
const resolved = yield* getModel(ProviderID.openai, ModelID.make(model.id))
let executed: unknown
const events = yield* nativeRuntime(
collect({
const events = yield* collect({
user: {
id: MessageID.make("msg_user-recorded-native-tool"),
sessionID,
@@ -236,8 +221,7 @@ describe("session.llm native recorded", () => {
},
}),
},
}),
)
})
expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1)
expect(events.filter((event) => event.type === "finish")).toHaveLength(1)
@@ -263,8 +247,7 @@ describe("session.llm native recorded", () => {
const resolved = yield* getModel(ProviderID.opencode, ModelID.make(model.id))
let executed: unknown
const events = yield* nativeRuntime(
collect({
const events = yield* collect({
user: {
id: MessageID.make("msg_user-recorded-native-zen-tool"),
sessionID,
@@ -289,8 +272,7 @@ describe("session.llm native recorded", () => {
},
}),
},
}),
)
})
expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1)
expect(events.filter((event) => event.type === "finish")).toHaveLength(1)

View File

@@ -21,6 +21,7 @@ import type { Agent } from "../../src/agent/agent"
import { MessageV2 } from "../../src/session/message-v2"
import { SessionID, MessageID } from "../../src/session/schema"
import { AppRuntime } from "../../src/effect/app-runtime"
import { RuntimeFlags } from "@/effect/runtime-flags"
const openAIConfig = (model: ModelsDev.Provider["models"][string], baseURL: string): Partial<Config.Info> => {
const { experimental: _experimental, ...configModel } = model
@@ -66,13 +67,14 @@ async function drainWith(layer: Layer.Layer<LLM.Service>, input: LLM.StreamInput
)
}
function llmLayerWithExecutor(executor: Layer.Layer<RequestExecutor.Service>) {
function llmLayerWithExecutor(executor: Layer.Layer<RequestExecutor.Service>, flags: Partial<RuntimeFlags.Info> = {}) {
return LLM.layer.pipe(
Layer.provide(Auth.defaultLayer),
Layer.provide(Config.defaultLayer),
Layer.provide(Provider.defaultLayer),
Layer.provide(Plugin.defaultLayer),
Layer.provide(LLMClient.layer.pipe(Layer.provide(executor))),
Layer.provide(RuntimeFlags.layer(flags)),
)
}
@@ -769,9 +771,6 @@ describe("session.llm.stream", () => {
await WithInstance.provide({
directory: tmp.path,
fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
try {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
const sessionID = SessionID.make("session-test-native")
const agent = {
@@ -782,7 +781,7 @@ describe("session.llm.stream", () => {
temperature: 0.2,
} satisfies Agent.Info
await drain({
await drainWith(llmLayerWithExecutor(RequestExecutor.defaultLayer, { experimentalNativeLlm: true }), {
user: {
id: MessageID.make("msg_user-native"),
sessionID,
@@ -798,10 +797,6 @@ describe("session.llm.stream", () => {
messages: [{ role: "user", content: "Hello" }],
tools: {},
})
} finally {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}
const capture = await request
expect(capture.url.pathname.endsWith("/responses")).toBe(true)
@@ -862,9 +857,6 @@ describe("session.llm.stream", () => {
await WithInstance.provide({
directory: tmp.path,
fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
try {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
const sessionID = SessionID.make("session-test-native-injected-tool")
const agent = {
@@ -874,7 +866,7 @@ describe("session.llm.stream", () => {
permission: [{ permission: "*", pattern: "*", action: "allow" }],
} satisfies Agent.Info
await drainWith(llmLayerWithExecutor(executor), {
await drainWith(llmLayerWithExecutor(executor, { experimentalNativeLlm: true }), {
user: {
id: MessageID.make("msg_user-native-injected-tool"),
sessionID,
@@ -899,10 +891,6 @@ describe("session.llm.stream", () => {
}),
},
})
} finally {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}
expect(captured?.model).toBe(model.id)
expect(captured?.tools).toEqual([
@@ -990,9 +978,6 @@ describe("session.llm.stream", () => {
await WithInstance.provide({
directory: tmp.path,
fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
try {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
const sessionID = SessionID.make("session-test-native-tool")
const agent = {
@@ -1002,7 +987,7 @@ describe("session.llm.stream", () => {
permission: [{ permission: "*", pattern: "*", action: "allow" }],
} satisfies Agent.Info
await drain({
await drainWith(llmLayerWithExecutor(RequestExecutor.defaultLayer, { experimentalNativeLlm: true }), {
user: {
id: MessageID.make("msg_user-native-tool"),
sessionID,
@@ -1027,10 +1012,6 @@ describe("session.llm.stream", () => {
}),
},
})
} finally {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}
const capture = await request
expect(capture.body.tools).toEqual([