refactor(llm): use runtime flag for native preview

Kit Langton
2026-05-13 10:13:02 -04:00
parent 1f450ad704
commit 8b3a19fc5a
6 changed files with 164 additions and 210 deletions

View File

@@ -23,6 +23,11 @@ export class Service extends ConfigService.Service<Service>()("@opencode/Runtime
experimentalLspTool: enabledByExperimental("OPENCODE_EXPERIMENTAL_LSP_TOOL"),
experimentalPlanMode: enabledByExperimental("OPENCODE_EXPERIMENTAL_PLAN_MODE"),
experimentalEventSystem: enabledByExperimental("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"),
experimentalNativeLlm: Config.all({
experimental,
enabled: bool("OPENCODE_EXPERIMENTAL_NATIVE_LLM"),
legacy: Config.string("OPENCODE_LLM_RUNTIME").pipe(Config.withDefault("")),
}).pipe(Config.map((flags) => flags.experimental || flags.enabled || flags.legacy === "native")),
client: Config.string("OPENCODE_CLIENT").pipe(Config.withDefault("cli")),
}) {}
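For readers skimming the hunk above: the new `experimentalNativeLlm` flag resolves to true when the global experimental switch is on, when the dedicated env var is set, or when the legacy runtime var still asks for native. A minimal TypeScript sketch of that predicate (illustrative only; the env-var name behind `experimental` and the exact truthiness rules of `bool(...)` are assumptions, see the helpers defined earlier in this file):

```ts
// Sketch of the resolution logic in the Config.map above, not the actual implementation.
const isNativeLlmEnabled = (env: NodeJS.ProcessEnv): boolean => {
  const truthy = (value?: string) => value === "true" || value === "1" // assumed bool() semantics
  return (
    truthy(env.OPENCODE_EXPERIMENTAL) ||            // global experimental switch (assumed var name)
    truthy(env.OPENCODE_EXPERIMENTAL_NATIVE_LLM) || // new dedicated opt-in
    (env.OPENCODE_LLM_RUNTIME ?? "") === "native"   // legacy opt-in, still honored
  )
}
```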

View File

@@ -29,6 +29,7 @@ import * as Option from "effect/Option"
import * as OtelTracer from "@effect/opentelemetry/Tracer"
import { LLMAISDK } from "./llm/ai-sdk"
import { LLMNativeRuntime } from "./llm/native-runtime"
import { RuntimeFlags } from "@/effect/runtime-flags"
const log = Log.create({ service: "llm" })
export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
@@ -37,8 +38,6 @@ export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
const mergeOptions = (target: Record<string, any>, source: Record<string, any> | undefined): Record<string, any> =>
mergeDeep(target, source ?? {}) as Record<string, any>
const runtime = () => (process.env.OPENCODE_LLM_RUNTIME === "native" ? "native" : "ai-sdk")
export type StreamInput = {
user: MessageV2.User
sessionID: string
@@ -67,7 +66,13 @@ export class Service extends Context.Service<Service, Interface>()("@opencode/LL
const live: Layer.Layer<
Service,
never,
Auth.Service | Config.Service | Provider.Service | Plugin.Service | Permission.Service | LLMClientService
| Auth.Service
| Config.Service
| Provider.Service
| Plugin.Service
| Permission.Service
| LLMClientService
| RuntimeFlags.Service
> = Layer.effect(
Service,
Effect.gen(function* () {
@@ -77,6 +82,7 @@ const live: Layer.Layer<
const plugin = yield* Plugin.Service
const perm = yield* Permission.Service
const llmClient = yield* LLMClient.Service
const flags = yield* RuntimeFlags.Service
const run = Effect.fn("LLM.run")(function* (input: StreamRequest) {
const l = log
@@ -357,7 +363,7 @@ const live: Layer.Layer<
...headers,
}
if (runtime() === "native") {
if (flags.experimentalNativeLlm) {
const native = LLMNativeRuntime.stream({
model: input.model,
provider: item,
@@ -491,6 +497,7 @@ export const defaultLayer = Layer.suspend(() =>
Layer.provide(Provider.defaultLayer),
Layer.provide(Plugin.defaultLayer),
Layer.provide(LLMClient.layer.pipe(Layer.provide(RequestExecutor.defaultLayer))),
Layer.provide(RuntimeFlags.defaultLayer),
),
)

View File

@@ -11,6 +11,6 @@ This folder contains adapters behind that service boundary:
Safety boundary:
- AI SDK remains the default.
- `OPENCODE_LLM_RUNTIME=native` is an opt-in hint, not a global replacement.
- `OPENCODE_EXPERIMENTAL_NATIVE_LLM=true` is an opt-in hint, not a global replacement. The legacy `OPENCODE_LLM_RUNTIME=native` env var is still accepted by `RuntimeFlags` for local testing.
- Native execution currently runs only for OpenAI-compatible Responses models exposed through `@ai-sdk/openai`: direct `openai` API-key auth and console-managed `opencode`/Zen API-key config.
- Unsupported providers, OpenAI OAuth, and missing API-key cases fall back to AI SDK.
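The tests in this commit opt in without touching `process.env` by providing the flag as a layer; a minimal sketch of that pattern (the elided layers are the ones `LLM.layer` already requires, provided exactly as the updated tests below do):

```ts
import * as Layer from "effect/Layer"
import { RuntimeFlags } from "@/effect/runtime-flags"
import { LLM } from "../../src/session/llm"

// Sketch only: inject the preview flag per test instead of mutating the environment.
const llmWithNativePreview = LLM.layer.pipe(
  // ...provide the Auth, Config, Provider, Plugin and LLMClient layers as the tests already do,
  Layer.provide(RuntimeFlags.layer({ experimentalNativeLlm: true })),
)
```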

View File

@@ -1629,20 +1629,12 @@ describe("SessionNs.getUsage", () => {
})
const result = SessionNs.getUsage({
model,
usage: {
usage: usage({
inputTokens: 650_000,
outputTokens: 100_000,
totalTokens: 750_000,
inputTokenDetails: {
noCacheTokens: undefined,
cacheReadTokens: 100_000,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
cacheReadInputTokens: 100_000,
}),
})
expect(result.tokens.input).toBe(550_000)
@@ -1674,20 +1666,7 @@ describe("SessionNs.getUsage", () => {
})
const result = SessionNs.getUsage({
model,
usage: {
inputTokens: 300_000,
outputTokens: 100_000,
totalTokens: 400_000,
inputTokenDetails: {
noCacheTokens: undefined,
cacheReadTokens: undefined,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
usage: usage({ inputTokens: 300_000, outputTokens: 100_000, totalTokens: 400_000 }),
})
expect(result.cost).toBe(0.9 + 0.4)

View File

@@ -13,6 +13,7 @@ import { Provider } from "@/provider/provider"
import { ModelID, ProviderID } from "@/provider/schema"
import { Filesystem } from "@/util/filesystem"
import { LLMClient, RequestExecutor } from "@opencode-ai/llm/route"
import { RuntimeFlags } from "@/effect/runtime-flags"
import type { Agent } from "../../src/agent/agent"
import { LLM } from "../../src/session/llm"
import { MessageV2 } from "../../src/session/message-v2"
@@ -127,6 +128,7 @@ function recordedNativeLLMLayer(cassette: string, metadata: Record<string, unkno
Layer.provide(Plugin.defaultLayer),
Layer.provide(client),
Layer.provide(cassetteService),
Layer.provide(RuntimeFlags.layer({ experimentalNativeLlm: true })),
)
return Layer.mergeAll(providerLayer, llmLayer)
@@ -175,22 +177,6 @@ const collect = (input: LLM.StreamInput) =>
return Array.from(yield* llm.stream(input).pipe(Stream.runCollect))
})
const nativeRuntime = <A, E, R>(effect: Effect.Effect<A, E, R>) => {
return Effect.acquireUseRelease(
Effect.sync(() => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
return previous
}),
() => effect,
(previous) =>
Effect.sync(() => {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}),
)
}
describe("session.llm native recorded", () => {
recordedOpenAIInstance("uses real RequestExecutor with HTTP recorder for native OpenAI tools", () =>
Effect.gen(function* () {
@@ -210,8 +196,7 @@ describe("session.llm native recorded", () => {
const resolved = yield* getModel(ProviderID.openai, ModelID.make(model.id))
let executed: unknown
const events = yield* nativeRuntime(
collect({
const events = yield* collect({
user: {
id: MessageID.make("msg_user-recorded-native-tool"),
sessionID,
@@ -236,8 +221,7 @@ describe("session.llm native recorded", () => {
},
}),
},
}),
)
})
expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1)
expect(events.filter((event) => event.type === "finish")).toHaveLength(1)
@@ -263,8 +247,7 @@ describe("session.llm native recorded", () => {
const resolved = yield* getModel(ProviderID.opencode, ModelID.make(model.id))
let executed: unknown
const events = yield* nativeRuntime(
collect({
const events = yield* collect({
user: {
id: MessageID.make("msg_user-recorded-native-zen-tool"),
sessionID,
@@ -289,8 +272,7 @@ describe("session.llm native recorded", () => {
},
}),
},
}),
)
})
expect(events.filter((event) => event.type === "step-finish")).toHaveLength(1)
expect(events.filter((event) => event.type === "finish")).toHaveLength(1)

View File

@@ -21,6 +21,7 @@ import type { Agent } from "../../src/agent/agent"
import { MessageV2 } from "../../src/session/message-v2"
import { SessionID, MessageID } from "../../src/session/schema"
import { AppRuntime } from "../../src/effect/app-runtime"
import { RuntimeFlags } from "@/effect/runtime-flags"
const openAIConfig = (model: ModelsDev.Provider["models"][string], baseURL: string): Partial<Config.Info> => {
const { experimental: _experimental, ...configModel } = model
@@ -66,13 +67,14 @@ async function drainWith(layer: Layer.Layer<LLM.Service>, input: LLM.StreamInput
)
}
function llmLayerWithExecutor(executor: Layer.Layer<RequestExecutor.Service>) {
function llmLayerWithExecutor(executor: Layer.Layer<RequestExecutor.Service>, flags: Partial<RuntimeFlags.Info> = {}) {
return LLM.layer.pipe(
Layer.provide(Auth.defaultLayer),
Layer.provide(Config.defaultLayer),
Layer.provide(Provider.defaultLayer),
Layer.provide(Plugin.defaultLayer),
Layer.provide(LLMClient.layer.pipe(Layer.provide(executor))),
Layer.provide(RuntimeFlags.layer(flags)),
)
}
@@ -769,9 +771,6 @@ describe("session.llm.stream", () => {
await WithInstance.provide({
directory: tmp.path,
fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
try {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
const sessionID = SessionID.make("session-test-native")
const agent = {
@@ -782,7 +781,7 @@ describe("session.llm.stream", () => {
temperature: 0.2,
} satisfies Agent.Info
await drain({
await drainWith(llmLayerWithExecutor(RequestExecutor.defaultLayer, { experimentalNativeLlm: true }), {
user: {
id: MessageID.make("msg_user-native"),
sessionID,
@@ -798,10 +797,6 @@ describe("session.llm.stream", () => {
messages: [{ role: "user", content: "Hello" }],
tools: {},
})
} finally {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}
const capture = await request
expect(capture.url.pathname.endsWith("/responses")).toBe(true)
@@ -862,9 +857,6 @@ describe("session.llm.stream", () => {
await WithInstance.provide({
directory: tmp.path,
fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
try {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
const sessionID = SessionID.make("session-test-native-injected-tool")
const agent = {
@@ -874,7 +866,7 @@ describe("session.llm.stream", () => {
permission: [{ permission: "*", pattern: "*", action: "allow" }],
} satisfies Agent.Info
await drainWith(llmLayerWithExecutor(executor), {
await drainWith(llmLayerWithExecutor(executor, { experimentalNativeLlm: true }), {
user: {
id: MessageID.make("msg_user-native-injected-tool"),
sessionID,
@@ -899,10 +891,6 @@ describe("session.llm.stream", () => {
}),
},
})
} finally {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}
expect(captured?.model).toBe(model.id)
expect(captured?.tools).toEqual([
@@ -990,9 +978,6 @@ describe("session.llm.stream", () => {
await WithInstance.provide({
directory: tmp.path,
fn: async () => {
const previous = process.env.OPENCODE_LLM_RUNTIME
process.env.OPENCODE_LLM_RUNTIME = "native"
try {
const resolved = await getModel(ProviderID.openai, ModelID.make(model.id))
const sessionID = SessionID.make("session-test-native-tool")
const agent = {
@@ -1002,7 +987,7 @@ describe("session.llm.stream", () => {
permission: [{ permission: "*", pattern: "*", action: "allow" }],
} satisfies Agent.Info
await drain({
await drainWith(llmLayerWithExecutor(RequestExecutor.defaultLayer, { experimentalNativeLlm: true }), {
user: {
id: MessageID.make("msg_user-native-tool"),
sessionID,
@@ -1027,10 +1012,6 @@ describe("session.llm.stream", () => {
}),
},
})
} finally {
if (previous === undefined) delete process.env.OPENCODE_LLM_RUNTIME
else process.env.OPENCODE_LLM_RUNTIME = previous
}
const capture = await request
expect(capture.body.tools).toEqual([