mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-21 19:35:10 +00:00
426 lines
17 KiB
TypeScript
426 lines
17 KiB
TypeScript
// Subprocess test harness for the opencode CLI. Spawns the real binary against
|
|
// a TestLLMServer running in-process at a random port, with full env isolation.
|
|
//
|
|
// This is the missing test tier: in-process tests can't catch bugs that span
|
|
// argv parsing → server boot → SDK call → event consumption → exit code (like
|
|
// the original /event race or #27371's invalid-model hang).
|
|
//
|
|
// Configuration flows through opencode's built-in test affordances:
|
|
// - OPENCODE_CONFIG_CONTENT : provider config inline, no files to find
|
|
// - OPENCODE_TEST_HOME : pins os.homedir() → tmpdir
|
|
// - OPENCODE_DISABLE_PROJECT_CONFIG : skip walking up for opencode.json
|
|
// - OPENCODE_PURE : skip external plugin discovery + install
|
|
// - OPENCODE_DISABLE_AUTOUPDATE / AUTOCOMPACT / MODELS_FETCH : no background work
|
|
// Plus HOME / XDG_* pointing at the tmpdir for belt-and-suspenders isolation.
|
|
//
|
|
// Today only `opencode.run` is fully wired. The shape supports adding more
|
|
// builders (`opencode.serve(opts)`, `opencode.acp(opts)`, `opencode.auth(...)`)
|
|
// without changing the fixture. Long-lived commands like `serve` will need a
|
|
// different return shape — see the TODO at the bottom of OpencodeCli.
|
|
import type { TestOptions } from "bun:test"
|
|
import { Deferred, Duration, Effect, Layer, Queue, Scope, Stream } from "effect"
|
|
import { FetchHttpClient, HttpClient } from "effect/unstable/http"
|
|
import path from "node:path"
|
|
import fs from "node:fs/promises"
|
|
import os from "node:os"
|
|
import { Process } from "@/util/process"
|
|
import { TestLLMServer } from "./llm-server"
|
|
import { testProviderConfig } from "./test-provider"
|
|
import { it } from "./effect"
|
|
|
|
const opencodeRoot = path.resolve(import.meta.dir, "../../")
|
|
const cliEntry = path.join(opencodeRoot, "src/index.ts")
|
|
|
|
export const testModelID = "test/test-model"
|
|
|
|
function isolatedEnv(home: string, configJson: string): Record<string, string> {
|
|
return {
|
|
OPENCODE_TEST_HOME: home,
|
|
HOME: home,
|
|
XDG_CONFIG_HOME: path.join(home, ".config"),
|
|
XDG_DATA_HOME: path.join(home, ".local/share"),
|
|
XDG_STATE_HOME: path.join(home, ".local/state"),
|
|
XDG_CACHE_HOME: path.join(home, ".cache"),
|
|
OPENCODE_CONFIG_CONTENT: configJson,
|
|
OPENCODE_DISABLE_PROJECT_CONFIG: "1",
|
|
OPENCODE_PURE: "1",
|
|
OPENCODE_DISABLE_AUTOUPDATE: "1",
|
|
OPENCODE_DISABLE_AUTOCOMPACT: "1",
|
|
OPENCODE_DISABLE_MODELS_FETCH: "1",
|
|
OPENCODE_AUTH_CONTENT: "{}",
|
|
}
|
|
}
|
|
|
|
export type RunResult = {
|
|
readonly exitCode: number
|
|
readonly stdout: string
|
|
readonly stderr: string
|
|
readonly durationMs: number
|
|
}
|
|
|
|
export type SpawnOpts = { readonly timeoutMs?: number; readonly env?: Record<string, string> }
|
|
|
|
// Typed equivalent of constructing argv for `opencode run`. New flags should
|
|
// land here so tests stay grep-able and refactor-safe.
|
|
export type RunOpts = SpawnOpts & {
|
|
readonly model?: string
|
|
readonly agent?: string
|
|
readonly format?: "default" | "json"
|
|
readonly command?: string
|
|
readonly printLogs?: boolean
|
|
readonly extraArgs?: string[]
|
|
}
|
|
|
|
// `opencode serve` is a long-lived process — it never exits on its own.
|
|
// `serve(opts)` therefore returns a handle inside the caller's Scope: the
|
|
// subprocess is killed when the scope closes (test end), and the URL the
|
|
// server actually bound to (port 0 means OS-assigned) is parsed off stdout.
|
|
export type ServeOpts = SpawnOpts & {
|
|
readonly port?: number
|
|
readonly hostname?: string
|
|
readonly extraArgs?: string[]
|
|
// How long to wait for the "listening on http://..." line before failing.
|
|
// Default 15s — startup is dominated by bun's transpile + plugin init, not
|
|
// the actual listen() call.
|
|
readonly readyTimeoutMs?: number
|
|
}
|
|
|
|
export type ServeHandle = {
|
|
// Full URL the server is bound to, e.g. "http://127.0.0.1:54321". Use this
|
|
// as the base for HTTP requests in tests — never assume the port.
|
|
readonly url: string
|
|
readonly hostname: string
|
|
readonly port: number
|
|
// Sends SIGTERM. The scope finalizer also calls this, so tests rarely need
|
|
// to invoke it directly — useful for tests that assert exit behavior.
|
|
readonly kill: () => void
|
|
// Resolves with the exit code once the process exits. Bun returns a number.
|
|
readonly exited: Promise<number>
|
|
}
|
|
|
|
// `opencode acp` speaks newline-delimited JSON-RPC over stdin/stdout. It is
|
|
// long-lived and exits cleanly when stdin is closed. The handle exposes the
|
|
// duplex stream as send/receive rather than raw pipes so tests don't have to
|
|
// reimplement framing on every call site.
|
|
export type AcpOpts = SpawnOpts & {
|
|
readonly cwd?: string
|
|
readonly extraArgs?: string[]
|
|
}
|
|
|
|
export type AcpHandle = {
|
|
// Writes a single JSON-RPC message to the child's stdin as one ndjson line.
|
|
readonly send: (msg: object) => Effect.Effect<void>
|
|
// Resolves with the next parsed JSON-RPC line from the child's stdout.
|
|
// Lines are buffered in a queue so multiple receives in a row won't drop
|
|
// anything. Pair with `Effect.timeout` if a test wants a deadline.
|
|
readonly receive: Effect.Effect<unknown>
|
|
// Closes stdin. ACP exits cleanly on stdin EOF; the scope finalizer also
|
|
// calls this, so tests only need it when asserting exit behavior.
|
|
readonly close: () => void
|
|
readonly exited: Promise<number>
|
|
}
|
|
|
|
export type OpencodeCli = {
|
|
// High-level: run a single prompt against the test model. Short-lived.
|
|
readonly run: (message: string, opts?: RunOpts) => Effect.Effect<RunResult>
|
|
// Spawn `opencode serve` and wait until it's listening. Long-lived: the
|
|
// returned handle is killed when the caller's Scope closes. Fails if the
|
|
// listening line doesn't appear within `readyTimeoutMs`.
|
|
readonly serve: (opts?: ServeOpts) => Effect.Effect<ServeHandle, Error, Scope.Scope>
|
|
// Spawn `opencode acp` and return a duplex JSON-RPC handle. Long-lived:
|
|
// the subprocess exits on stdin close, which the scope finalizer triggers.
|
|
readonly acp: (opts?: AcpOpts) => Effect.Effect<AcpHandle, Error, Scope.Scope>
|
|
// Escape hatch: any CLI invocation with full control over argv. Used to test
|
|
// commands that don't yet have a typed builder.
|
|
readonly spawn: (args: string[], opts?: SpawnOpts) => Effect.Effect<RunResult>
|
|
// Convenience assertion. Dumps captured stderr/stdout on mismatch so CI
|
|
// failures are debuggable without re-running locally.
|
|
readonly expectExit: (result: RunResult, expected: number, label?: string) => void
|
|
// Parse `--format json` stdout into one event object per non-empty line.
|
|
// The CLI writes `JSON.stringify({ type, sessionID, ... }) + EOL` for each
|
|
// event (see src/cli/cmd/run.ts `emit`). Throws on a malformed line so
|
|
// tests fail loudly rather than silently skipping data.
|
|
readonly parseJsonEvents: (stdout: string) => Array<Record<string, unknown>>
|
|
}
|
|
|
|
export type CliFixture = {
|
|
readonly llm: TestLLMServer["Service"]
|
|
readonly home: string
|
|
readonly opencode: OpencodeCli
|
|
}
|
|
|
|
// Provisions a TestLLMServer + tmpdir + spawn helper and invokes fn. Cleans
|
|
// up the tmpdir on scope exit. TestLLMServer.layer is provided internally so
|
|
// the caller doesn't need to wire it up — the fixture's lifetime is tied to
|
|
// the surrounding Scope.
|
|
export function withCliFixture<A, E>(
|
|
fn: (input: CliFixture) => Effect.Effect<A, E, Scope.Scope | HttpClient.HttpClient>,
|
|
): Effect.Effect<A, E | unknown, Scope.Scope> {
|
|
return Effect.gen(function* () {
|
|
const llm = yield* TestLLMServer
|
|
|
|
const home = path.join(os.tmpdir(), "oc-cli-" + Math.random().toString(36).slice(2))
|
|
yield* Effect.promise(() => fs.mkdir(home, { recursive: true }))
|
|
yield* Effect.addFinalizer(() =>
|
|
Effect.promise(() => fs.rm(home, { recursive: true, force: true }).catch(() => undefined)),
|
|
)
|
|
|
|
const configJson = JSON.stringify(testProviderConfig(llm.url))
|
|
const env = isolatedEnv(home, configJson)
|
|
|
|
const spawn = (args: string[], opts?: SpawnOpts): Effect.Effect<RunResult> =>
|
|
Effect.promise(async () => {
|
|
const start = Date.now()
|
|
// Process.run pipes stdout/stderr by default and returns them as Buffers.
|
|
const result = await Process.run(["bun", "run", "--conditions=browser", cliEntry, ...args], {
|
|
cwd: home,
|
|
timeout: opts?.timeoutMs ?? 30_000,
|
|
env: { ...process.env, ...env, ...opts?.env },
|
|
nothrow: true,
|
|
})
|
|
return {
|
|
exitCode: result.code,
|
|
stdout: result.stdout.toString(),
|
|
stderr: result.stderr.toString(),
|
|
durationMs: Date.now() - start,
|
|
}
|
|
})
|
|
|
|
const run = (message: string, opts?: RunOpts): Effect.Effect<RunResult> => {
|
|
const argv: string[] = ["run"]
|
|
if (opts?.printLogs) argv.push("--print-logs")
|
|
argv.push("--model", opts?.model ?? testModelID)
|
|
if (opts?.agent) argv.push("--agent", opts.agent)
|
|
if (opts?.format) argv.push("--format", opts.format)
|
|
if (opts?.command) argv.push("--command", opts.command)
|
|
if (opts?.extraArgs) argv.push(...opts.extraArgs)
|
|
argv.push(message)
|
|
return spawn(argv, opts)
|
|
}
|
|
|
|
const serve = Effect.fn("opencode.serve")(function* (opts?: ServeOpts) {
|
|
const argv = ["serve"]
|
|
// Default port 0 — let the OS pick a free port, parse the actual one
|
|
// off stdout. Hard-coded ports flake under parallel tests.
|
|
argv.push("--port", String(opts?.port ?? 0))
|
|
if (opts?.hostname) argv.push("--hostname", opts.hostname)
|
|
if (opts?.extraArgs) argv.push(...opts.extraArgs)
|
|
|
|
// Acquire the subprocess; release sends SIGTERM and awaits exit on
|
|
// scope close. Wrapped in Effect.ignore so a flaky kill doesn't surface
|
|
// as a finalizer error during test teardown.
|
|
const proc = yield* Effect.acquireRelease(
|
|
Effect.sync(() =>
|
|
Bun.spawn(["bun", "run", "--conditions=browser", cliEntry, ...argv], {
|
|
cwd: home,
|
|
env: { ...process.env, ...env, ...opts?.env },
|
|
stdout: "pipe",
|
|
stderr: "pipe",
|
|
}),
|
|
),
|
|
(p) =>
|
|
Effect.promise(() => {
|
|
p.kill()
|
|
return p.exited
|
|
}).pipe(Effect.ignore),
|
|
)
|
|
|
|
// Drain stderr in a scope-bound fork. Without this the OS pipe buffer
|
|
// eventually fills and the child blocks on its next log call. Kept as a
|
|
// tail buffer so timeout failures can include context.
|
|
const stderrChunks: string[] = []
|
|
yield* Effect.forkScoped(
|
|
Stream.fromReadableStream({
|
|
evaluate: () => proc.stderr,
|
|
onError: () => new Error("stderr stream error"),
|
|
}).pipe(
|
|
Stream.decodeText(),
|
|
Stream.runForEach((chunk) => Effect.sync(() => stderrChunks.push(chunk))),
|
|
Effect.ignore,
|
|
),
|
|
)
|
|
|
|
// Watch stdout line-by-line for the listening sentinel. Format
|
|
// (see src/cli/cmd/serve.ts):
|
|
// "opencode server listening on http://<host>:<port>"
|
|
const readyRe = /listening on (http:\/\/([^\s:]+):(\d+))/
|
|
const readyDeferred = yield* Deferred.make<{ url: string; hostname: string; port: number }>()
|
|
yield* Effect.forkScoped(
|
|
Stream.fromReadableStream({
|
|
evaluate: () => proc.stdout,
|
|
onError: () => new Error("stdout stream error"),
|
|
}).pipe(
|
|
Stream.decodeText(),
|
|
Stream.splitLines,
|
|
Stream.runForEach((line) => {
|
|
const m = line.match(readyRe)
|
|
return m ? Deferred.succeed(readyDeferred, { url: m[1], hostname: m[2], port: Number(m[3]) }) : Effect.void
|
|
}),
|
|
Effect.ignore,
|
|
),
|
|
)
|
|
|
|
const readyTimeoutMs = opts?.readyTimeoutMs ?? 15_000
|
|
const match = yield* Deferred.await(readyDeferred).pipe(
|
|
Effect.timeoutOrElse({
|
|
duration: Duration.millis(readyTimeoutMs),
|
|
orElse: () =>
|
|
Effect.fail(
|
|
new Error(
|
|
`opencode serve did not become ready within ${readyTimeoutMs}ms\n` +
|
|
`stderr (last 2000):\n${stderrChunks.join("").slice(-2000)}`,
|
|
),
|
|
),
|
|
}),
|
|
)
|
|
|
|
return {
|
|
url: match.url,
|
|
hostname: match.hostname,
|
|
port: match.port,
|
|
kill: () => {
|
|
proc.kill()
|
|
},
|
|
exited: proc.exited as Promise<number>,
|
|
} satisfies ServeHandle
|
|
})
|
|
|
|
const acp = Effect.fn("opencode.acp")(function* (opts?: AcpOpts) {
|
|
const argv = ["acp"]
|
|
if (opts?.cwd) argv.push("--cwd", opts.cwd)
|
|
if (opts?.extraArgs) argv.push(...opts.extraArgs)
|
|
|
|
// Acquire the subprocess. Release ends stdin (clean shutdown — ACP exits
|
|
// on stdin EOF) and falls back to SIGTERM if it doesn't exit promptly.
|
|
// Either way we await proc.exited so the test scope doesn't leak.
|
|
const proc = yield* Effect.acquireRelease(
|
|
Effect.sync(() =>
|
|
Bun.spawn(["bun", "run", "--conditions=browser", cliEntry, ...argv], {
|
|
cwd: opts?.cwd ?? home,
|
|
env: { ...process.env, ...env, ...opts?.env },
|
|
stdin: "pipe",
|
|
stdout: "pipe",
|
|
stderr: "pipe",
|
|
}),
|
|
),
|
|
(p) =>
|
|
// Graceful shutdown: close stdin (ACP exits on EOF), give it a
|
|
// window to exit, then SIGTERM. The Effect.timeoutOrElse expresses
|
|
// exactly that race without raw setTimeout or Promise.race.
|
|
Effect.gen(function* () {
|
|
yield* Effect.sync(() => p.stdin.end())
|
|
yield* Effect.promise(() => p.exited).pipe(
|
|
Effect.timeoutOrElse({
|
|
duration: Duration.seconds(2),
|
|
orElse: () =>
|
|
Effect.sync(() => {
|
|
p.kill()
|
|
}),
|
|
}),
|
|
)
|
|
yield* Effect.promise(() => p.exited)
|
|
}).pipe(Effect.ignore),
|
|
)
|
|
|
|
const stderrChunks: string[] = []
|
|
yield* Effect.forkScoped(
|
|
Stream.fromReadableStream({
|
|
evaluate: () => proc.stderr,
|
|
onError: () => new Error("stderr stream error"),
|
|
}).pipe(
|
|
Stream.decodeText(),
|
|
Stream.runForEach((chunk) => Effect.sync(() => stderrChunks.push(chunk))),
|
|
Effect.ignore,
|
|
),
|
|
)
|
|
|
|
// Each ndjson line becomes one queue entry. JSON.parse failures are
|
|
// surfaced as the raw string so a malformed protocol message doesn't
|
|
// silently wedge the test in `receive`.
|
|
const responses = yield* Queue.unbounded<unknown>()
|
|
yield* Effect.forkScoped(
|
|
Stream.fromReadableStream({
|
|
evaluate: () => proc.stdout,
|
|
onError: () => new Error("stdout stream error"),
|
|
}).pipe(
|
|
Stream.decodeText(),
|
|
Stream.splitLines,
|
|
Stream.runForEach((line) => {
|
|
if (line.length === 0) return Effect.void
|
|
let parsed: unknown
|
|
try {
|
|
parsed = JSON.parse(line)
|
|
} catch {
|
|
parsed = { _rawLine: line }
|
|
}
|
|
return Queue.offer(responses, parsed)
|
|
}),
|
|
Effect.ignore,
|
|
),
|
|
)
|
|
|
|
return {
|
|
send: (msg: object) =>
|
|
Effect.sync(() => {
|
|
proc.stdin.write(JSON.stringify(msg) + "\n")
|
|
}),
|
|
receive: Queue.take(responses),
|
|
close: () => {
|
|
try {
|
|
proc.stdin.end()
|
|
} catch {
|
|
// already closed
|
|
}
|
|
},
|
|
exited: proc.exited as Promise<number>,
|
|
} satisfies AcpHandle
|
|
})
|
|
|
|
const opencode: OpencodeCli = { run, serve, acp, spawn, expectExit, parseJsonEvents }
|
|
|
|
return yield* fn({ llm, home, opencode })
|
|
// FetchHttpClient is provided so test bodies can `yield* HttpClient.HttpClient`
|
|
// and hit endpoints on `opencode.serve()` without rolling their own fetch.
|
|
}).pipe(Effect.provide(Layer.mergeAll(TestLLMServer.layer, FetchHttpClient.layer)))
|
|
}
|
|
|
|
function parseJsonEvents(stdout: string): Array<Record<string, unknown>> {
|
|
return stdout
|
|
.split("\n")
|
|
.map((line) => line.trim())
|
|
.filter((line) => line.length > 0)
|
|
.map((line) => JSON.parse(line) as Record<string, unknown>)
|
|
}
|
|
|
|
// Convenience for the common assertion pattern. Dumps stderr/stdout when
|
|
// the exit code doesn't match — saves debugging time on CI failures.
|
|
function expectExit(result: RunResult, expected: number, label = "opencode") {
|
|
if (result.exitCode === expected) return
|
|
const tail = (s: string, n: number) => (s.length > n ? "..." + s.slice(-n) : s)
|
|
// eslint-disable-next-line no-console
|
|
console.error(`[${label}] expected exit ${expected}, got ${result.exitCode} after ${result.durationMs}ms`)
|
|
// eslint-disable-next-line no-console
|
|
console.error(`[${label}] stderr (last 2000):\n${tail(result.stderr, 2000)}`)
|
|
// eslint-disable-next-line no-console
|
|
console.error(`[${label}] stdout (last 500):\n${tail(result.stdout, 500)}`)
|
|
throw new Error(`${label}: expected exit ${expected}, got ${result.exitCode}`)
|
|
}
|
|
|
|
// `cliIt.live(name, fixture => effect)` is the same as
|
|
// `it.live(name, () => withCliFixture(fixture))` — one fewer nesting level at
|
|
// every call site. Use this for any test that needs the opencode CLI fixture.
|
|
//
|
|
// Only `.live` is exposed because subprocess tests must run against the real
|
|
// clock — a TestClock-paused environment can't drive a child process. If you
|
|
// need `.only` or `.skip`, fall back to `it.live` + `withCliFixture` directly.
|
|
// Body's R is `Scope.Scope | never` so tests can yield* scope-requiring
|
|
// resources (e.g. `opencode.serve`) without an extra `Effect.scoped` wrapper —
|
|
// `withCliFixture`'s outer scope is the natural lifetime.
|
|
export const cliIt = {
|
|
live: <A, E>(
|
|
name: string,
|
|
body: (input: CliFixture) => Effect.Effect<A, E, Scope.Scope | HttpClient.HttpClient>,
|
|
opts?: number | TestOptions,
|
|
) => it.live(name, () => withCliFixture(body), opts),
|
|
}
|