Compare commits

..

4 Commits

Author SHA1 Message Date
Nikhil Sonti
8e83d3a670 fix(patch): handle canonical workspace paths 2026-05-04 18:03:36 -07:00
Nikhil
d61d6fc8a9 feat: add ACPX agent runtime adapters (#924)
* feat: add acpx claude runtime paths

* feat: add acpx adapter preparation

* refactor: use acpx adapter preparation

* refactor: move openclaw image turns to adapter

* fix: keep openclaw independent of host cwd

* fix: address acpx review feedback

* fix: preserve claude host auth in acpx
2026-05-04 11:04:24 -07:00
shivammittal274
d383b5e344 feat(eval): add claude-generated run report artifact (#892)
* feat(eval): add claude-generated run report artifact

* fix(eval): install claude code cli for CI evals

* fix(eval): bypass claude code tool permissions

* Eval metrics configs (#932)

* feat(eval): add agisdk comparison metrics configs

* fix(eval): keep cdp crashes from aborting run
2026-05-04 21:09:06 +05:30
Dani Akash
ce4bb44083 feat(agent): /home composer parity with image attachments (#930)
* feat(agent): /home composer parity with image attachments

The /home composer used the same ConversationInput component as the
chat screen but passed attachmentsEnabled={false}, and the home →
chat handoff was a URL search param `?q=<text>` that physically
can't carry binary attachments. Pasting a screenshot at /home did
nothing.

Add a small in-memory registry (pending-initial-message.ts) as the
rich-data side channel for the same navigation: the home composer
writes { agentId, text, attachments } there before navigating; the
chat screen consumes it on mount and replays through the existing
harness send() path that already supports attachments. URL `?q=`
stays for shareable text-only prompts; the registry wins when both
are present. Module-scope, 10s TTL, destructive consume.

Net: home is now flagged attachmentsEnabled={true}; users can paste,
drag, or pick image files at /home and they survive the navigation
into the chat screen with previews intact.

* docs(agent): clarify why initial-message ref reset is safe post-registry-fire
2026-05-04 18:02:31 +05:30
54 changed files with 2426 additions and 834 deletions

View File

@@ -44,6 +44,19 @@ jobs:
working-directory: packages/browseros-agent
run: bun install --ignore-scripts
- name: Install Claude Code CLI
working-directory: packages/browseros-agent/apps/eval
env:
EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/legacy/browseros-agent-weekly.json' }}
run: |
if bun -e "const config = await Bun.file(process.env.EVAL_CONFIG).json(); process.exit(config.agent?.type === 'claude-code' ? 0 : 1)"; then
npm install -g @anthropic-ai/claude-code@2.1.119
echo "Claude Code CLI installed at $(command -v claude)"
claude --version
else
echo "Eval config does not use Claude Code; skipping Claude Code CLI install"
fi
- name: Install Python eval dependencies
# agisdk pinned so silent upstream releases can't shift task definitions
# or grader behavior. Bump intentionally with a documented re-baseline.
@@ -67,13 +80,11 @@ jobs:
env:
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
NOPECHA_API_KEY: ${{ secrets.NOPECHA_API_KEY }}
EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
BROWSEROS_BINARY: /usr/bin/browseros
WEBARENA_INFINITY_DIR: /tmp/webarena-infinity
# OpenClaw container runtime is macOS-only; opt the Linux runner
@@ -82,7 +93,35 @@ jobs:
EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/legacy/browseros-agent-weekly.json' }}
run: |
echo "Running eval with config: $EVAL_CONFIG"
xvfb-run --auto-servernum --server-args="-screen 0 1440x900x24" bun run src/index.ts suite --config "$EVAL_CONFIG" --publish r2
xvfb-run --auto-servernum --server-args="-screen 0 1440x900x24" bun run src/index.ts suite --config "$EVAL_CONFIG"
# Capture the run directory so report.html can be generated before the R2 publish step.
SUMMARY_PATH="$(find results -name summary.json -type f -print | sort | tail -n 1)"
if [ -z "$SUMMARY_PATH" ]; then
echo "No eval run summary found"
exit 1
fi
RUN_DIR="$(dirname "$SUMMARY_PATH")"
echo "EVAL_RUN_DIR=$RUN_DIR" >> "$GITHUB_ENV"
- name: Generate run analysis report
if: success()
working-directory: packages/browseros-agent/apps/eval
env:
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
run: |
echo "Generating run report for $EVAL_RUN_DIR"
bun scripts/generate-report.ts --input "$EVAL_RUN_DIR" --output "$EVAL_RUN_DIR/report.html"
- name: Publish eval run to R2
if: success()
working-directory: packages/browseros-agent/apps/eval
env:
EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
run: bun run src/index.ts publish --run "$EVAL_RUN_DIR" --target r2
- name: Generate trend report
if: success()
@@ -97,7 +136,7 @@ jobs:
EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
run: bun apps/eval/scripts/weekly-report.ts /tmp/eval-report.html
- name: Upload report as artifact
- name: Upload trend report as artifact
if: success()
uses: actions/upload-artifact@v4
with:

View File

@@ -26,6 +26,7 @@ import {
filterTurnsPersistedInHistory,
flattenHistoryPages,
} from './claw-chat-types'
import { consumePendingInitialMessage } from './pending-initial-message'
import { QueuePanel } from './QueuePanel'
import { useAgentConversation } from './useAgentConversation'
import { useHarnessChatHistory } from './useHarnessChatHistory'
@@ -113,25 +114,52 @@ function AgentConversationController({
sendRef.current = send
useEffect(() => {
if (disabled || !historyReady) return
// Registry-first: when the user submitted at /home with
// attachments, the rich payload is here. URL `?q=` may also be
// present and is the text-only fallback path; the registry wins
// when both exist because it carries the binary attachments
// alongside the text.
const pending = consumePendingInitialMessage(agentId)
if (pending) {
// Mark the dedup ref so the text-only branch below doesn't
// re-fire on the same render.
if (initialMessageKey) {
initialMessageSentRef.current = initialMessageKey
}
onInitialMessageConsumedRef.current()
void sendRef.current({
text: pending.text,
attachments: pending.attachments.map((a) => a.payload),
attachmentPreviews: pending.attachments.map((a) => ({
id: a.id,
kind: a.kind,
mediaType: a.mediaType,
name: a.name,
dataUrl: a.dataUrl,
})),
})
return
}
const query = initialMessage?.trim()
if (!initialMessageKey) {
// Reset is safe even on the post-registry-fire re-run: consume
// is destructive, so the registry is already drained — there's
// nothing left for a third run to re-send.
initialMessageSentRef.current = null
return
}
if (
!query ||
initialMessageSentRef.current === initialMessageKey ||
disabled ||
!historyReady
) {
if (!query || initialMessageSentRef.current === initialMessageKey) {
return
}
initialMessageSentRef.current = initialMessageKey
onInitialMessageConsumedRef.current()
void sendRef.current({ text: query })
}, [disabled, historyReady, initialMessage, initialMessageKey])
}, [agentId, disabled, historyReady, initialMessage, initialMessageKey])
const handleSelectAgent = (entry: AgentEntry) => {
navigate(`${agentPathPrefix}/${entry.agentId}`)

View File

@@ -18,8 +18,12 @@ import { SignInHint } from '@/entrypoints/newtab/index/SignInHint'
import { useActiveHint } from '@/entrypoints/newtab/index/useActiveHint'
import { AgentCardDock } from './AgentCardDock'
import { useAgentCommandData } from './agent-command-layout'
import { ConversationInput } from './ConversationInput'
import {
ConversationInput,
type ConversationInputSendInput,
} from './ConversationInput'
import { orderHomeAgents } from './home-agent-card.helpers'
import { setPendingInitialMessage } from './pending-initial-message'
function EmptyAgentsState({ onOpenAgents }: { onOpenAgents: () => void }) {
return (
@@ -116,8 +120,19 @@ export const AgentCommandHome: FC = () => {
}
}, [legacyAgents, selectedAgentId])
const handleSend = (input: { text: string }) => {
const handleSend = (input: ConversationInputSendInput) => {
if (!selectedAgentId) return
// Stash text + attachments in the in-memory registry. Text also
// travels in `?q=` so a hard refresh / shareable URL still works
// for text-only prompts; attachments are registry-only because a
// multi-megabyte dataUrl can't ride a URL search param. The chat
// screen prefers the registry when both are present.
setPendingInitialMessage({
agentId: selectedAgentId,
text: input.text,
attachments: input.attachments,
createdAt: Date.now(),
})
navigate(
`/home/agents/${selectedAgentId}?q=${encodeURIComponent(input.text)}`,
)
@@ -167,7 +182,7 @@ export const AgentCommandHome: FC = () => {
streaming={false}
disabled={!selectedAgentReady}
status={selectedAgentStatus}
attachmentsEnabled={false}
attachmentsEnabled={true}
placeholder={
selectedAgentReady
? `Ask ${selectedAgentName} to handle a task...`

View File

@@ -0,0 +1,109 @@
import { afterEach, describe, expect, it } from 'bun:test'
import type { StagedAttachment } from '@/lib/attachments'
import {
consumePendingInitialMessage,
peekPendingInitialMessage,
setPendingInitialMessage,
} from './pending-initial-message'
function makeAttachment(id: string): StagedAttachment {
return {
id,
kind: 'image',
mediaType: 'image/png',
name: `${id}.png`,
dataUrl: `data:image/png;base64,${id}`,
payload: {
kind: 'image',
mediaType: 'image/png',
name: `${id}.png`,
dataUrl: `data:image/png;base64,${id}`,
},
}
}
afterEach(() => {
// Drain any leftover pending entry so tests don't leak into each
// other (the module-scope state survives across `it` blocks).
consumePendingInitialMessage('drain')
// If still set, clear by consuming with the matching id.
const leftover = peekPendingInitialMessage()
if (leftover) consumePendingInitialMessage(leftover.agentId)
})
describe('pending-initial-message', () => {
it('consume returns the payload set for the same agentId', () => {
setPendingInitialMessage({
agentId: 'agent-a',
text: 'hello',
attachments: [makeAttachment('one')],
createdAt: Date.now(),
})
const result = consumePendingInitialMessage('agent-a')
expect(result?.text).toBe('hello')
expect(result?.attachments).toHaveLength(1)
expect(result?.attachments[0]?.id).toBe('one')
})
it('consume is destructive — second call returns null', () => {
setPendingInitialMessage({
agentId: 'agent-a',
text: 'hello',
attachments: [],
createdAt: Date.now(),
})
expect(consumePendingInitialMessage('agent-a')).not.toBeNull()
expect(consumePendingInitialMessage('agent-a')).toBeNull()
})
it('consume returns null and preserves entry when agentId differs', () => {
setPendingInitialMessage({
agentId: 'agent-a',
text: 'hello',
attachments: [],
createdAt: Date.now(),
})
expect(consumePendingInitialMessage('agent-b')).toBeNull()
expect(peekPendingInitialMessage()?.agentId).toBe('agent-a')
expect(consumePendingInitialMessage('agent-a')).not.toBeNull()
})
it('returns null for entries older than the TTL', () => {
setPendingInitialMessage({
agentId: 'agent-a',
text: 'old',
attachments: [],
createdAt: Date.now() - 11_000, // older than 10 s TTL
})
expect(consumePendingInitialMessage('agent-a')).toBeNull()
})
it('replaces a previous pending entry when set is called again', () => {
setPendingInitialMessage({
agentId: 'agent-a',
text: 'first',
attachments: [],
createdAt: Date.now(),
})
setPendingInitialMessage({
agentId: 'agent-b',
text: 'second',
attachments: [makeAttachment('two')],
createdAt: Date.now(),
})
expect(consumePendingInitialMessage('agent-a')).toBeNull()
const result = consumePendingInitialMessage('agent-b')
expect(result?.text).toBe('second')
expect(result?.attachments[0]?.id).toBe('two')
})
it('no-ops when set is called with empty agentId', () => {
setPendingInitialMessage({
agentId: '',
text: 'oops',
attachments: [],
createdAt: Date.now(),
})
expect(peekPendingInitialMessage()).toBeNull()
})
})

View File

@@ -0,0 +1,81 @@
import type { StagedAttachment } from '@/lib/attachments'
/**
* Same-tab in-memory handoff between the `/home` composer and the
* chat screen at `/home/agents/:agentId`. URL search params (`?q=`)
* carry the text fine, but cannot carry binary attachments — a multi-
* megabyte image dataUrl would explode URL length limits and round-
* trip badly. This module is the rich-data side channel for the same
* navigation: the composer writes here, the chat screen reads here on
* mount.
*
* Intentionally module-scope. Same render tree, same tab — no need
* for sessionStorage (which would force JSON-serialising the dataUrls
* and re-parsing on the read side). Cross-tab handoff is out of
* scope: the user typing at home in tab A and switching to tab B's
* chat would surface an empty registry there, which is the correct
* behaviour.
*/
export interface PendingInitialMessage {
agentId: string
text: string
attachments: StagedAttachment[]
createdAt: number
}
/**
* 10s TTL on the entry. A stale entry from a back-button journey
* shouldn't fire on a future visit; if real-world latency makes 10s
* too tight under slow harness boot, bump but never make it
* indefinite.
*/
const PENDING_TTL_MS = 10_000
let pending: PendingInitialMessage | null = null
let pendingTimer: ReturnType<typeof setTimeout> | null = null
function clearPending(): void {
pending = null
if (pendingTimer !== null) {
clearTimeout(pendingTimer)
pendingTimer = null
}
}
export function setPendingInitialMessage(payload: PendingInitialMessage): void {
// Defensive: the home composer should never call this without an
// agent selected. If it somehow does, no-op rather than holding a
// payload we can't route.
if (!payload.agentId) return
clearPending()
pending = payload
pendingTimer = setTimeout(clearPending, PENDING_TTL_MS)
}
/**
* Destructive read. Returns the entry only if `agentId` matches and
* the entry is fresh; clears the entry on success so Strict-Mode
* double-invokes can't double-send.
*/
export function consumePendingInitialMessage(
agentId: string,
): PendingInitialMessage | null {
if (!pending) return null
if (pending.agentId !== agentId) return null
if (Date.now() - pending.createdAt >= PENDING_TTL_MS) {
clearPending()
return null
}
const entry = pending
clearPending()
return entry
}
/**
* Non-mutating read for tests. Production code should never need this
* — use `consume` and own the lifecycle.
*/
export function peekPendingInitialMessage(): PendingInitialMessage | null {
return pending
}

View File

@@ -0,0 +1,26 @@
{
"agent": {
"type": "single",
"provider": "openai-compatible",
"model": "moonshotai/kimi-k2.5",
"apiKey": "OPENROUTER_API_KEY",
"baseUrl": "https://openrouter.ai/api/v1",
"supportsImages": true
},
"dataset": "../../data/agisdk-real.jsonl",
"num_workers": 3,
"restart_server_per_task": true,
"browseros": {
"server_url": "http://127.0.0.1:9110",
"base_cdp_port": 9010,
"base_server_port": 9110,
"base_extension_port": 9310,
"load_extensions": false,
"headless": false
},
"captcha": {
"api_key_env": "NOPECHA_API_KEY"
},
"graders": ["agisdk_state_diff"],
"timeout_ms": 1800000
}

View File

@@ -0,0 +1,27 @@
{
"agent": {
"type": "single",
"provider": "bedrock",
"model": "global.anthropic.claude-opus-4-6-v1",
"region": "AWS_REGION",
"accessKeyId": "AWS_ACCESS_KEY_ID",
"secretAccessKey": "AWS_SECRET_ACCESS_KEY",
"supportsImages": true
},
"dataset": "../../data/agisdk-real.jsonl",
"num_workers": 2,
"restart_server_per_task": true,
"browseros": {
"server_url": "http://127.0.0.1:9110",
"base_cdp_port": 9010,
"base_server_port": 9110,
"base_extension_port": 9310,
"load_extensions": false,
"headless": false
},
"captcha": {
"api_key_env": "NOPECHA_API_KEY"
},
"graders": ["agisdk_state_diff"],
"timeout_ms": 1800000
}

View File

@@ -8,7 +8,7 @@
"supportsImages": true
},
"dataset": "../../data/agisdk-real.jsonl",
"num_workers": 10,
"num_workers": 3,
"restart_server_per_task": true,
"browseros": {
"server_url": "http://127.0.0.1:9110",

View File

@@ -1,7 +1,8 @@
{
"agent": {
"type": "claude-code",
"model": "opus"
"model": "opus",
"extraArgs": ["--permission-mode", "bypassPermissions"]
},
"dataset": "../../data/agisdk-real.jsonl",
"num_workers": 1,

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env bun
import { mkdir, stat } from 'node:fs/promises'
import { dirname, resolve } from 'node:path'
import { query as claudeQuery } from '@anthropic-ai/claude-agent-sdk'
import { readRunMetricSummary } from '../src/reporting/task-metrics'
export const DEFAULT_REPORT_MODEL = 'claude-opus-4-6'
export const DEFAULT_REPORT_MAX_TURNS = 300
type Env = Record<string, string | undefined>
type ClaudeQuery = (input: unknown) => AsyncIterable<Record<string, unknown>>
export interface ReportAgentInvocation {
inputDir: string
outputPath: string
prompt: string
}
export interface GenerateEvalReportOptions {
inputDir: string
outputPath: string
runAgent?: (invocation: ReportAgentInvocation) => Promise<void>
}
interface ClaudeReportAgentDeps {
query?: ClaudeQuery
env?: Env
}
function usage(): string {
return `Usage: bun scripts/generate-report.ts --input <run-dir> --output <report.html>`
}
function parseArgs(
argv: string[],
): Pick<GenerateEvalReportOptions, 'inputDir' | 'outputPath'> {
let inputDir = ''
let outputPath = ''
for (let i = 0; i < argv.length; i++) {
const arg = argv[i]
if (arg === '--input' || arg === '--run') {
inputDir = argv[++i] ?? ''
} else if (arg === '--output' || arg === '--out') {
outputPath = argv[++i] ?? ''
} else if (arg === '--help' || arg === '-h') {
console.log(usage())
process.exit(0)
}
}
if (!inputDir || !outputPath) {
throw new Error(usage())
}
return { inputDir, outputPath }
}
function claudeCodeEnv(env: Env): Env {
return {
CLAUDE_CODE_OAUTH_TOKEN: env.CLAUDE_CODE_OAUTH_TOKEN,
ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY,
HOME: env.HOME,
PATH: env.PATH,
SHELL: env.SHELL,
TMPDIR: env.TMPDIR,
TMP: env.TMP,
TEMP: env.TEMP,
USER: env.USER,
CLAUDECODE: '',
}
}
async function buildReportPrompt(
inputDir: string,
outputPath: string,
): Promise<string> {
const metrics = await readRunMetricSummary(inputDir)
return `Analyze this BrowserOS eval run and write a shareable HTML report.
Run directory: ${inputDir}
Output file to write: ${outputPath}
You are running with the run directory as cwd. Inspect the local artifacts:
- summary.json for run totals and pass rate
- each task directory's metadata.json for query, final answer, timing, screenshots, and grader results
- each task directory's messages.jsonl for tool calls, tool errors, and recent trajectory
- screenshots/ for visual evidence
- grader-artifacts/ when present for grader-specific context
Write the final report directly to the output file path above. Do not print the
report instead of writing it. Do not modify any input artifacts. The only file
you should create or overwrite is the requested report.html.
The report should follow the style and density of the Shadowfax AGI SDK report:
- Title like "AGI SDK Random-10 Failure Report" or a run-specific equivalent
- Run directory and note that screenshots are embedded as data URIs
- Summary cards for total tasks, passed, failed, pass rate, average duration, average steps, and average tool calls
- A Metrics section with compact charts for Duration by task, Steps by task, Tool calls by task, and Tool errors by task
- Task Summary table with task id, status, score, duration, steps, and prompt
- Include tool calls and tool errors in the Task Summary table
- Failure sections with stable anchors using each task id, for example <section id="agisdk-networkin-10">
- For each failed task: Diagnosis, Evidence, Next Check, final screenshot, AGI SDK / grader criteria, final answer, and recent trajectory events
- Make failure links in the summary table point to the task anchors
- Keep the HTML self-contained: inline CSS and embedded final screenshots as data:image/png;base64 URIs
- Escape user/model text correctly so task outputs cannot break the page
Analysis guidance:
- Focus on why the model failed: task understanding, browser/tool usage, missing verification, tool errors, max-step/timeout, bad final answer, or grader ambiguity
- Use messages.jsonl strategically. Do not paste huge DOM outputs into the report. Summarize only the relevant recent trajectory and evidence.
- Limit trajectory analysis to the most relevant 200-300 events/calls across the run. Prefer failed tasks and the final/key actions for each failure.
- If a grader criterion is boolean-only or ambiguous, say so and identify what additional artifact would make it debuggable.
Deterministic run metrics computed from metadata.json and messages.jsonl:
\`\`\`json
${JSON.stringify(metrics, null, 2)}
\`\`\`
After writing the file, verify that ${outputPath} exists and is non-empty.`
}
async function assertRunDir(inputDir: string): Promise<void> {
const inputStat = await stat(inputDir).catch(() => null)
if (!inputStat?.isDirectory()) {
throw new Error(`Not a run directory: ${inputDir}`)
}
}
async function assertReportWritten(outputPath: string): Promise<void> {
const outputStat = await stat(outputPath).catch(() => null)
if (!outputStat?.isFile() || outputStat.size === 0) {
throw new Error(`Report was not written: ${outputPath}`)
}
}
export async function runClaudeCodeReportAgent(
invocation: ReportAgentInvocation,
deps: ClaudeReportAgentDeps = {},
): Promise<void> {
const query = deps.query ?? (claudeQuery as unknown as ClaudeQuery)
let resultSubtype: string | undefined
for await (const message of query({
prompt: invocation.prompt,
options: {
cwd: invocation.inputDir,
model: DEFAULT_REPORT_MODEL,
systemPrompt:
'You are an eval failure analyst. Produce a concise, evidence-backed, self-contained HTML report from local run artifacts.',
permissionMode: 'bypassPermissions',
allowDangerouslySkipPermissions: true,
maxTurns: DEFAULT_REPORT_MAX_TURNS,
env: claudeCodeEnv(deps.env ?? process.env),
},
})) {
if (message.type === 'result') {
resultSubtype =
typeof message.subtype === 'string' ? message.subtype : undefined
}
}
if (resultSubtype && resultSubtype !== 'success') {
throw new Error(`Claude Code report agent failed: ${resultSubtype}`)
}
}
export async function generateEvalReport(
options: GenerateEvalReportOptions,
): Promise<void> {
const inputDir = resolve(options.inputDir)
const outputPath = resolve(options.outputPath)
await assertRunDir(inputDir)
await mkdir(dirname(outputPath), { recursive: true })
const invocation = {
inputDir,
outputPath,
prompt: await buildReportPrompt(inputDir, outputPath),
}
await (options.runAgent ?? runClaudeCodeReportAgent)(invocation)
await assertReportWritten(outputPath)
}
if (import.meta.main) {
try {
await generateEvalReport(parseArgs(Bun.argv.slice(2)))
} catch (error) {
console.error(error instanceof Error ? error.message : String(error))
process.exit(1)
}
}

View File

@@ -134,7 +134,10 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
// Connect to Chrome via CDP — same per-worker offset used by app-manager.
const cdpPort = config.browseros.base_cdp_port + workerIndex
const cdp = new CdpBackend({ port: cdpPort })
const cdp = new CdpBackend({
port: cdpPort,
exitOnReconnectFailure: false,
})
await cdp.connect()
const browser = new Browser(cdp)
capture.screenshot.setBrowser(browser)

View File

@@ -43,7 +43,10 @@ export class SingleAgentEvaluator implements AgentEvaluator {
// Connect to Chrome via CDP — same per-worker offset used by app-manager.
const cdpPort = config.browseros.base_cdp_port + workerIndex
const cdp = new CdpBackend({ port: cdpPort })
const cdp = new CdpBackend({
port: cdpPort,
exitOnReconnectFailure: false,
})
await cdp.connect()
const browser = new Browser(cdp)

View File

@@ -536,6 +536,12 @@ export interface DashboardConfig {
configMode?: boolean
}
export function shouldAutoOpenDashboard(
env: Record<string, string | undefined> = process.env,
): boolean {
return env.CI !== 'true'
}
export function startDashboard(config: DashboardConfig) {
const port = config.port ?? 9900
dashboardConfigMode = config.configMode ?? false
@@ -558,10 +564,12 @@ export function startDashboard(config: DashboardConfig) {
console.log(` Dashboard: ${url}`)
// Auto-open browser
try {
Bun.spawn(['open', url], { stdout: 'ignore', stderr: 'ignore' })
} catch {
/* ignore if open command fails */
if (shouldAutoOpenDashboard()) {
try {
Bun.spawn(['open', url], { stdout: 'ignore', stderr: 'ignore' })
} catch {
/* ignore if open command fails */
}
}
return { url, port }

View File

@@ -61,6 +61,17 @@
.header-stats .stat-pass { color: #3fb950; }
.header-stats .stat-fail { color: #f85149; }
.header-stats .stat-score { color: #f0883e; }
.header-report {
color: #58a6ff;
text-decoration: none;
font-size: 12px;
font-weight: 600;
border: 1px solid #30363d;
border-radius: 6px;
padding: 5px 9px;
white-space: nowrap;
}
.header-report:hover { border-color: #58a6ff; background: #1c2333; }
/* ── 3-column layout ─────────────────────────────────────────── */
.layout {
@@ -84,6 +95,7 @@
background: #161b22;
border-bottom: 1px solid #30363d;
display: flex;
flex-wrap: wrap;
gap: 12px;
font-size: 11px;
font-weight: 600;
@@ -93,6 +105,80 @@
}
.sidebar-stats .s-pass { color: #3fb950; }
.sidebar-stats .s-fail { color: #f85149; }
.sidebar-metrics {
padding: 12px 16px;
background: #0d1117;
border-bottom: 1px solid #21262d;
}
.metric-grid {
display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr));
gap: 8px;
margin-bottom: 12px;
}
.metric-cell {
min-width: 0;
}
.metric-label {
display: block;
font-size: 9px;
font-weight: 600;
color: #6e7681;
text-transform: uppercase;
letter-spacing: 0.04em;
white-space: nowrap;
}
.metric-value {
display: block;
font-size: 13px;
font-weight: 700;
color: #e6edf3;
margin-top: 2px;
overflow: hidden;
text-overflow: ellipsis;
}
.mini-chart {
display: flex;
flex-direction: column;
gap: 6px;
}
.mini-chart-title {
font-size: 10px;
font-weight: 700;
color: #8b949e;
text-transform: uppercase;
letter-spacing: 0.04em;
}
.mini-bar-row {
display: grid;
grid-template-columns: minmax(60px, 1fr) 70px 28px;
gap: 8px;
align-items: center;
font-size: 10px;
color: #8b949e;
}
.mini-bar-name {
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
}
.mini-bar-track {
height: 6px;
background: #21262d;
border-radius: 999px;
overflow: hidden;
}
.mini-bar-fill {
height: 100%;
background: #58a6ff;
border-radius: 999px;
}
.mini-bar-value {
color: #e6edf3;
font-variant-numeric: tabular-nums;
text-align: right;
}
.sidebar-filter {
padding: 8px 12px;
border-bottom: 1px solid #21262d;
@@ -526,6 +612,7 @@
<div class="header-sep"></div>
<span class="header-run" id="header-run"></span>
<span class="header-date" id="header-date"></span>
<a class="header-report" id="header-report" target="_blank" rel="noopener" style="display: none;">Run Report</a>
<div class="header-stats" id="header-stats"></div>
</div>
@@ -533,6 +620,7 @@
<!-- Left sidebar -->
<div class="sidebar" id="sidebar">
<div class="sidebar-stats" id="sidebar-stats"></div>
<div class="sidebar-metrics" id="sidebar-metrics"></div>
<div class="sidebar-filter">
<input type="text" id="filter-input" placeholder="Search tasks..." autocomplete="off" spellcheck="false" />
</div>
@@ -627,7 +715,23 @@
if (stats.avgScore !== null) {
parts.push(`<span class="stat-score">avg ${stats.avgScore}%</span>`);
}
if (stats.avgDurationMs !== null) {
parts.push(`<span>${fmtDuration(stats.avgDurationMs)} avg</span>`);
}
if (stats.avgToolCalls !== null) {
parts.push(`<span>${fmtCompact(stats.avgToolCalls)} tools/task</span>`);
}
el.innerHTML = parts.join('');
const reportLink = document.getElementById('header-report');
const url = reportUrl(manifest);
if (url) {
reportLink.href = url;
reportLink.style.display = '';
} else {
reportLink.removeAttribute('href');
reportLink.style.display = 'none';
}
}
// ── Sidebar rendering ─────────────────────────────────────────
@@ -639,11 +743,49 @@
statsEl.innerHTML =
'<span>' + stats.total + ' total</span>' +
'<span class="s-pass">' + stats.passed + ' pass</span>' +
'<span class="s-fail">' + stats.failed + ' fail</span>';
'<span class="s-fail">' + stats.failed + ' fail</span>' +
(stats.avgSteps !== null ? '<span>' + fmtCompact(stats.avgSteps) + ' steps/task</span>' : '') +
(stats.avgToolCalls !== null ? '<span>' + fmtCompact(stats.avgToolCalls) + ' tools/task</span>' : '');
renderSidebarMetrics(tasks, stats);
renderTaskList('');
}
function renderSidebarMetrics(tasks, stats) {
const el = document.getElementById('sidebar-metrics');
if (!el) return;
const chartTasks = tasks
.slice()
.sort((a, b) => taskMetrics(b).toolCalls - taskMetrics(a).toolCalls)
.slice(0, 5);
const maxCalls = Math.max(1, ...chartTasks.map((task) => taskMetrics(task).toolCalls));
const bars = chartTasks.map((task) => {
const calls = taskMetrics(task).toolCalls;
const width = Math.max(4, Math.round((calls / maxCalls) * 100));
return (
'<div class="mini-bar-row">' +
'<span class="mini-bar-name" title="' + escAttr(task.queryId || task.id || 'Untitled') + '">' + esc(task.queryId || task.id || 'Untitled') + '</span>' +
'<span class="mini-bar-track"><span class="mini-bar-fill" style="width: ' + width + '%"></span></span>' +
'<span class="mini-bar-value">' + fmtCompact(calls) + '</span>' +
'</div>'
);
}).join('');
el.innerHTML =
'<div class="metric-grid">' +
'<div class="metric-cell"><span class="metric-label">Avg Time</span><span class="metric-value">' + (stats.avgDurationMs !== null ? fmtDuration(stats.avgDurationMs) : '-') + '</span></div>' +
'<div class="metric-cell"><span class="metric-label">Avg Steps</span><span class="metric-value">' + (stats.avgSteps !== null ? fmtCompact(stats.avgSteps) : '-') + '</span></div>' +
'<div class="metric-cell"><span class="metric-label">Avg Tools</span><span class="metric-value">' + (stats.avgToolCalls !== null ? fmtCompact(stats.avgToolCalls) : '-') + '</span></div>' +
'</div>' +
'<div class="mini-chart">' +
'<div class="mini-chart-title">Tool Calls by Task</div>' +
(bars || '<div class="task-meta-line"><span>No tool calls recorded</span></div>') +
'</div>';
}
function renderTaskList(filter) {
const list = document.getElementById('task-list');
list.innerHTML = '';
@@ -668,8 +810,11 @@
}
const metaParts = [];
if (task.durationMs) metaParts.push(fmtDuration(task.durationMs));
if (task.screenshotCount) metaParts.push(`${task.screenshotCount} steps`);
const metrics = taskMetrics(task);
if (metrics.durationMs) metaParts.push(fmtDuration(metrics.durationMs));
if (metrics.steps) metaParts.push(`${fmtCompact(metrics.steps)} steps`);
if (metrics.toolCalls) metaParts.push(`${fmtCompact(metrics.toolCalls)} tools`);
if (metrics.toolErrors) metaParts.push(`${fmtCompact(metrics.toolErrors)} errors`);
item.innerHTML =
'<div class="task-row">' +
@@ -714,7 +859,7 @@
}
function artifactPath(task, artifact) {
const manifestPath = task.paths && task.paths[artifact];
const manifestPath = task.paths?.[artifact];
if (typeof manifestPath === 'string' && manifestPath.length > 0) {
return manifestPath.replace(/^\/+/, '');
}
@@ -725,6 +870,17 @@
return `${basePath}/${artifactPath(task, artifact)}`;
}
function runArtifactUrl(path) {
if (typeof path !== 'string' || path.length === 0) return null;
return `${basePath}/${path.replace(/^\/+/, '')}`;
}
function reportUrl(manifest, task) {
const url = runArtifactUrl(manifest?.reportPath);
if (!url || !task) return url;
return `${url}#${encodeURIComponent(task.queryId || task.id || '')}`;
}
function metadataUrl(task) {
return artifactUrl(task, 'metadata');
}
@@ -905,10 +1061,38 @@
}
// Duration
if (task.durationMs) {
const metrics = taskMetrics(task);
if (metrics.durationMs) {
html += '<div class="db-section">';
html += '<span class="db-label">Duration</span>';
html += `<span class="db-value">${fmtDuration(task.durationMs)}</span>`;
html += `<span class="db-value">${fmtDuration(metrics.durationMs)}</span>`;
html += '</div>';
}
if (metrics.steps) {
html += '<div class="db-section">';
html += '<span class="db-label">Steps</span>';
html += `<span class="db-value">${fmtCompact(metrics.steps)}</span>`;
html += '</div>';
}
html += '<div class="db-section">';
html += '<span class="db-label">Tool Calls</span>';
html += `<span class="db-value">${fmtCompact(metrics.toolCalls)}</span>`;
html += '</div>';
if (metrics.toolErrors) {
html += '<div class="db-section">';
html += '<span class="db-label">Tool Errors</span>';
html += `<span class="db-value">${fmtCompact(metrics.toolErrors)}</span>`;
html += '</div>';
}
const reportLink = reportUrl(manifest, task);
if (reportLink) {
html += '<div class="db-section">';
html += '<span class="db-label">Report</span>';
html += `<span class="db-value"><a href="${escAttr(reportLink)}" target="_blank" rel="noopener">Open task analysis</a></span>`;
html += '</div>';
}
@@ -1234,8 +1418,25 @@
function computeStats(tasks) {
const total = tasks.length;
let passed = 0, failed = 0, totalScore = 0, scoredCount = 0;
let totalDurationMs = 0, durationCount = 0;
let totalSteps = 0, stepsCount = 0;
let totalToolCalls = 0, toolCount = 0;
let totalToolErrors = 0;
tasks.forEach((t) => {
const metrics = taskMetrics(t);
if (metrics.durationMs > 0) {
totalDurationMs += metrics.durationMs;
durationCount++;
}
if (metrics.steps > 0) {
totalSteps += metrics.steps;
stepsCount++;
}
totalToolCalls += metrics.toolCalls;
totalToolErrors += metrics.toolErrors;
toolCount++;
const graders = t.graderResults || {};
const keys = Object.keys(graders);
if (keys.length > 0) {
@@ -1254,7 +1455,34 @@
total: total,
passed: passed,
failed: failed,
avgScore: scoredCount > 0 ? Math.round((totalScore / scoredCount) * 100) : null
avgScore: scoredCount > 0 ? Math.round((totalScore / scoredCount) * 100) : null,
avgDurationMs: durationCount > 0 ? totalDurationMs / durationCount : null,
avgSteps: stepsCount > 0 ? totalSteps / stepsCount : null,
avgToolCalls: toolCount > 0 ? totalToolCalls / toolCount : null,
totalToolCalls: totalToolCalls,
totalToolErrors: totalToolErrors
};
}
function taskMetrics(task) {
const metrics = task.metrics || {};
const screenshots = Number.isFinite(Number(metrics.screenshots))
? Number(metrics.screenshots)
: Number(task.screenshotCount || 0);
return {
durationMs: Number.isFinite(Number(metrics.durationMs))
? Number(metrics.durationMs)
: Number(task.durationMs || 0),
steps: Number.isFinite(Number(metrics.steps))
? Number(metrics.steps)
: screenshots,
screenshots: screenshots,
toolCalls: Number.isFinite(Number(metrics.toolCalls))
? Number(metrics.toolCalls)
: 0,
toolErrors: Number.isFinite(Number(metrics.toolErrors))
? Number(metrics.toolErrors)
: 0
};
}
@@ -1310,6 +1538,13 @@
return `${h}h ${remM}m`;
}
function fmtCompact(value) {
const num = Number(value);
if (!Number.isFinite(num)) return '0';
if (Number.isInteger(num)) return String(num);
return num.toFixed(1);
}
function showFatalError(msgHtml) {
document.getElementById('center-panel').innerHTML =
'<div class="placeholder error">' +

View File

@@ -5,6 +5,7 @@ import {
PutObjectCommand,
S3Client,
} from '@aws-sdk/client-s3'
import { readTaskMetrics } from '../reporting/task-metrics'
import {
buildViewerManifest,
type ViewerManifestTaskInput,
@@ -315,6 +316,7 @@ export class R2Publisher {
graderResults:
(meta.grader_results as ViewerManifestTaskInput['graderResults']) ||
{},
metrics: await readTaskMetrics(taskPath, meta, screenshotCount),
})
}
@@ -379,10 +381,12 @@ export class R2Publisher {
await readFile(join(runDir, 'summary.json'), 'utf-8'),
) as Record<string, unknown>
} catch {}
const reportStat = await stat(join(runDir, 'report.html')).catch(() => null)
return buildViewerManifest({
runId,
uploadedAt: this.now().toISOString(),
reportPath: reportStat?.isFile() ? 'report.html' : undefined,
agentConfig,
dataset,
summary: summaryData

View File

@@ -0,0 +1,188 @@
import { readdir, readFile, stat } from 'node:fs/promises'
import { join } from 'node:path'
export interface EvalTaskMetrics {
durationMs: number
steps: number
screenshots: number
toolCalls: number
toolErrors: number
}
export interface EvalRunMetrics {
taskCount: number
totalDurationMs: number
avgDurationMs: number
totalSteps: number
avgSteps: number
totalToolCalls: number
avgToolCalls: number
totalToolErrors: number
avgToolErrors: number
}
export interface EvalTaskMetricSummary {
queryId: string
status: string
score?: number
pass?: boolean
metrics: EvalTaskMetrics
}
export interface EvalRunMetricSummary {
run: EvalRunMetrics
tasks: EvalTaskMetricSummary[]
}
interface TaskDirEntry {
taskId: string
taskPath: string
}
function numberValue(value: unknown): number {
return typeof value === 'number' && Number.isFinite(value) ? value : 0
}
export function countMessageMetrics(messagesJsonl: string): {
toolCalls: number
toolErrors: number
} {
let toolCalls = 0
let toolErrors = 0
for (const line of messagesJsonl.split('\n')) {
const trimmed = line.trim()
if (!trimmed) continue
try {
const event = JSON.parse(trimmed) as { type?: unknown }
if (event.type === 'tool-input-available') toolCalls++
if (event.type === 'tool-output-error') toolErrors++
} catch {
// Ignore malformed telemetry lines; the raw artifact is still uploaded.
}
}
return { toolCalls, toolErrors }
}
export function buildTaskMetrics(
metadata: Record<string, unknown>,
messageMetrics: { toolCalls: number; toolErrors: number },
screenshotCount = 0,
): EvalTaskMetrics {
const screenshots = numberValue(metadata.screenshot_count) || screenshotCount
return {
durationMs: numberValue(metadata.total_duration_ms),
steps: numberValue(metadata.total_steps) || screenshots,
screenshots,
toolCalls: messageMetrics.toolCalls,
toolErrors: messageMetrics.toolErrors,
}
}
export function buildRunMetrics(metrics: EvalTaskMetrics[]): EvalRunMetrics {
const taskCount = metrics.length
const totalDurationMs = metrics.reduce((sum, metric) => {
return sum + metric.durationMs
}, 0)
const totalSteps = metrics.reduce((sum, metric) => sum + metric.steps, 0)
const totalToolCalls = metrics.reduce((sum, metric) => {
return sum + metric.toolCalls
}, 0)
const totalToolErrors = metrics.reduce((sum, metric) => {
return sum + metric.toolErrors
}, 0)
return {
taskCount,
totalDurationMs,
avgDurationMs: taskCount > 0 ? totalDurationMs / taskCount : 0,
totalSteps,
avgSteps: taskCount > 0 ? totalSteps / taskCount : 0,
totalToolCalls,
avgToolCalls: taskCount > 0 ? totalToolCalls / taskCount : 0,
totalToolErrors,
avgToolErrors: taskCount > 0 ? totalToolErrors / taskCount : 0,
}
}
export async function readTaskMetrics(
taskPath: string,
metadata: Record<string, unknown>,
screenshotCount = 0,
): Promise<EvalTaskMetrics> {
const messages = await readFile(join(taskPath, 'messages.jsonl'), 'utf-8')
.then(countMessageMetrics)
.catch(() => ({ toolCalls: 0, toolErrors: 0 }))
return buildTaskMetrics(metadata, messages, screenshotCount)
}
function statusFromMetadata(metadata: Record<string, unknown>): string {
const termination = metadata.termination_reason
if (termination === 'timeout') return 'timeout'
if (Array.isArray(metadata.errors) && metadata.errors.length > 0) {
return 'failed'
}
return 'completed'
}
function primaryGrade(metadata: Record<string, unknown>): {
score?: number
pass?: boolean
} {
const graders = metadata.grader_results as
| Record<string, { score?: unknown; pass?: unknown }>
| undefined
const first = graders ? Object.values(graders)[0] : undefined
return {
...(typeof first?.score === 'number' ? { score: first.score } : {}),
...(typeof first?.pass === 'boolean' ? { pass: first.pass } : {}),
}
}
async function readTaskDirs(runDir: string): Promise<TaskDirEntry[]> {
const canonicalTasksDir = join(runDir, 'tasks')
const canonicalStat = await stat(canonicalTasksDir).catch(() => null)
const baseDir = canonicalStat?.isDirectory() ? canonicalTasksDir : runDir
const entries = await readdir(baseDir, { withFileTypes: true }).catch(
() => [],
)
return entries
.filter((entry) => entry.isDirectory())
.filter((entry) => entry.name !== 'screenshots')
.filter((entry) => entry.name !== 'tasks')
.map((entry) => ({
taskId: entry.name,
taskPath: join(baseDir, entry.name),
}))
}
export async function readRunMetricSummary(
runDir: string,
): Promise<EvalRunMetricSummary> {
const tasks: EvalTaskMetricSummary[] = []
for (const entry of await readTaskDirs(runDir)) {
const metadata = await readFile(
join(entry.taskPath, 'metadata.json'),
'utf-8',
)
.then((text) => JSON.parse(text) as Record<string, unknown>)
.catch(() => null)
if (!metadata) continue
const metrics = await readTaskMetrics(entry.taskPath, metadata)
tasks.push({
queryId: (metadata.query_id as string | undefined) || entry.taskId,
status: statusFromMetadata(metadata),
...primaryGrade(metadata),
metrics,
})
}
return {
run: buildRunMetrics(tasks.map((task) => task.metrics)),
tasks,
}
}

View File

@@ -36,5 +36,6 @@ export async function resolveProviderConfig(
accessKeyId: resolveEnvValue(agent.accessKeyId),
secretAccessKey: resolveEnvValue(agent.secretAccessKey),
sessionToken: resolveEnvValue(agent.sessionToken),
region: resolveEnvValue(agent.region),
}
}

View File

@@ -1,3 +1,8 @@
import {
buildRunMetrics,
type EvalRunMetrics,
type EvalTaskMetrics,
} from '../reporting/task-metrics'
import type { GraderResult } from '../types'
export const VIEWER_MANIFEST_SCHEMA_VERSION = 2
@@ -20,6 +25,7 @@ export interface ViewerManifestTaskInput {
status: string
durationMs: number
screenshotCount: number
metrics?: EvalTaskMetrics
graderResults: Record<string, GraderResult>
}
@@ -35,9 +41,11 @@ export interface ViewerManifest {
suiteId?: string
variantId?: string
uploadedAt?: string
reportPath?: string
agentConfig?: Record<string, unknown>
dataset?: string
summary?: Record<string, unknown>
metrics?: EvalRunMetrics
tasks: ViewerManifestTask[]
}
@@ -46,6 +54,7 @@ export interface BuildViewerManifestInput {
suiteId?: string
variantId?: string
uploadedAt?: string
reportPath?: string
agentConfig?: Record<string, unknown>
dataset?: string
summary?: Record<string, unknown>
@@ -68,22 +77,37 @@ function taskPaths(queryId: string): ViewerManifestTaskPaths {
export function buildViewerManifest(
input: BuildViewerManifestInput,
): ViewerManifest {
const tasks = input.tasks.map((task) => {
const { artifactId, ...publicTask } = task
const metrics =
publicTask.metrics ??
({
durationMs: publicTask.durationMs,
steps: publicTask.screenshotCount,
screenshots: publicTask.screenshotCount,
toolCalls: 0,
toolErrors: 0,
} satisfies EvalTaskMetrics)
return {
...publicTask,
metrics,
startUrl: publicTask.startUrl ?? '',
paths: taskPaths(artifactId ?? publicTask.queryId),
}
})
return {
schemaVersion: VIEWER_MANIFEST_SCHEMA_VERSION,
runId: input.runId,
...(input.suiteId ? { suiteId: input.suiteId } : {}),
...(input.variantId ? { variantId: input.variantId } : {}),
...(input.uploadedAt ? { uploadedAt: input.uploadedAt } : {}),
...(input.reportPath ? { reportPath: input.reportPath } : {}),
...(input.agentConfig ? { agentConfig: input.agentConfig } : {}),
...(input.dataset ? { dataset: input.dataset } : {}),
...(input.summary ? { summary: input.summary } : {}),
tasks: input.tasks.map((task) => {
const { artifactId, ...publicTask } = task
return {
...publicTask,
startUrl: publicTask.startUrl ?? '',
paths: taskPaths(artifactId ?? publicTask.queryId),
}
}),
metrics: buildRunMetrics(tasks.map((task) => task.metrics)),
tasks,
}
}

View File

@@ -0,0 +1,12 @@
import { describe, expect, it } from 'bun:test'
import { shouldAutoOpenDashboard } from '../../src/dashboard/server'
describe('dashboard server', () => {
it('does not auto-open the dashboard in CI', () => {
expect(shouldAutoOpenDashboard({ CI: 'true' })).toBe(false)
})
it('auto-opens the dashboard outside CI by default', () => {
expect(shouldAutoOpenDashboard({})).toBe(true)
})
})

View File

@@ -40,6 +40,7 @@ async function writeRunFixture(
start_url: 'https://example.test',
termination_reason: 'completed',
total_duration_ms: 1200,
total_steps: 4,
screenshot_count: 1,
agent_config: { type: 'single', model: 'kimi' },
grader_results: {
@@ -47,13 +48,22 @@ async function writeRunFixture(
},
}),
)
await writeFile(join(taskDir, 'messages.jsonl'), '{"type":"user"}\n')
await writeFile(
join(taskDir, 'messages.jsonl'),
[
'{"type":"user"}',
'{"type":"tool-input-available","toolName":"click"}',
'{"type":"tool-input-available","toolName":"take_snapshot"}',
'{"type":"tool-output-error","toolName":"click"}',
].join('\n'),
)
await writeFile(join(taskDir, 'grades.json'), '{"ok":true}')
await writeFile(join(taskDir, 'screenshots', '1.png'), 'png')
await writeFile(
join(runDir, 'summary.json'),
JSON.stringify({ passRate: 1, avgDurationMs: 1200 }),
)
await writeFile(join(runDir, 'report.html'), '<html>report</html>')
return { runDir, runId: `${configName}-${timestamp}` }
}
@@ -110,6 +120,9 @@ describe('R2Publisher', () => {
expect(byKey.get(`runs/${runId}/summary.json`)?.ContentType).toBe(
'application/json',
)
expect(byKey.get(`runs/${runId}/report.html`)?.ContentType).toBe(
'text/html',
)
expect(byKey.get('viewer.html')?.ContentType).toBe('text/html')
expect(result.viewerUrl).toBe(
`https://eval.example.test/viewer.html?run=${runId}`,
@@ -126,12 +139,28 @@ describe('R2Publisher', () => {
uploadedAt: '2026-04-29T12:00:00.000Z',
agentConfig: { type: 'single', model: 'kimi' },
dataset: 'webbench',
reportPath: 'report.html',
summary: { passRate: 1, avgDurationMs: 1200 },
metrics: {
taskCount: 1,
avgDurationMs: 1200,
avgSteps: 4,
avgToolCalls: 2,
totalToolCalls: 2,
totalToolErrors: 1,
},
tasks: [
{
queryId: 'task-1',
status: 'completed',
screenshotCount: 1,
metrics: {
durationMs: 1200,
steps: 4,
screenshots: 1,
toolCalls: 2,
toolErrors: 1,
},
paths: {
attempt: 'tasks/task-1/attempt.json',
metadata: 'tasks/task-1/metadata.json',

View File

@@ -6,6 +6,7 @@ interface ViewerPathResolvers {
artifactUrl(task: Record<string, unknown>, artifact: string): string
metadataUrl(task: Record<string, unknown>): string
messagesUrl(task: Record<string, unknown>): string
reportUrl(manifest: Record<string, unknown>): string | null
screenshotUrl(task: Record<string, unknown>, step: number): string
}
@@ -24,7 +25,7 @@ async function loadViewerPathResolvers(): Promise<ViewerPathResolvers> {
`
const basePath = 'runs/run-1';
${block}
return { artifactUrl, metadataUrl, messagesUrl, screenshotUrl };
return { artifactUrl, metadataUrl, messagesUrl, reportUrl, screenshotUrl };
`,
) as () => ViewerPathResolvers
return createResolvers()
@@ -60,6 +61,35 @@ async function runAutoSelectFromHash(hash: string): Promise<unknown> {
return runAutoSelect()
}
async function runComputeStats(): Promise<unknown> {
const html = await readFile(
join(import.meta.dir, '..', '..', 'src', 'dashboard', 'viewer.html'),
'utf-8',
)
const start = html.indexOf('function computeStats(tasks)')
const end = html.indexOf('function resolveStatus(task)', start)
expect(start).toBeGreaterThan(-1)
expect(end).toBeGreaterThan(start)
const block = html.slice(start, end)
const compute = new Function(
`
${block}
return computeStats([
{
graderResults: { agisdk_state_diff: { pass: true, score: 1 } },
metrics: { durationMs: 1000, steps: 4, toolCalls: 3, toolErrors: 0 }
},
{
graderResults: { agisdk_state_diff: { pass: false, score: 0 } },
metrics: { durationMs: 3000, steps: 8, toolCalls: 5, toolErrors: 2 }
}
]);
`,
) as () => unknown
return compute()
}
describe('R2 viewer artifact path compatibility', () => {
it('uses explicit manifest paths for new uploaded runs', async () => {
const resolvers = await loadViewerPathResolvers()
@@ -95,6 +125,15 @@ describe('R2 viewer artifact path compatibility', () => {
)
})
it('resolves manifest-level run report links', async () => {
const resolvers = await loadViewerPathResolvers()
expect(resolvers.reportUrl({ reportPath: 'report.html' })).toBe(
'runs/run-1/report.html',
)
expect(resolvers.reportUrl({})).toBe(null)
})
it('falls back to legacy inferred paths for old uploaded runs', async () => {
const resolvers = await loadViewerPathResolvers()
const task = { queryId: 'legacy-task' }
@@ -127,4 +166,17 @@ describe('R2 viewer artifact path compatibility', () => {
queryId: 'legacy-task',
})
})
it('computes run-level timing and tool metrics for the viewer', async () => {
expect(await runComputeStats()).toMatchObject({
total: 2,
passed: 1,
failed: 1,
avgDurationMs: 2000,
avgSteps: 6,
avgToolCalls: 4,
totalToolCalls: 8,
totalToolErrors: 2,
})
})
})

View File

@@ -0,0 +1,159 @@
import { describe, expect, it } from 'bun:test'
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
DEFAULT_REPORT_MAX_TURNS,
DEFAULT_REPORT_MODEL,
generateEvalReport,
runClaudeCodeReportAgent,
} from '../../scripts/generate-report'
async function writeRunFixture(): Promise<string> {
const runDir = await mkdtemp(join(tmpdir(), 'eval-report-script-'))
const taskDir = join(runDir, 'agisdk-networkin-10')
await mkdir(join(taskDir, 'screenshots'), { recursive: true })
await writeFile(
join(runDir, 'summary.json'),
JSON.stringify({
total: 1,
completed: 1,
passRate: 0,
avgDurationMs: 1234,
}),
)
await writeFile(
join(taskDir, 'metadata.json'),
JSON.stringify({
query_id: 'agisdk-networkin-10',
dataset: 'agisdk-real',
query: 'Send a follow-up message starting with "Following up on".',
termination_reason: 'completed',
total_duration_ms: 1234,
total_steps: 2,
screenshot_count: 1,
final_answer: 'No app action was taken.',
errors: [],
warnings: [],
agent_config: { type: 'single', model: 'kimi' },
grader_results: {
agisdk_state_diff: {
score: 0,
pass: false,
reasoning: 'Some criteria failed',
details: {
per_criterion: [
{ passed: true, detail: 'message starts correctly' },
{ passed: false, detail: 'message was not sent' },
],
},
},
},
}),
)
await writeFile(
join(taskDir, 'messages.jsonl'),
[
JSON.stringify({
type: 'tool-input-available',
timestamp: '2026-04-30T00:00:00.000Z',
toolCallId: 'call-1',
toolName: 'memory_search',
input: { q: 'chat' },
}),
JSON.stringify({
type: 'tool-output-error',
timestamp: '2026-04-30T00:00:01.000Z',
toolCallId: 'call-1',
errorText: 'memory unavailable',
}),
].join('\n'),
)
await writeFile(join(taskDir, 'screenshots', '1.png'), 'png')
return runDir
}
describe('generate-report script', () => {
it('delegates report.html creation to Claude Code', async () => {
const runDir = await writeRunFixture()
const outputPath = join(runDir, 'report.html')
let prompt = ''
await generateEvalReport({
inputDir: runDir,
outputPath,
runAgent: async (invocation) => {
prompt = invocation.prompt
await writeFile(
invocation.outputPath,
'<!doctype html><h1>Claude-written report</h1>',
)
},
})
expect(await readFile(outputPath, 'utf-8')).toContain(
'Claude-written report',
)
expect(prompt).toContain('AGI SDK Random-10 Failure Report')
expect(prompt).toContain('summary.json')
expect(prompt).toContain('messages.jsonl')
expect(prompt).toContain('screenshots')
expect(prompt).toContain('Deterministic run metrics')
expect(prompt).toContain('"queryId": "agisdk-networkin-10"')
expect(prompt).toContain('"toolCalls": 1')
expect(prompt).toContain('"toolErrors": 1')
expect(prompt).toContain('Duration by task')
expect(prompt).toContain('Tool calls by task')
expect(prompt).toContain(outputPath)
})
it('fails when the Claude Code agent does not write the report', async () => {
const runDir = await writeRunFixture()
await expect(
generateEvalReport({
inputDir: runDir,
outputPath: join(runDir, 'missing-report.html'),
runAgent: async () => {},
}),
).rejects.toThrow('Report was not written')
})
it('runs Claude Code with Opus 4.6, full bypass, and bounded turns', async () => {
const runDir = await writeRunFixture()
const calls: unknown[] = []
await runClaudeCodeReportAgent(
{
inputDir: runDir,
outputPath: join(runDir, 'report.html'),
prompt: 'write the report',
},
{
query: async function* (call: unknown) {
calls.push(call)
yield { type: 'result', subtype: 'success', result: 'done' }
},
env: {
CLAUDE_CODE_OAUTH_TOKEN: 'token',
EVAL_R2_SECRET_ACCESS_KEY: 'secret',
HOME: '/tmp/home',
PATH: '/bin',
},
},
)
expect(calls).toHaveLength(1)
expect(calls[0]).toMatchObject({
prompt: 'write the report',
options: {
cwd: runDir,
model: DEFAULT_REPORT_MODEL,
maxTurns: DEFAULT_REPORT_MAX_TURNS,
permissionMode: 'bypassPermissions',
allowDangerouslySkipPermissions: true,
},
})
expect(JSON.stringify(calls[0])).not.toContain('secret')
})
})

View File

@@ -13,10 +13,10 @@ describe('adaptEvalConfigFile', () => {
expect(adapted.suite.id).toBe('browseros-agent-weekly')
expect(adapted.suite.dataset).toBe('../../data/agisdk-real.jsonl')
expect(adapted.suite.graders).toEqual(['agisdk_state_diff'])
expect(adapted.suite.workers).toBe(10)
expect(adapted.suite.workers).toBe(3)
expect(adapted.suite.restartBrowserPerTask).toBe(true)
expect(adapted.suite.timeoutMs).toBe(1_800_000)
expect(adapted.evalConfig.num_workers).toBe(10)
expect(adapted.evalConfig.num_workers).toBe(3)
expect(adapted.evalConfig.browseros.server_url).toBe(
'http://127.0.0.1:9110',
)
@@ -38,6 +38,34 @@ describe('adaptEvalConfigFile', () => {
)
})
it('adapts BrowserOS AGI SDK comparison configs', async () => {
const kimi = await adaptEvalConfigFile(
'apps/eval/configs/legacy/browseros-agent-kimi-k2-5-agisdk-real.json',
)
const opus = await adaptEvalConfigFile(
'apps/eval/configs/legacy/browseros-agent-opus-4-6-agisdk-real.json',
)
expect(kimi.suite.id).toBe('browseros-agent-kimi-k2-5-agisdk-real')
expect(kimi.evalConfig.agent).toMatchObject({
type: 'single',
provider: 'openai-compatible',
model: 'moonshotai/kimi-k2.5',
})
expect(kimi.evalConfig.num_workers).toBe(3)
expect(opus.suite.id).toBe('browseros-agent-opus-4-6-agisdk-real')
expect(opus.evalConfig.agent).toMatchObject({
type: 'single',
provider: 'bedrock',
model: 'global.anthropic.claude-opus-4-6-v1',
region: 'AWS_REGION',
accessKeyId: 'AWS_ACCESS_KEY_ID',
secretAccessKey: 'AWS_SECRET_ACCESS_KEY',
})
expect(opus.evalConfig.num_workers).toBe(2)
})
it('adapts claude-code configs without provider credentials', async () => {
const dir = await mkdtemp(join(tmpdir(), 'claude-code-config-'))
const configPath = join(dir, 'claude-code-agisdk.json')

View File

@@ -0,0 +1,38 @@
import { describe, expect, it } from 'bun:test'
import { resolveProviderConfig } from '../../src/utils/resolve-provider-config'
describe('resolveProviderConfig', () => {
it('resolves Bedrock region from environment variables', async () => {
const previous = {
AWS_REGION: process.env.AWS_REGION,
AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID,
AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY,
}
process.env.AWS_REGION = 'us-west-2'
process.env.AWS_ACCESS_KEY_ID = 'test-access-key'
process.env.AWS_SECRET_ACCESS_KEY = 'test-secret-key'
try {
const resolved = await resolveProviderConfig({
provider: 'bedrock',
model: 'global.anthropic.claude-opus-4-6-v1',
region: 'AWS_REGION',
accessKeyId: 'AWS_ACCESS_KEY_ID',
secretAccessKey: 'AWS_SECRET_ACCESS_KEY',
})
expect(resolved).toMatchObject({
provider: 'bedrock',
model: 'global.anthropic.claude-opus-4-6-v1',
region: process.env.AWS_REGION,
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
})
} finally {
for (const [key, value] of Object.entries(previous)) {
if (value === undefined) delete process.env[key]
else process.env[key] = value
}
}
})
})

View File

@@ -9,6 +9,7 @@ describe('buildViewerManifest', () => {
suiteId: 'agisdk-daily-10',
variantId: 'kimi',
uploadedAt: '2026-04-29T06:00:00.000Z',
reportPath: 'report.html',
summary: { total: 1, passRate: 0 },
tasks: [
{
@@ -18,6 +19,13 @@ describe('buildViewerManifest', () => {
status: 'completed',
durationMs: 353_000,
screenshotCount: 42,
metrics: {
durationMs: 353_000,
steps: 47,
screenshots: 42,
toolCalls: 19,
toolErrors: 2,
},
graderResults: {
agisdk_state_diff: {
score: 0,
@@ -32,6 +40,7 @@ describe('buildViewerManifest', () => {
const publishManifest: R2RunManifest = manifest
expect(publishManifest.schemaVersion).toBe(2)
expect(manifest.reportPath).toBe('report.html')
expect(manifest.tasks[0].paths.messages).toBe(
'tasks/agisdk-dashdish-4/messages.jsonl',
)
@@ -41,6 +50,21 @@ describe('buildViewerManifest', () => {
expect(manifest.tasks[0].paths.graderArtifacts).toBe(
'tasks/agisdk-dashdish-4/grader-artifacts',
)
expect(manifest.metrics).toMatchObject({
taskCount: 1,
avgDurationMs: 353_000,
avgSteps: 47,
avgToolCalls: 19,
totalToolCalls: 19,
totalToolErrors: 2,
})
expect(manifest.tasks[0].metrics).toEqual({
durationMs: 353_000,
steps: 47,
screenshots: 42,
toolCalls: 19,
toolErrors: 2,
})
expect(manifest.tasks[0].graderResults.agisdk_state_diff.details).toEqual({
missing: ['checkout item'],
})

View File

@@ -143,8 +143,9 @@ export async function createHttpServer(config: HttpServerConfig) {
getLimactlPath: () => resolveBundledLimactl(resourcesDir),
getVmName: () => VM_NAME,
},
openclawGatewayChat: new OpenClawGatewayChatClient(() =>
getOpenClawService().getPort(),
openclawGatewayChat: new OpenClawGatewayChatClient(
() => getOpenClawService().getPort(),
async () => getOpenClawService().getGatewayToken(),
),
openclawProvisioner: {
createAgent: (input) => getOpenClawService().createAgent(input),

View File

@@ -53,7 +53,6 @@ export type GatewayContainerSpec = {
hostHome: string
envFilePath: string
gatewayToken?: string
privateIngressNoAuth?: boolean
timezone: string
}
@@ -418,9 +417,6 @@ export class ContainerRuntime {
...(input.gatewayToken
? { OPENCLAW_GATEWAY_TOKEN: input.gatewayToken }
: {}),
...(input.privateIngressNoAuth
? { OPENCLAW_GATEWAY_PRIVATE_INGRESS_NO_AUTH: '1' }
: {}),
}
}

View File

@@ -35,23 +35,22 @@ export interface GatewayChatTurnInput {
signal?: AbortSignal
}
type GatewayTokenProvider = () => Promise<string | null | undefined>
export class OpenClawGatewayChatClient {
constructor(
private readonly getHostPort: () => number,
private readonly getToken?: GatewayTokenProvider,
private readonly getToken: () => Promise<string>,
) {}
async streamTurn(
input: GatewayChatTurnInput,
): Promise<ReadableStream<AgentStreamEvent>> {
const token = await this.getToken()
const response = await fetch(
`http://127.0.0.1:${this.getHostPort()}/v1/chat/completions`,
{
method: 'POST',
headers: {
...(await this.authHeaders()),
Authorization: `Bearer ${token}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
@@ -81,12 +80,6 @@ export class OpenClawGatewayChatClient {
},
})
}
private async authHeaders(): Promise<Record<string, string>> {
const token = await this.getToken?.()
const trimmed = token?.trim()
return trimmed ? { Authorization: `Bearer ${trimmed}` } : {}
}
}
function resolveAgentModel(agentId: string): string {

View File

@@ -73,12 +73,10 @@ export type OpenClawSessionHistoryEvent =
}
| { type: 'error'; data: { message: string } }
type GatewayTokenProvider = () => Promise<string | null | undefined>
export class OpenClawHttpClient {
constructor(
private readonly hostPort: number,
private readonly getToken?: GatewayTokenProvider,
private readonly getToken: () => Promise<string>,
) {}
async getSessionHistory(
@@ -105,11 +103,14 @@ export class OpenClawHttpClient {
async isAuthenticated(): Promise<boolean> {
try {
const token = await this.getToken()
const response = await fetch(
`http://127.0.0.1:${this.hostPort}/v1/models`,
{
method: 'GET',
headers: await this.authHeaders(),
headers: {
Authorization: `Bearer ${token}`,
},
},
)
return response.ok
@@ -123,12 +124,13 @@ export class OpenClawHttpClient {
input: OpenClawSessionHistoryInput,
extraHeaders: Record<string, string>,
): Promise<Response> {
const token = await this.getToken()
const response = await fetch(
`http://127.0.0.1:${this.hostPort}${buildHistoryPath(sessionKey, input)}`,
{
method: 'GET',
headers: {
...(await this.authHeaders()),
Authorization: `Bearer ${token}`,
...extraHeaders,
},
signal: input.signal,
@@ -147,12 +149,6 @@ export class OpenClawHttpClient {
}
return response
}
private async authHeaders(): Promise<Record<string, string>> {
const token = await this.getToken?.()
const trimmed = token?.trim()
return trimmed ? { Authorization: `Bearer ${trimmed}` } : {}
}
}
function buildHistoryPath(

View File

@@ -54,10 +54,10 @@ export class OpenClawObserver {
constructor(private readonly session: ClawSession) {}
/** Start observing the gateway at the given URL. */
connect(gatewayUrl: string, token?: string | null): void {
/** Start observing the gateway at the given URL with the given token. */
connect(gatewayUrl: string, token: string): void {
this.gatewayUrl = gatewayUrl
this.gatewayToken = token?.trim() || null
this.gatewayToken = token
this.closed = false
this.doConnect()
}
@@ -83,7 +83,7 @@ export class OpenClawObserver {
// ── Private ─────────────────────────────────────────────────────────
private doConnect(): void {
if (this.closed || !this.gatewayUrl) return
if (this.closed || !this.gatewayUrl || !this.gatewayToken) return
const wsUrl = this.gatewayUrl
.replace(/^http:\/\//, 'ws://')
@@ -101,37 +101,6 @@ export class OpenClawObserver {
let handshakeSent = false
/**
* Send the gateway protocol connect frame. BrowserOS no-auth gateways omit
* auth entirely; legacy token-mode gateways can still pass a token in.
*/
const sendConnectRequest = () => {
if (handshakeSent) return
handshakeSent = true
const connectReq: RequestFrame = {
type: 'req',
id: HANDSHAKE_REQUEST_ID,
method: 'connect',
params: {
minProtocol: PROTOCOL_VERSION,
maxProtocol: PROTOCOL_VERSION,
client: {
id: 'openclaw-tui',
displayName: 'browseros-observer',
version: '1.0.0',
platform: 'node',
mode: 'ui',
},
role: 'operator',
scopes: ['operator.read'],
...(this.gatewayToken ? { auth: { token: this.gatewayToken } } : {}),
},
}
ws.send(JSON.stringify(connectReq))
}
ws.on('open', sendConnectRequest)
ws.on('message', (raw) => {
let frame: IncomingFrame
try {
@@ -140,14 +109,34 @@ export class OpenClawObserver {
return
}
// Older gateway builds emit connect.challenge before the connect
// response; keep this path so the observer tolerates both flows.
// The gateway sends a connect.challenge event before accepting
// the connect request. Send the handshake after receiving it.
if (
frame.type === 'event' &&
frame.event === 'connect.challenge' &&
!handshakeSent
) {
sendConnectRequest()
handshakeSent = true
const connectReq: RequestFrame = {
type: 'req',
id: HANDSHAKE_REQUEST_ID,
method: 'connect',
params: {
minProtocol: PROTOCOL_VERSION,
maxProtocol: PROTOCOL_VERSION,
client: {
id: 'openclaw-tui',
displayName: 'browseros-observer',
version: '1.0.0',
platform: 'node',
mode: 'ui',
},
role: 'operator',
scopes: ['operator.read'],
auth: { token: this.gatewayToken },
},
}
ws.send(JSON.stringify(connectReq))
return
}

View File

@@ -262,7 +262,6 @@ export class OpenClawService {
private hostPort = OPENCLAW_GATEWAY_CONTAINER_PORT
private token: string
private tokenLoaded = false
private gatewayAuthMode: 'unknown' | 'none' | 'token' | 'password' = 'unknown'
private lastError: string | null = null
private browserosServerPort: number
private resourcesDir: string | null
@@ -285,8 +284,9 @@ export class OpenClawService {
this.token = crypto.randomUUID()
this.cliClient = new OpenClawCliClient(this.runtime)
this.bootstrapCliClient = this.buildBootstrapCliClient()
this.httpClient = new OpenClawHttpClient(this.hostPort, () =>
this.getGatewayHttpToken(),
this.httpClient = new OpenClawHttpClient(
this.hostPort,
async () => this.token,
)
this.browserosServerPort =
config.browserosServerPort ?? DEFAULT_PORTS.server
@@ -324,9 +324,13 @@ export class OpenClawService {
}
/**
* Legacy gateway auth token accessor. BrowserOS configures new bundled
* gateways with `gateway.auth.mode=none`; this remains for older token-auth
* gateway clients that still ask the service for a token.
* Current gateway auth token. The token string is loaded from
* `gateway.auth.token` in the persisted openclaw.json during setup,
* with a freshly generated UUID as fallback. Exposed so the ACPx
* harness can pass it to spawned `openclaw acp` child processes via
* the documented `OPENCLAW_GATEWAY_TOKEN` env var (avoids both the
* `--token` process-listing leak and reliance on a token-file path
* that doesn't exist as a discrete file inside the container).
*/
getGatewayToken(): string {
return this.token
@@ -397,7 +401,7 @@ export class OpenClawService {
await this.bootstrapCliClient.runOnboard({
acceptRisk: true,
authChoice: 'skip',
gatewayAuth: 'none',
gatewayAuth: 'token',
gatewayBind: 'lan',
gatewayPort: OPENCLAW_GATEWAY_CONTAINER_PORT,
installDaemon: false,
@@ -997,8 +1001,9 @@ export class OpenClawService {
private setPort(hostPort: number): void {
if (hostPort === this.hostPort) return
this.hostPort = hostPort
this.httpClient = new OpenClawHttpClient(this.hostPort, () =>
this.getGatewayHttpToken(),
this.httpClient = new OpenClawHttpClient(
this.hostPort,
async () => this.token,
)
}
@@ -1032,15 +1037,24 @@ export class OpenClawService {
}
private async isGatewayAuthenticated(hostPort: number): Promise<boolean> {
if (!this.tokenLoaded) {
logger.debug(
'OpenClaw gateway port is ready before auth token is loaded',
{
hostPort,
},
)
return false
}
const client =
hostPort === this.hostPort
? this.httpClient
: new OpenClawHttpClient(hostPort, () => this.getGatewayHttpToken())
: new OpenClawHttpClient(hostPort, async () => this.token)
const authenticated = await client.isAuthenticated()
if (!authenticated) {
logger.warn('OpenClaw gateway readiness probe failed', {
logger.warn('OpenClaw gateway port rejected current auth token', {
hostPort,
authMode: this.gatewayAuthMode,
})
}
return authenticated
@@ -1104,9 +1118,7 @@ export class OpenClawService {
// ClawSession starts empty after the JSONL seed was removed; the WS
// observer fills in agent status as events arrive.
const url = `http://127.0.0.1:${this.hostPort}`
const token =
this.gatewayAuthMode === 'token' && this.tokenLoaded ? this.token : null
this.observer.connect(url, token)
this.observer.connect(url, this.token)
}
private classifyControlPlaneError(
@@ -1342,11 +1354,7 @@ export class OpenClawService {
hostPort: this.hostPort,
hostHome: this.openclawDir,
envFilePath: this.getStateEnvPath(),
gatewayToken:
this.gatewayAuthMode === 'token' && this.tokenLoaded
? this.token
: undefined,
privateIngressNoAuth: this.gatewayAuthMode === 'none',
gatewayToken: this.tokenLoaded ? this.token : undefined,
timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
}
}
@@ -1452,7 +1460,7 @@ export class OpenClawService {
}
private async ensureTokenLoaded(): Promise<void> {
if (this.gatewayAuthMode !== 'unknown') {
if (this.tokenLoaded) {
return
}
if (!existsSync(this.getStateConfigPath())) {
@@ -1464,7 +1472,6 @@ export class OpenClawService {
private async refreshGatewayAuthToken(): Promise<void> {
this.tokenLoaded = false
this.gatewayAuthMode = 'unknown'
if (!existsSync(this.getStateConfigPath())) {
return
}
@@ -1479,28 +1486,16 @@ export class OpenClawService {
) as {
gateway?: {
auth?: {
mode?: unknown
token?: unknown
}
}
}
const auth = config.gateway?.auth
const mode = auth?.mode
if (mode === 'none') {
this.gatewayAuthMode = 'none'
logger.debug('OpenClaw gateway config uses no auth')
return
}
const token = auth?.token
const token = config.gateway?.auth?.token
if (typeof token === 'string' && token) {
this.token = token
this.tokenLoaded = true
this.gatewayAuthMode = 'token'
logger.info('Loaded OpenClaw gateway token from mounted config')
return
}
this.gatewayAuthMode = mode === 'password' ? 'password' : 'none'
} catch (err) {
logger.warn('Failed to load OpenClaw gateway token from mounted config', {
error: err instanceof Error ? err.message : String(err),
@@ -1508,13 +1503,6 @@ export class OpenClawService {
}
}
private async getGatewayHttpToken(): Promise<string | null> {
await this.ensureTokenLoaded()
return this.gatewayAuthMode === 'token' && this.tokenLoaded
? this.token
: null
}
private createProgressLogger(
onLog?: (msg: string) => void,
): (msg: string) => void {

View File

@@ -23,11 +23,17 @@ interface CdpVersion {
const LOOPBACK_DISCOVERY_HOSTS = ['127.0.0.1', 'localhost', '[::1]'] as const
type LoopbackDiscoveryHost = (typeof LOOPBACK_DISCOVERY_HOSTS)[number]
interface CdpBackendConfig {
port: number
exitOnReconnectFailure?: boolean
}
// biome-ignore lint/correctness/noUnusedVariables: declaration merging adds ProtocolApi properties to the class
interface CdpBackend extends ProtocolApi {}
// biome-ignore lint/suspicious/noUnsafeDeclarationMerging: intentional — Object.assign fills these at runtime
class CdpBackend implements ICdpBackend {
private port: number
private exitOnReconnectFailure: boolean
private ws: WebSocket | null = null
private messageId = 0
private pending = new Map<number, PendingRequest>()
@@ -44,8 +50,9 @@ class CdpBackend implements ICdpBackend {
private keepaliveTimer: ReturnType<typeof setInterval> | null = null
private preferredDiscoveryHost: LoopbackDiscoveryHost | null = null
constructor(config: { port: number }) {
constructor(config: CdpBackendConfig) {
this.port = config.port
this.exitOnReconnectFailure = config.exitOnReconnectFailure ?? true
const rawSend: RawSend = (method, params) => this.rawSend(method, params)
const rawOn: RawOn = (event, handler) => this.rawOn(event, handler)
@@ -293,7 +300,8 @@ class CdpBackend implements ICdpBackend {
private async reconnectLoop(): Promise<void> {
do {
this.reconnectRequested = false
await this.reconnectWithRetries()
const reconnected = await this.reconnectWithRetries()
if (!reconnected) return
} while (
!this.disconnecting &&
(this.reconnectRequested || !this.connected)
@@ -309,12 +317,12 @@ class CdpBackend implements ICdpBackend {
this.pending.clear()
}
private async reconnectWithRetries(): Promise<void> {
private async reconnectWithRetries(): Promise<boolean> {
const maxRetries = CDP_LIMITS.RECONNECT_MAX_RETRIES
const delay = TIMEOUTS.CDP_RECONNECT_DELAY
for (let attempt = 1; attempt <= maxRetries; attempt++) {
if (this.disconnecting) return
if (this.disconnecting) return false
try {
logger.info(`CDP reconnection attempt ${attempt}/${maxRetries}...`)
@@ -322,7 +330,7 @@ class CdpBackend implements ICdpBackend {
await this.attemptConnect()
this.startKeepalive()
logger.info('CDP reconnected successfully')
return
return true
} catch (error) {
const msg = error instanceof Error ? error.message : String(error)
logger.warn(
@@ -331,10 +339,14 @@ class CdpBackend implements ICdpBackend {
}
}
logger.error(
`CDP reconnection failed after ${maxRetries} attempts, exiting for restart`,
)
process.exit(EXIT_CODES.GENERAL_ERROR)
if (this.exitOnReconnectFailure) {
logger.error(
`CDP reconnection failed after ${maxRetries} attempts, exiting for restart`,
)
process.exit(EXIT_CODES.GENERAL_ERROR)
}
logger.error(`CDP reconnection failed after ${maxRetries} attempts`)
return false
}
async disconnect(): Promise<void> {

View File

@@ -0,0 +1,74 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type { createRuntimeStore } from 'acpx/runtime'
import type { OpenClawGatewayChatClient } from '../../api/services/openclaw/openclaw-gateway-chat-client'
import type { AgentDefinition } from './agent-types'
import { prepareClaudeCodeContext } from './claude-code/prepare'
import { prepareCodexContext } from './codex/prepare'
import {
maybeHandleOpenClawTurn,
prepareOpenClawContext,
} from './openclaw/prepare'
import type { AgentPromptInput, AgentStreamEvent } from './types'
export interface PreparedAcpxAgentContext {
cwd: string
runtimeSessionKey: string
runPrompt: string
commandEnv: Record<string, string>
commandIdentity: string
useBrowserosMcp: boolean
openclawSessionKey: string | null
}
export interface PrepareAcpxAgentContextInput {
browserosDir: string
agent: AgentDefinition
sessionId: 'main'
sessionKey: string
cwdOverride: string | null
isSelectedCwd: boolean
message: string
}
export interface AcpxAdapterTurnInput {
prompt: AgentPromptInput
prepared: PreparedAcpxAgentContext
sessionStore: ReturnType<typeof createRuntimeStore>
openclawGatewayChat: OpenClawGatewayChatClient | null
}
export interface AcpxAgentAdapter {
prepare(
input: PrepareAcpxAgentContextInput,
): Promise<PreparedAcpxAgentContext>
maybeHandleTurn?(
input: AcpxAdapterTurnInput,
): Promise<ReadableStream<AgentStreamEvent> | null>
}
const ADAPTERS: Record<AgentDefinition['adapter'], AcpxAgentAdapter> = {
claude: { prepare: prepareClaudeCodeContext },
codex: { prepare: prepareCodexContext },
openclaw: {
prepare: prepareOpenClawContext,
maybeHandleTurn: maybeHandleOpenClawTurn,
},
}
export function getAcpxAgentAdapter(
adapter: AgentDefinition['adapter'],
): AcpxAgentAdapter {
return ADAPTERS[adapter]
}
/** Prepares adapter-specific filesystem, prompt, env, and session identity for one ACPX turn. */
export async function prepareAcpxAgentContext(
input: PrepareAcpxAgentContextInput,
): Promise<PreparedAcpxAgentContext> {
return getAcpxAgentAdapter(input.agent.adapter).prepare(input)
}

View File

@@ -0,0 +1,95 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type {
PrepareAcpxAgentContextInput,
PreparedAcpxAgentContext,
} from './acpx-agent-adapter'
import type { AgentRuntimePaths } from './acpx-runtime-context'
import {
BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
buildAcpxRuntimePromptPrefix,
buildBrowserosAcpPrompt,
ensureAgentHome,
ensureRuntimeSkills,
ensureUsableCwd,
resolveAgentRuntimePaths,
} from './acpx-runtime-context'
import {
deriveRuntimeSessionKey,
saveLatestRuntimeState,
} from './acpx-runtime-state'
export interface BrowserosManagedContext {
input: PrepareAcpxAgentContextInput
paths: AgentRuntimePaths
skillNames: string[]
promptPrefix: string
}
/** Builds the common BrowserOS-managed home, skills, cwd, and prompt prefix for Claude/Codex. */
export async function prepareBrowserosManagedContext(
input: PrepareAcpxAgentContextInput,
): Promise<BrowserosManagedContext> {
const paths = resolveAgentRuntimePaths({
browserosDir: input.browserosDir,
agentId: input.agent.id,
cwd: input.cwdOverride,
})
await ensureUsableCwd(paths.effectiveCwd, !input.isSelectedCwd)
await ensureAgentHome(paths)
const skillNames = await ensureRuntimeSkills(paths.runtimeSkillsDir)
const promptPrefix = buildAcpxRuntimePromptPrefix({
agent: input.agent,
paths,
skillNames,
})
return { input, paths, skillNames, promptPrefix }
}
/** Finalizes BrowserOS-managed prep into the uniform adapter context consumed by AcpxRuntime. */
export async function finishBrowserosManagedContext(input: {
input: PrepareAcpxAgentContextInput
paths: AgentRuntimePaths
skillNames: string[]
promptPrefix: string
commandEnv: Record<string, string>
}): Promise<PreparedAcpxAgentContext> {
const commandIdentity = stableCommandIdentity(input.commandEnv)
const runtimeSessionKey = deriveRuntimeSessionKey({
agentId: input.input.agent.id,
sessionId: input.input.sessionId,
adapter: input.input.agent.adapter,
cwd: input.paths.effectiveCwd,
agentHome: input.paths.agentHome,
promptVersion: BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
skillIdentity: input.skillNames.join(','),
commandIdentity,
})
await saveLatestRuntimeState(input.paths.runtimeStatePath, {
sessionId: input.input.sessionId,
runtimeSessionKey,
cwd: input.paths.effectiveCwd,
agentHome: input.paths.agentHome,
updatedAt: Date.now(),
})
return {
cwd: input.paths.effectiveCwd,
runtimeSessionKey,
runPrompt: buildBrowserosAcpPrompt(input.promptPrefix, input.input.message),
commandEnv: input.commandEnv,
commandIdentity,
useBrowserosMcp: true,
openclawSessionKey: null,
}
}
export function stableCommandIdentity(env: Record<string, string>): string {
return Object.entries(env)
.sort(([left], [right]) => left.localeCompare(right))
.map(([key, value]) => `${key}=${value}`)
.join('\n')
}

View File

@@ -35,6 +35,7 @@ export interface AgentRuntimePaths {
effectiveCwd: string
runtimeStatePath: string
runtimeSkillsDir: string
runtimeRoot: string
codexHome: string
}
@@ -45,6 +46,7 @@ export function resolveAgentRuntimePaths(input: {
}): AgentRuntimePaths {
const harnessDir = join(input.browserosDir, 'agents', 'harness')
const defaultWorkspaceCwd = join(harnessDir, 'workspace')
const runtimeRoot = join(harnessDir, input.agentId, 'runtime')
return {
browserosDir: input.browserosDir,
harnessDir,
@@ -57,7 +59,8 @@ export function resolveAgentRuntimePaths(input: {
`${input.agentId}.json`,
),
runtimeSkillsDir: join(harnessDir, 'runtime-skills'),
codexHome: join(harnessDir, input.agentId, 'runtime', 'codex-home'),
runtimeRoot,
codexHome: join(runtimeRoot, 'codex-home'),
}
}
@@ -110,7 +113,7 @@ export async function materializeCodexHome(input: {
}
}
/** Builds the stable BrowserOS operating instructions prepended to ACP turns. */
/** Builds stable BrowserOS-managed instructions for Claude/Codex ACP turns. */
export function buildAcpxRuntimePromptPrefix(input: {
agent: AgentDefinition
paths: AgentRuntimePaths
@@ -134,6 +137,12 @@ BrowserOS has made runtime skills available for this ACPX session.
Skill root: ${input.paths.runtimeSkillsDir}
Available skills: ${input.skillNames.join(', ')}
When a task calls for one of these skills, read its SKILL.md from that root and follow it.
When the user asks you to remember, save feedback, store a preference, or update memory in this BrowserOS ACPX context, use the BrowserOS memory skill.
Write BrowserOS memory only under AGENT_HOME:
- AGENT_HOME/MEMORY.md for durable promoted preferences and operating patterns.
- AGENT_HOME/memory/YYYY-MM-DD.md for daily notes and candidate memories.
Do not use native Claude project memory, native CLI memory, or workspace files for BrowserOS memory.
</browseros_acpx_runtime>`
}
@@ -148,6 +157,40 @@ export function wrapCommandWithEnv(
return prefix ? `env ${prefix} ${command}` : command
}
/** Ensures the runtime cwd exists, creating only the managed default workspace. */
export async function ensureUsableCwd(
cwd: string,
isDefaultWorkspace: boolean,
): Promise<void> {
if (isDefaultWorkspace) {
await mkdir(cwd, { recursive: true })
return
}
let info: Stats
try {
info = await stat(cwd)
} catch (err) {
if (isNotFoundError(err)) {
throw new Error(`Selected workspace does not exist: ${cwd}`)
}
throw err
}
if (!info.isDirectory()) {
throw new Error(`Selected workspace is not a directory: ${cwd}`)
}
}
export function buildBrowserosAcpPrompt(
prefix: string,
message: string,
): string {
return `${prefix}
<user_request>
${escapePromptTagText(message)}
</user_request>`
}
async function writeFileIfMissing(
path: string,
content: string,
@@ -207,7 +250,7 @@ async function sourceFileExists(path: string): Promise<boolean> {
throw err
}
if (!info.isFile()) {
throw new Error(`Expected Codex source file to be a file: ${path}`)
throw new Error(`Expected source file to be a file: ${path}`)
}
return true
}
@@ -216,6 +259,13 @@ function shellQuote(value: string): string {
return `'${value.replace(/'/g, "'\\''")}'`
}
function escapePromptTagText(value: string): string {
return value
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
}
function isNotFoundError(err: unknown): boolean {
return (
typeof err === 'object' &&

View File

@@ -114,6 +114,11 @@ Do not store memory files in the project workspace.
## Write
- When the user explicitly asks you to remember, save feedback, store a preference, or update memory, use this skill.
- Write BrowserOS memory only under $AGENT_HOME.
- Use $AGENT_HOME/MEMORY.md for durable promoted preferences and operating patterns.
- Use $AGENT_HOME/memory/YYYY-MM-DD.md for daily notes and candidate memories.
- Do not use native Claude project memory, native CLI memory, or workspace files for BrowserOS memory.
- Put observations and task breadcrumbs in today's daily note first.
- Promote only stable patterns into MEMORY.md.
- Do not promote one-off facts, raw transcripts, temporary state, secrets, or credentials.

View File

@@ -4,9 +4,6 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { randomUUID } from 'node:crypto'
import type { Stats } from 'node:fs'
import { mkdir, stat } from 'node:fs/promises'
import { join } from 'node:path'
import { OPENCLAW_GATEWAY_CONTAINER_PORT } from '@browseros/shared/constants/openclaw'
import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
@@ -22,28 +19,18 @@ import {
createAgentRegistry,
createRuntimeStore,
} from 'acpx/runtime'
import type {
OpenAIChatMessage,
OpenAIContentPart,
OpenClawGatewayChatClient,
} from '../../api/services/openclaw/openclaw-gateway-chat-client'
import type { OpenClawGatewayChatClient } from '../../api/services/openclaw/openclaw-gateway-chat-client'
import { getBrowserosDir } from '../browseros-dir'
import { logger } from '../logger'
import type { AgentRuntimePaths } from './acpx-runtime-context'
import {
BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
buildAcpxRuntimePromptPrefix,
ensureAgentHome,
ensureRuntimeSkills,
materializeCodexHome,
getAcpxAgentAdapter,
prepareAcpxAgentContext,
} from './acpx-agent-adapter'
import {
resolveAgentRuntimePaths,
wrapCommandWithEnv,
} from './acpx-runtime-context'
import {
deriveRuntimeSessionKey,
loadLatestRuntimeState,
saveLatestRuntimeState,
} from './acpx-runtime-state'
import { loadLatestRuntimeState } from './acpx-runtime-state'
import type {
AgentDefinition,
AgentHistoryEntry,
@@ -67,7 +54,7 @@ import type {
* current token and VM/container paths at spawn time.
*/
export interface OpenclawGatewayAccessor {
/** Current gateway auth token. Kept for legacy token-auth gateway clients. */
/** Current gateway auth token. Passed to `openclaw acp --token`. */
getGatewayToken(): string
/** Container name e.g. browseros-openclaw-openclaw-gateway-1. */
getContainerName(): string
@@ -107,6 +94,8 @@ interface PreparedRuntimeContext {
runPrompt: string
agentCommandEnv: Record<string, string>
commandIdentity: string
useBrowserosMcp: boolean
openclawSessionKey: string | null
}
const BROWSEROS_ACP_AGENT_INSTRUCTIONS = `<role>
@@ -194,16 +183,11 @@ export class AcpxRuntime implements AgentRuntime {
async send(
input: AgentPromptInput,
): Promise<ReadableStream<AgentStreamEvent>> {
const prepared =
input.agent.adapter === 'openclaw'
? null
: await this.prepareRuntimeContext(input, input.cwd ?? this.defaultCwd)
const cwd =
prepared?.cwd ??
(await this.resolveNonManagedCwd(
input.cwd ?? this.defaultCwd,
!!input.cwd,
))
const prepared = await this.prepareRuntimeContext(
input,
input.cwd ?? this.defaultCwd,
)
const cwd = prepared.cwd
const imageAttachments = (input.attachments ?? []).filter((a) =>
a.mediaType.startsWith('image/'),
)
@@ -221,42 +205,38 @@ export class AcpxRuntime implements AgentRuntime {
imageAttachmentCount: imageAttachments.length,
})
// Image carve-out for OpenClaw: the openclaw `acp` bridge silently
// drops ACP `image` content blocks, so the model never sees the
// attachment. Divert image-bearing turns to the gateway's HTTP
// /v1/chat/completions endpoint (which accepts OpenAI-style
// `image_url` parts) and pipe its SSE back through the same
// AgentStreamEvent shape callers already consume.
if (
input.agent.adapter === 'openclaw' &&
imageAttachments.length > 0 &&
this.openclawGatewayChat
) {
return this.sendOpenclawViaGateway(input, imageAttachments, cwd)
}
const adapter = getAcpxAgentAdapter(input.agent.adapter)
const adapterStream =
(await adapter.maybeHandleTurn?.({
prompt: input,
prepared: {
cwd: prepared.cwd,
runtimeSessionKey: prepared.runtimeSessionKey,
runPrompt: prepared.runPrompt,
commandEnv: prepared.agentCommandEnv,
commandIdentity: prepared.commandIdentity,
useBrowserosMcp: prepared.useBrowserosMcp,
openclawSessionKey: prepared.openclawSessionKey,
},
sessionStore: this.sessionStore,
openclawGatewayChat: this.openclawGatewayChat,
})) ?? null
if (adapterStream) return adapterStream
const runtime = this.getRuntime({
cwd,
permissionMode: input.permissionMode,
nonInteractivePermissions: 'fail',
commandEnv: prepared?.agentCommandEnv ?? {},
commandIdentity: prepared?.commandIdentity ?? 'openclaw',
// OpenClaw agents need their gateway sessionKey baked into the
// spawn command (acpx does not forward sessionKey to newSession);
// claude/codex don't, and including it would split their cache.
openclawSessionKey:
input.agent.adapter === 'openclaw' ? input.sessionKey : null,
commandEnv: prepared.agentCommandEnv,
commandIdentity: prepared.commandIdentity,
useBrowserosMcp: prepared.useBrowserosMcp,
openclawSessionKey: prepared.openclawSessionKey,
})
return createAcpxEventStream(runtime, input, {
cwd,
runtimeSessionKey: prepared?.runtimeSessionKey ?? input.sessionKey,
runPrompt:
prepared?.runPrompt ??
buildBrowserosAcpPrompt(
BROWSEROS_ACP_AGENT_INSTRUCTIONS,
input.message,
),
runtimeSessionKey: prepared.runtimeSessionKey,
runPrompt: prepared.runPrompt,
})
}
@@ -277,64 +257,27 @@ export class AcpxRuntime implements AgentRuntime {
return (await this.sessionStore.load(agent.sessionKey)) ?? null
}
private async resolveNonManagedCwd(
cwdOverride: string | null,
isSelectedCwd: boolean,
): Promise<string> {
const paths = resolveAgentRuntimePaths({
browserosDir: this.browserosDir,
agentId: 'openclaw',
cwd: cwdOverride,
})
await ensureUsableCwd(paths.effectiveCwd, !isSelectedCwd)
return paths.effectiveCwd
}
private async prepareRuntimeContext(
input: AgentPromptInput,
cwdOverride: string | null,
): Promise<PreparedRuntimeContext> {
const paths = resolveAgentRuntimePaths({
const prepared = await prepareAcpxAgentContext({
browserosDir: this.browserosDir,
agentId: input.agent.id,
cwd: cwdOverride,
})
await ensureUsableCwd(paths.effectiveCwd, !input.cwd)
await ensureAgentHome(paths)
const skillNames = await ensureRuntimeSkills(paths.runtimeSkillsDir)
if (input.agent.adapter === 'codex') {
await materializeCodexHome({ paths, skillNames })
}
const promptPrefix = buildAcpxRuntimePromptPrefix({
agent: input.agent,
paths,
skillNames,
})
const agentCommandEnv = buildAgentCommandEnv(input.agent, paths)
const commandIdentity = stableCommandIdentity(agentCommandEnv)
const runtimeSessionKey = deriveRuntimeSessionKey({
agentId: input.agent.id,
sessionId: input.sessionId,
adapter: input.agent.adapter,
cwd: paths.effectiveCwd,
agentHome: paths.agentHome,
promptVersion: BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
skillIdentity: skillNames.join(','),
commandIdentity,
})
await saveLatestRuntimeState(paths.runtimeStatePath, {
sessionId: input.sessionId,
runtimeSessionKey,
cwd: paths.effectiveCwd,
agentHome: paths.agentHome,
updatedAt: Date.now(),
sessionKey: input.sessionKey,
cwdOverride,
isSelectedCwd: !!input.cwd,
message: input.message,
})
return {
cwd: paths.effectiveCwd,
runtimeSessionKey,
runPrompt: buildBrowserosAcpPrompt(promptPrefix, input.message),
agentCommandEnv,
commandIdentity,
cwd: prepared.cwd,
runtimeSessionKey: prepared.runtimeSessionKey,
runPrompt: prepared.runPrompt,
agentCommandEnv: prepared.commandEnv,
commandIdentity: prepared.commandIdentity,
useBrowserosMcp: prepared.useBrowserosMcp,
openclawSessionKey: prepared.openclawSessionKey,
}
}
@@ -344,6 +287,7 @@ export class AcpxRuntime implements AgentRuntime {
nonInteractivePermissions: AcpRuntimeOptions['nonInteractivePermissions']
commandEnv: Record<string, string>
commandIdentity: string
useBrowserosMcp: boolean
openclawSessionKey: string | null
}): AcpxCoreRuntime {
const key = JSON.stringify({
@@ -351,16 +295,12 @@ export class AcpxRuntime implements AgentRuntime {
permissionMode: input.permissionMode,
nonInteractivePermissions: input.nonInteractivePermissions,
commandIdentity: input.commandIdentity,
useBrowserosMcp: input.useBrowserosMcp,
openclawSessionKey: input.openclawSessionKey,
})
const existing = this.runtimes.get(key)
if (existing) return existing
// OpenClaw exposes its provider tools through the gateway, not through
// ACP-side MCP servers. Forwarding the BrowserOS HTTP MCP to its bridge
// makes newSession fail because openclaw rejects unsupported transports.
// Claude/codex still need the BrowserOS MCP for browser tooling.
const isOpenclaw = input.openclawSessionKey !== null
const runtime = this.runtimeFactory({
cwd: input.cwd,
sessionStore: this.sessionStore,
@@ -369,9 +309,9 @@ export class AcpxRuntime implements AgentRuntime {
openclawSessionKey: input.openclawSessionKey,
commandEnv: input.commandEnv,
}),
mcpServers: isOpenclaw
? []
: createBrowserosMcpServers(this.browserosServerPort),
mcpServers: input.useBrowserosMcp
? createBrowserosMcpServers(this.browserosServerPort)
: [],
permissionMode: input.permissionMode,
nonInteractivePermissions: input.nonInteractivePermissions,
})
@@ -383,195 +323,11 @@ export class AcpxRuntime implements AgentRuntime {
nonInteractivePermissions: input.nonInteractivePermissions,
browserosServerPort: this.browserosServerPort,
commandIdentity: input.commandIdentity,
useBrowserosMcp: input.useBrowserosMcp,
openclawSessionKey: input.openclawSessionKey,
})
return runtime
}
/**
* Drives an OpenClaw turn that includes image attachments through the
* gateway HTTP endpoint, which translates OpenAI-style `image_url`
* content parts into provider-native multimodal calls. Streams back
* `AgentStreamEvent` so the chat panel renders identically to ACP
* turns. On natural completion, appends a synthetic user+assistant
* pair to the acpx session record so the turn shows up in
* `getHistory()` after a reload.
*
* Persistence is best-effort: when no session record exists yet (e.g.
* the very first turn for a fresh agent is image-only), the live
* stream still works but the turn is absent from history on reload.
* Subsequent text turns through ACP create/update the record normally.
*/
private async sendOpenclawViaGateway(
input: AgentPromptInput,
imageAttachments: ReadonlyArray<{ mediaType: string; data: string }>,
cwd: string,
): Promise<ReadableStream<AgentStreamEvent>> {
if (!this.openclawGatewayChat) {
throw new Error(
'OpenClaw gateway chat client is not wired into AcpxRuntime',
)
}
const existingRecord = await this.sessionStore.load(input.sessionKey)
const priorMessages = existingRecord
? recordToOpenAIMessages(existingRecord)
: []
const userContent: OpenAIContentPart[] = [
{
type: 'text',
text: buildBrowserosAcpPrompt(
BROWSEROS_ACP_AGENT_INSTRUCTIONS,
input.message,
),
},
...imageAttachments.map(
(a): OpenAIContentPart => ({
type: 'image_url',
image_url: { url: `data:${a.mediaType};base64,${a.data}` },
}),
),
]
const messages: OpenAIChatMessage[] = [
...priorMessages,
{ role: 'user', content: userContent },
]
logger.info('Agent harness gateway image turn dispatched', {
agentId: input.agent.id,
sessionKey: input.sessionKey,
cwd,
priorMessageCount: priorMessages.length,
imageAttachmentCount: imageAttachments.length,
})
const upstream = await this.openclawGatewayChat.streamTurn({
agentId: input.agent.id,
sessionKey: input.sessionKey,
messages,
signal: input.signal,
})
const sessionStore = this.sessionStore
const sessionKey = input.sessionKey
const userMessageText = input.message
let accumulated = ''
return new ReadableStream<AgentStreamEvent>({
start: (controller) => {
const reader = upstream.getReader()
const persist = async () => {
if (!existingRecord || !accumulated) return
try {
await persistGatewayTurn(
sessionStore,
sessionKey,
userMessageText,
imageAttachments,
accumulated,
)
} catch (err) {
logger.warn(
'Failed to persist gateway image turn to acpx session record',
{
sessionKey,
error: err instanceof Error ? err.message : String(err),
},
)
}
}
;(async () => {
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
if (value.type === 'text_delta') accumulated += value.text
controller.enqueue(value)
}
await persist()
controller.close()
} catch (err) {
controller.enqueue({
type: 'error',
message: err instanceof Error ? err.message : String(err),
})
controller.close()
}
})().catch(() => {})
},
cancel: () => {
// Best-effort: cancel propagation to the gateway is its own
// upstream issue (see plan), but at least drop our reader so
// the OpenAI SSE parse loop exits.
},
})
}
}
async function persistGatewayTurn(
sessionStore: ReturnType<typeof createRuntimeStore>,
sessionKey: string,
userMessageText: string,
imageAttachments: ReadonlyArray<{ mediaType: string; data: string }>,
assistantText: string,
): Promise<void> {
const record = await sessionStore.load(sessionKey)
if (!record) return
const userContent: AcpxUserContent[] = [
{
Text: buildBrowserosAcpPrompt(
BROWSEROS_ACP_AGENT_INSTRUCTIONS,
userMessageText,
),
} as AcpxUserContent,
]
for (const _image of imageAttachments) {
// The history mapper's `userContentToText` reads `Image.source` and
// emits `[image]` for any non-empty value — we just need a truthy
// marker so the placeholder renders. We don't store the base64 in
// the record (it's already in the gateway's transcript and would
// bloat the JSON file).
userContent.push({ Image: { source: 'base64' } } as AcpxUserContent)
}
// The acpx persistence layer requires User messages to carry an `id`
// and Agent messages to carry a `tool_results` object — without them
// the record fails to round-trip through `parseSessionRecord` on next
// load. See acpx/dist/prompt-turn-... `isUserMessage`/`isAgentMessage`.
const turnId = randomUUID()
const updated = {
...record,
messages: [
...record.messages,
{ User: { id: `user-${turnId}`, content: userContent } },
{ Agent: { content: [{ Text: assistantText }], tool_results: {} } },
],
lastUsedAt: new Date().toISOString(),
} as AcpSessionRecord
await sessionStore.save(updated)
}
function recordToOpenAIMessages(record: AcpSessionRecord): OpenAIChatMessage[] {
const messages: OpenAIChatMessage[] = []
for (const message of record.messages) {
if (message === 'Resume') continue
if ('User' in message) {
const text = message.User.content
.map(userContentToText)
.filter(Boolean)
.join('\n\n')
.trim()
if (text) messages.push({ role: 'user', content: text })
continue
}
if ('Agent' in message) {
const text = message.Agent.content
.map((part) => ('Text' in part ? part.Text : ''))
.join('')
.trim()
if (text) messages.push({ role: 'assistant', content: text })
}
}
return messages
}
type AcpxSessionMessage = AcpSessionRecord['messages'][number]
@@ -1000,8 +756,8 @@ function createBrowserosAgentRegistry(input: {
* already installed alongside the gateway is reused; BrowserOS does
* not require a host-side openclaw install.
*
* Auth: BrowserOS configures the bundled gateway with `gateway.auth.mode=none`,
* so no gateway token flag is needed for the local ACP bridge.
* Auth: `openclaw acp --url ...` deliberately does not reuse implicit
* env/config credentials, so pass the gateway token explicitly.
*
* Banner output: OPENCLAW_HIDE_BANNER and OPENCLAW_SUPPRESS_NOTES
* suppress non-JSON-RPC chatter on stdout that would otherwise corrupt
@@ -1011,6 +767,7 @@ function resolveOpenclawAcpCommand(
gateway: OpenclawGatewayAccessor,
sessionKey: string | null,
): string {
const token = gateway.getGatewayToken()
const limactl = gateway.getLimactlPath()
const vm = gateway.getVmName()
const container = gateway.getContainerName()
@@ -1059,6 +816,8 @@ function resolveOpenclawAcpCommand(
'acp',
'--url',
gatewayUrlInsideContainer,
'--token',
token,
]
if (bridgeSessionKey) {
argv.push('--session', bridgeSessionKey)
@@ -1066,77 +825,6 @@ function resolveOpenclawAcpCommand(
return argv.join(' ')
}
async function ensureUsableCwd(
cwd: string,
isDefaultWorkspace: boolean,
): Promise<void> {
if (isDefaultWorkspace) {
await mkdir(cwd, { recursive: true })
return
}
let info: Stats
try {
info = await stat(cwd)
} catch (err) {
if (isNotFoundError(err)) {
throw new Error(`Selected workspace does not exist: ${cwd}`)
}
throw err
}
if (!info.isDirectory()) {
throw new Error(`Selected workspace is not a directory: ${cwd}`)
}
}
function isNotFoundError(err: unknown): boolean {
return (
typeof err === 'object' &&
err !== null &&
'code' in err &&
err.code === 'ENOENT'
)
}
function buildAgentCommandEnv(
agent: AgentDefinition,
paths: AgentRuntimePaths,
): Record<string, string> {
if (agent.adapter === 'codex') {
return {
AGENT_HOME: paths.agentHome,
CODEX_HOME: paths.codexHome,
}
}
if (agent.adapter === 'claude') {
return {
AGENT_HOME: paths.agentHome,
}
}
return {}
}
function stableCommandIdentity(env: Record<string, string>): string {
return Object.entries(env)
.sort(([left], [right]) => left.localeCompare(right))
.map(([key, value]) => `${key}=${value}`)
.join('\n')
}
function buildBrowserosAcpPrompt(prefix: string, message: string): string {
return `${prefix}
<user_request>
${escapePromptTagText(message)}
</user_request>`
}
function escapePromptTagText(value: string): string {
return value
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
}
async function applyRuntimeControls(
runtime: AcpxCoreRuntime,
handle: AcpRuntimeHandle,

View File

@@ -0,0 +1,27 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type {
PrepareAcpxAgentContextInput,
PreparedAcpxAgentContext,
} from '../acpx-agent-adapter'
import {
finishBrowserosManagedContext,
prepareBrowserosManagedContext,
} from '../acpx-agent-common'
/** Prepares Claude Code with BrowserOS agent home while preserving host Claude auth. */
export async function prepareClaudeCodeContext(
input: PrepareAcpxAgentContextInput,
): Promise<PreparedAcpxAgentContext> {
const common = await prepareBrowserosManagedContext(input)
return finishBrowserosManagedContext({
...common,
commandEnv: {
AGENT_HOME: common.paths.agentHome,
},
})
}

View File

@@ -0,0 +1,33 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type {
PrepareAcpxAgentContextInput,
PreparedAcpxAgentContext,
} from '../acpx-agent-adapter'
import {
finishBrowserosManagedContext,
prepareBrowserosManagedContext,
} from '../acpx-agent-common'
import { materializeCodexHome } from '../acpx-runtime-context'
/** Prepares Codex with a contained CODEX_HOME and BrowserOS agent home. */
export async function prepareCodexContext(
input: PrepareAcpxAgentContextInput,
): Promise<PreparedAcpxAgentContext> {
const common = await prepareBrowserosManagedContext(input)
await materializeCodexHome({
paths: common.paths,
skillNames: common.skillNames,
})
return finishBrowserosManagedContext({
...common,
commandEnv: {
AGENT_HOME: common.paths.agentHome,
CODEX_HOME: common.paths.codexHome,
},
})
}

View File

@@ -0,0 +1,219 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { randomUUID } from 'node:crypto'
import type { AcpSessionRecord, createRuntimeStore } from 'acpx/runtime'
import type {
OpenAIChatMessage,
OpenAIContentPart,
} from '../../../api/services/openclaw/openclaw-gateway-chat-client'
import { logger } from '../../logger'
import type { AcpxAdapterTurnInput } from '../acpx-agent-adapter'
import type { AgentStreamEvent } from '../types'
type ImageAttachment = Readonly<{ mediaType: string; data: string }>
export async function maybeHandleOpenClawTurn(
input: AcpxAdapterTurnInput,
): Promise<ReadableStream<AgentStreamEvent> | null> {
const imageAttachments = (input.prompt.attachments ?? []).filter((a) =>
a.mediaType.startsWith('image/'),
)
if (imageAttachments.length === 0 || !input.openclawGatewayChat) {
return null
}
return sendOpenclawViaGateway({
prompt: input.prompt,
sessionStore: input.sessionStore,
openclawGatewayChat: input.openclawGatewayChat,
imageAttachments,
cwd: input.prepared.cwd,
runPrompt: input.prepared.runPrompt,
})
}
/** Handles OpenClaw image turns through the gateway HTTP chat endpoint. */
async function sendOpenclawViaGateway(input: {
prompt: AcpxAdapterTurnInput['prompt']
sessionStore: AcpxAdapterTurnInput['sessionStore']
openclawGatewayChat: NonNullable<AcpxAdapterTurnInput['openclawGatewayChat']>
imageAttachments: ReadonlyArray<ImageAttachment>
cwd: string
runPrompt: string
}): Promise<ReadableStream<AgentStreamEvent>> {
const existingRecord = await input.sessionStore.load(input.prompt.sessionKey)
const priorMessages = existingRecord
? recordToOpenAIMessages(existingRecord)
: []
const userContent: OpenAIContentPart[] = [
{
type: 'text',
text: input.runPrompt,
},
...input.imageAttachments.map(
(a): OpenAIContentPart => ({
type: 'image_url',
image_url: { url: `data:${a.mediaType};base64,${a.data}` },
}),
),
]
const messages: OpenAIChatMessage[] = [
...priorMessages,
{ role: 'user', content: userContent },
]
logger.info('Agent harness gateway image turn dispatched', {
agentId: input.prompt.agent.id,
sessionKey: input.prompt.sessionKey,
cwd: input.cwd,
priorMessageCount: priorMessages.length,
imageAttachmentCount: input.imageAttachments.length,
})
const upstream = await input.openclawGatewayChat.streamTurn({
agentId: input.prompt.agent.id,
sessionKey: input.prompt.sessionKey,
messages,
signal: input.prompt.signal,
})
const sessionStore = input.sessionStore
const sessionKey = input.prompt.sessionKey
const userMessageText = input.prompt.message
const imageAttachments = input.imageAttachments
let accumulated = ''
return new ReadableStream<AgentStreamEvent>({
start: (controller) => {
const reader = upstream.getReader()
const persist = async () => {
if (!existingRecord || !accumulated) return
try {
await persistGatewayTurn(
sessionStore,
sessionKey,
userMessageText,
imageAttachments,
accumulated,
)
} catch (err) {
logger.warn(
'Failed to persist gateway image turn to acpx session record',
{
sessionKey,
error: err instanceof Error ? err.message : String(err),
},
)
}
}
;(async () => {
try {
while (true) {
const { done, value } = await reader.read()
if (done) break
if (value.type === 'text_delta') accumulated += value.text
controller.enqueue(value)
}
await persist()
controller.close()
} catch (err) {
controller.enqueue({
type: 'error',
message: err instanceof Error ? err.message : String(err),
})
controller.close()
}
})().catch(() => {})
},
cancel: () => {
// Best-effort: cancel propagation to the gateway is tracked separately.
},
})
}
async function persistGatewayTurn(
sessionStore: ReturnType<typeof createRuntimeStore>,
sessionKey: string,
userMessageText: string,
imageAttachments: ReadonlyArray<ImageAttachment>,
assistantText: string,
): Promise<void> {
const record = await sessionStore.load(sessionKey)
if (!record) return
const userContent: AcpxUserContent[] = [
{ Text: userMessageText } as AcpxUserContent,
]
for (const _image of imageAttachments) {
userContent.push({ Image: { source: 'base64' } } as AcpxUserContent)
}
const turnId = randomUUID()
const updated = {
...record,
messages: [
...record.messages,
{ User: { id: `user-${turnId}`, content: userContent } },
{ Agent: { content: [{ Text: assistantText }], tool_results: {} } },
],
lastUsedAt: new Date().toISOString(),
} as AcpSessionRecord
await sessionStore.save(updated)
}
function recordToOpenAIMessages(record: AcpSessionRecord): OpenAIChatMessage[] {
const messages: OpenAIChatMessage[] = []
for (const message of record.messages) {
if (message === 'Resume') continue
if ('User' in message) {
const text = message.User.content
.map(userContentToText)
.filter(Boolean)
.join('\n\n')
.trim()
if (text) messages.push({ role: 'user', content: text })
continue
}
if ('Agent' in message) {
const text = message.Agent.content
.map((part) => ('Text' in part ? part.Text : ''))
.join('')
.trim()
if (text) messages.push({ role: 'assistant', content: text })
}
}
return messages
}
type AcpxSessionMessage = AcpSessionRecord['messages'][number]
type AcpxUserContent = Extract<
Exclude<AcpxSessionMessage, 'Resume'>,
{ User: unknown }
>['User']['content'][number]
function userContentToText(content: AcpxUserContent): string {
if ('Text' in content) return unwrapPromptText(content.Text)
if ('Mention' in content) return content.Mention.content
if ('Image' in content) return content.Image.source ? '[image]' : ''
return ''
}
function unwrapPromptText(raw: string): string {
const runtimeMatch = raw.match(
/^<browseros_acpx_runtime\b[\s\S]*?<\/browseros_acpx_runtime>\n\n<user_request>\n([\s\S]*?)\n<\/user_request>$/,
)
if (runtimeMatch) return decodeBasicEntities(runtimeMatch[1]).trim()
const roleMatch = raw.match(
/^<role>[\s\S]*?<\/role>\n\n<user_request>\n([\s\S]*?)\n<\/user_request>$/,
)
if (roleMatch) return decodeBasicEntities(roleMatch[1]).trim()
return raw.trim()
}
function decodeBasicEntities(value: string): string {
return value
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
}

View File

@@ -0,0 +1,46 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type {
PrepareAcpxAgentContextInput,
PreparedAcpxAgentContext,
} from '../acpx-agent-adapter'
import {
buildBrowserosAcpPrompt,
ensureUsableCwd,
resolveAgentRuntimePaths,
} from '../acpx-runtime-context'
export { maybeHandleOpenClawTurn } from './image-turn'
const OPENCLAW_BROWSEROS_ACP_INSTRUCTIONS =
'<role>You are running inside BrowserOS through the OpenClaw ACP adapter. Use your OpenClaw identity, memory, and browser tools.</role>'
/**
* Prepares OpenClaw without BrowserOS SOUL/MEMORY or BrowserOS MCP.
* OpenClaw runs inside the gateway VM/container, so a selected host cwd is not visible there.
*/
export async function prepareOpenClawContext(
input: PrepareAcpxAgentContextInput,
): Promise<PreparedAcpxAgentContext> {
const paths = resolveAgentRuntimePaths({
browserosDir: input.browserosDir,
agentId: input.agent.id,
})
await ensureUsableCwd(paths.effectiveCwd, true)
return {
cwd: paths.effectiveCwd,
runtimeSessionKey: input.sessionKey,
runPrompt: buildBrowserosAcpPrompt(
OPENCLAW_BROWSEROS_ACP_INSTRUCTIONS,
input.message,
),
commandEnv: {},
commandIdentity: 'openclaw',
useBrowserosMcp: false,
openclawSessionKey: input.sessionKey,
}
}

View File

@@ -159,31 +159,6 @@ describe('ContainerRuntime', () => {
)
})
it('passes private-ingress no-auth only when requested', async () => {
const deps = createDeps()
const runtime = new ContainerRuntime({
vm: deps.vm,
shell: deps.shell,
loader: deps.loader,
projectDir: PROJECT_DIR,
})
await runtime.startGateway({
...defaultSpec,
gatewayToken: undefined,
privateIngressNoAuth: true,
})
expect(deps.shell.createContainer).toHaveBeenCalledWith(
expect.objectContaining({
env: expect.objectContaining({
OPENCLAW_GATEWAY_PRIVATE_INGRESS_NO_AUTH: '1',
}),
}),
undefined,
)
})
it('delegates ensureReady and stopVm to VmRuntime', async () => {
const deps = createDeps()
const runtime = new ContainerRuntime({

View File

@@ -1,89 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
*/
import { afterEach, describe, expect, it, mock } from 'bun:test'
import { OpenClawGatewayChatClient } from '../../../../src/api/services/openclaw/openclaw-gateway-chat-client'
describe('OpenClawGatewayChatClient', () => {
const originalFetch = globalThis.fetch
afterEach(() => {
globalThis.fetch = originalFetch
})
it('posts chat completions without Authorization when no token provider is configured', async () => {
const fetchMock = mock(() =>
Promise.resolve(
new Response(emptyStream(), {
status: 200,
headers: { 'Content-Type': 'text/event-stream' },
}),
),
)
globalThis.fetch = fetchMock as typeof globalThis.fetch
const client = new OpenClawGatewayChatClient(() => 18794)
await client.streamTurn({
agentId: 'main',
sessionKey: 'main',
messages: [{ role: 'user', content: 'hi' }],
})
expect(fetchMock.mock.calls[0]?.[0]).toBe(
'http://127.0.0.1:18794/v1/chat/completions',
)
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
})
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
})
it('keeps bearer auth for legacy token-auth gateways', async () => {
const fetchMock = mock(() =>
Promise.resolve(
new Response(emptyStream(), {
status: 200,
headers: { 'Content-Type': 'text/event-stream' },
}),
),
)
globalThis.fetch = fetchMock as typeof globalThis.fetch
const client = new OpenClawGatewayChatClient(
() => 18794,
async () => 'gateway-token',
)
await client.streamTurn({
agentId: 'ops',
sessionKey: 'main',
messages: [{ role: 'user', content: 'hi' }],
})
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
headers: {
Authorization: 'Bearer gateway-token',
'Content-Type': 'application/json',
},
})
})
})
function emptyStream(): ReadableStream<Uint8Array> {
return new ReadableStream({
start(controller) {
controller.close()
},
})
}
function fetchHeaders(
fetchMock: ReturnType<typeof mock>,
): Record<string, string> {
return ((fetchMock.mock.calls[0]?.[1] as RequestInit | undefined)?.headers ??
{}) as Record<string, string>
}

View File

@@ -32,22 +32,6 @@ describe('OpenClawHttpClient', () => {
})
})
it('checks no-auth gateway availability without an Authorization header', async () => {
const fetchMock = mock(() => Promise.resolve(new Response('{}')))
globalThis.fetch = fetchMock as typeof globalThis.fetch
const client = new OpenClawHttpClient(18789)
await expect(client.isAuthenticated()).resolves.toBe(true)
expect(fetchMock.mock.calls[0]?.[0]).toBe(
'http://127.0.0.1:18789/v1/models',
)
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
method: 'GET',
})
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
})
it('treats rejected gateway authentication as unavailable', async () => {
globalThis.fetch = mock(() =>
Promise.resolve(new Response('Unauthorized', { status: 401 })),
@@ -110,25 +94,6 @@ describe('OpenClawHttpClient', () => {
})
})
it('sends no Authorization header when no token provider is configured', async () => {
const fetchMock = mock(() =>
Promise.resolve(
new Response(JSON.stringify({ sessionKey: 'k', messages: [] }), {
status: 200,
}),
),
)
globalThis.fetch = fetchMock as typeof globalThis.fetch
const client = new OpenClawHttpClient(18789)
await client.getSessionHistory('k')
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
method: 'GET',
})
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
})
it('omits limit and cursor from the query when undefined', async () => {
const fetchMock = mock(() =>
Promise.resolve(
@@ -250,33 +215,6 @@ describe('OpenClawHttpClient', () => {
])
})
it('keeps SSE Accept without Authorization when no token provider is configured', async () => {
const fetchMock = mock(() =>
Promise.resolve(
new Response(
new ReadableStream({
start(controller) {
controller.close()
},
}),
{ status: 200 },
),
),
)
globalThis.fetch = fetchMock as typeof globalThis.fetch
const client = new OpenClawHttpClient(18789)
await client.streamSessionHistory('k')
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
method: 'GET',
headers: {
Accept: 'text/event-stream',
},
})
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
})
it('forwards upstream error frames and closes', async () => {
globalThis.fetch = mock(() =>
Promise.resolve(
@@ -377,10 +315,3 @@ async function readEvents(
return events
}
function fetchHeaders(
fetchMock: ReturnType<typeof mock>,
): Record<string, string> {
return ((fetchMock.mock.calls[0]?.[1] as RequestInit | undefined)?.headers ??
{}) as Record<string, string>
}

View File

@@ -338,7 +338,7 @@ describe('OpenClawService', () => {
expect(runOnboard).toHaveBeenCalledWith({
acceptRisk: true,
authChoice: 'skip',
gatewayAuth: 'none',
gatewayAuth: 'token',
gatewayBind: 'lan',
gatewayPort: 18789,
installDaemon: false,
@@ -680,49 +680,6 @@ describe('OpenClawService', () => {
expect(probe).toHaveBeenCalledTimes(1)
})
it('start ignores stale gateway tokens when config auth mode is none', async () => {
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
await writeFile(
join(tempDir, '.openclaw', 'openclaw.json'),
JSON.stringify({
gateway: {
auth: {
mode: 'none',
token: 'stale-token',
},
},
}),
)
const ensureReady = mock(async () => {})
const startGateway = mock(async () => {})
const waitForReady = mock(async () => true)
const probe = mock(async () => {})
const service = new OpenClawService() as MutableOpenClawService
service.openclawDir = tempDir
service.runtime = {
ensureReady,
isReady: async () => false,
startGateway,
waitForReady,
}
service.cliClient = {
probe,
}
await service.start()
expect(startGateway).toHaveBeenCalledWith(
expect.objectContaining({
gatewayToken: undefined,
privateIngressNoAuth: true,
}),
expect.any(Function),
)
expect(service.token).not.toBe('stale-token')
})
it('serializes concurrent start calls and only starts the gateway once', async () => {
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
@@ -1179,53 +1136,6 @@ describe('OpenClawService', () => {
expect(probe).toHaveBeenCalledTimes(1)
})
it('tryAutoStart reuses a ready no-auth gateway without Authorization', async () => {
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
await writeFile(
join(tempDir, '.openclaw', 'openclaw.json'),
JSON.stringify({
gateway: {
auth: {
mode: 'none',
token: 'stale-token',
},
},
}),
)
const ensureReady = mock(async () => {})
const isReady = mock(async () => true)
const isGatewayCurrent = mock(async () => true)
const startGateway = mock(async () => {})
const probe = mock(async () => {})
const fetchMock = mock(() =>
Promise.resolve(new Response('', { status: 200 })),
)
globalThis.fetch = fetchMock as typeof globalThis.fetch
const service = new OpenClawService() as MutableOpenClawService
service.openclawDir = tempDir
service.runtime = {
ensureReady,
isReady,
isGatewayCurrent,
startGateway,
}
service.cliClient = { probe }
await service.tryAutoStart()
expect(startGateway).not.toHaveBeenCalled()
expect(fetchMock.mock.calls[0]?.[0]).toBe(
'http://127.0.0.1:18789/v1/models',
)
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
method: 'GET',
})
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
expect(probe).toHaveBeenCalledTimes(1)
})
it('tryAutoStart recreates a ready gateway when the image is stale', async () => {
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
@@ -1810,10 +1720,3 @@ function mockGatewayAuth(status = 200): ReturnType<typeof mock> {
globalThis.fetch = fetchMock as typeof globalThis.fetch
return fetchMock
}
function fetchHeaders(
fetchMock: ReturnType<typeof mock>,
): Record<string, string> {
return ((fetchMock.mock.calls[0]?.[1] as RequestInit | undefined)?.headers ??
{}) as Record<string, string>
}

View File

@@ -51,13 +51,17 @@ describe('CdpBackend', () => {
const originalReconnectDelay = TIMEOUTS.CDP_RECONNECT_DELAY
let fetchUrls: string[] = []
let failIpv4Discovery = false
let failAllDiscovery = false
let wsHost = '127.0.0.1'
let originalExit: typeof process.exit
beforeEach(() => {
MockWebSocket.instances = []
fetchUrls = []
failIpv4Discovery = false
failAllDiscovery = false
wsHost = '127.0.0.1'
originalExit = process.exit
;(TIMEOUTS as unknown as { CDP_CONNECT: number }).CDP_CONNECT = 200
;(
@@ -67,6 +71,9 @@ describe('CdpBackend', () => {
globalThis.fetch = (async (input: string | URL | Request) => {
const url = String(input)
fetchUrls.push(url)
if (failAllDiscovery) {
throw new Error('Unable to connect')
}
if (failIpv4Discovery && url.includes('127.0.0.1')) {
throw new Error('Unable to connect')
}
@@ -87,6 +94,7 @@ describe('CdpBackend', () => {
afterEach(() => {
globalThis.fetch = originalFetch
globalThis.WebSocket = originalWebSocket
process.exit = originalExit
;(TIMEOUTS as unknown as { CDP_CONNECT: number }).CDP_CONNECT =
originalConnectTimeout
;(
@@ -160,4 +168,31 @@ describe('CdpBackend', () => {
assert(fetchUrls.length >= 3)
await cdp.disconnect()
})
it('can disable process exit when reconnect retries are exhausted', async () => {
let exitCalled = false
process.exit = (() => {
exitCalled = true
throw new Error('process.exit should not be called')
}) as unknown as typeof process.exit
const cdp = new CdpBackend({ port: 9222, exitOnReconnectFailure: false })
const connectPromise = cdp.connect()
await waitFor(() => MockWebSocket.instances.length === 1)
const ws1 = MockWebSocket.instances[0]
ws1?.open()
await connectPromise
assert.strictEqual(cdp.isConnected(), true)
failAllDiscovery = true
ws1?.close()
await waitFor(() => fetchUrls.length >= 10)
await Bun.sleep(5)
assert.strictEqual(exitCalled, false)
assert.strictEqual(cdp.isConnected(), false)
await cdp.disconnect()
})
})

View File

@@ -0,0 +1,113 @@
/**
* @license
* Copyright 2025 BrowserOS
*/
import { afterEach, describe, expect, it } from 'bun:test'
import { mkdtemp, readFile, rm } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { prepareAcpxAgentContext } from '../../../src/lib/agents/acpx-agent-adapter'
import type { AgentDefinition } from '../../../src/lib/agents/agent-types'
describe('prepareAcpxAgentContext', () => {
const tempDirs: string[] = []
afterEach(async () => {
await Promise.all(
tempDirs.map((dir) => rm(dir, { recursive: true, force: true })),
)
tempDirs.length = 0
})
function makeAgent(adapter: AgentDefinition['adapter']): AgentDefinition {
return {
id: `${adapter}-agent`,
name: `${adapter} agent`,
adapter,
permissionMode: 'approve-all',
sessionKey: `agent:${adapter}-agent:main`,
createdAt: 1000,
updatedAt: 1000,
}
}
it('prepares Claude with BrowserOS memory, host auth, BrowserOS MCP, and fingerprinted session', async () => {
const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
tempDirs.push(browserosDir)
const prepared = await prepareAcpxAgentContext({
browserosDir,
agent: makeAgent('claude'),
sessionId: 'main',
sessionKey: 'agent:claude-agent:main',
cwdOverride: null,
isSelectedCwd: false,
message: 'remember this',
})
expect(prepared.commandEnv.AGENT_HOME).toContain('/claude-agent/home')
expect(prepared.commandEnv).not.toHaveProperty('CLAUDE_CONFIG_DIR')
expect(prepared.commandEnv).not.toHaveProperty('CODEX_HOME')
expect(prepared.useBrowserosMcp).toBe(true)
expect(prepared.openclawSessionKey).toBeNull()
expect(prepared.runtimeSessionKey).toMatch(
/^agent:claude-agent:main:[a-f0-9]{16}$/,
)
expect(prepared.runPrompt).toContain(
'Available skills: browseros, memory, soul',
)
expect(
await readFile(`${prepared.commandEnv.AGENT_HOME}/MEMORY.md`, 'utf8'),
).toContain('# MEMORY.md')
})
it('prepares Codex with CODEX_HOME and BrowserOS MCP', async () => {
const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
tempDirs.push(browserosDir)
const prepared = await prepareAcpxAgentContext({
browserosDir,
agent: makeAgent('codex'),
sessionId: 'main',
sessionKey: 'agent:codex-agent:main',
cwdOverride: null,
isSelectedCwd: false,
message: 'hi',
})
expect(prepared.commandEnv.AGENT_HOME).toContain('/codex-agent/home')
expect(prepared.commandEnv.CODEX_HOME).toContain(
'/codex-agent/runtime/codex-home',
)
expect(prepared.commandEnv).not.toHaveProperty('CLAUDE_CONFIG_DIR')
expect(prepared.useBrowserosMcp).toBe(true)
expect(prepared.openclawSessionKey).toBeNull()
expect(prepared.runPrompt).toContain('AGENT_HOME=')
})
it('prepares OpenClaw without BrowserOS memory, host cwd, skills, or MCP', async () => {
const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
tempDirs.push(browserosDir)
const ignoredSelectedCwd = join(browserosDir, 'missing-selected-workspace')
const prepared = await prepareAcpxAgentContext({
browserosDir,
agent: makeAgent('openclaw'),
sessionId: 'main',
sessionKey: 'agent:openclaw-agent:main',
cwdOverride: ignoredSelectedCwd,
isSelectedCwd: true,
message: 'browse',
})
expect(prepared.cwd).toBe(
join(browserosDir, 'agents', 'harness', 'workspace'),
)
expect(prepared.commandEnv).toEqual({})
expect(prepared.useBrowserosMcp).toBe(false)
expect(prepared.openclawSessionKey).toBe('agent:openclaw-agent:main')
expect(prepared.runtimeSessionKey).toBe('agent:openclaw-agent:main')
expect(prepared.runPrompt).not.toContain('SOUL.md stores')
expect(prepared.runPrompt).not.toContain('BrowserOS memory skill')
expect(prepared.runPrompt).not.toContain('AGENT_HOME/MEMORY.md')
expect(prepared.runPrompt).not.toContain('Available skills:')
})
})

View File

@@ -55,6 +55,9 @@ describe('acpx runtime context helpers', () => {
expect(paths.runtimeSkillsDir).toBe(
join(browserosDir, 'agents', 'harness', 'runtime-skills'),
)
expect(paths.runtimeRoot).toBe(
join(browserosDir, 'agents', 'harness', 'agent-1', 'runtime'),
)
expect(paths.codexHome).toBe(
join(
browserosDir,
@@ -257,4 +260,33 @@ describe('acpx runtime context helpers', () => {
)
expect(prompt).toContain('Available skills: browseros, memory, soul')
})
it('routes explicit memory requests to BrowserOS AGENT_HOME files', () => {
const agent: AgentDefinition = {
id: 'agent-1',
name: 'Researcher',
adapter: 'claude',
permissionMode: 'approve-all',
sessionKey: 'agent:agent-1:main',
createdAt: 1000,
updatedAt: 1000,
}
const paths = resolveAgentRuntimePaths({
browserosDir: '/tmp/browseros',
agentId: agent.id,
cwd: '/tmp/workspace',
})
const prompt = buildAcpxRuntimePromptPrefix({
agent,
paths,
skillNames: ['browseros', 'memory', 'soul'],
})
expect(prompt).toContain('When the user asks you to remember')
expect(prompt).toContain('use the BrowserOS memory skill')
expect(prompt).toContain('AGENT_HOME/MEMORY.md')
expect(prompt).toContain('AGENT_HOME/memory/YYYY-MM-DD.md')
expect(prompt).toContain('Do not use native Claude project memory')
})
})

View File

@@ -868,7 +868,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
)
})
it('injects AGENT_HOME into Claude ACP command resolution', async () => {
it('injects AGENT_HOME without CLAUDE_CONFIG_DIR into Claude ACP command resolution', async () => {
const browserosDir = await mkdtemp(
join(tmpdir(), 'browseros-acpx-browseros-'),
)
@@ -898,6 +898,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
const command =
getCreateRuntimeOptions(calls).agentRegistry.resolve('claude')
expect(command).toContain('env AGENT_HOME=')
expect(command).not.toContain('CLAUDE_CONFIG_DIR=')
expect(command).not.toContain('CODEX_HOME=')
})
@@ -1023,8 +1024,9 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
expect(command).toContain(
'nerdctl exec -i -e OPENCLAW_HIDE_BANNER=1 -e OPENCLAW_SUPPRESS_NOTES=1 browseros-openclaw-openclaw-gateway-1',
)
expect(command).toContain('openclaw acp --url ws://127.0.0.1:18789')
expect(command).not.toContain('--token')
expect(command).toContain(
'openclaw acp --url ws://127.0.0.1:18789 --token test-token-abc',
)
// sessionKey routing: the bridge needs --session <key> to map newSession
// requests to the matching gateway agent (acpx does not forward
// sessionKey via ACP newSession params).
@@ -1260,7 +1262,15 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
max_segments: 0,
},
closed: false,
messages: [],
messages: [
{
User: {
id: 'prior-user',
content: [{ Text: 'literal &amp; &lt;tag&gt;' } as never],
},
},
{ Agent: { content: [{ Text: 'Prior answer.' }], tool_results: {} } },
],
updated_at: seedTimestamp,
cumulative_token_usage: {},
request_token_usage: {},
@@ -1285,13 +1295,15 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
})
},
} as never
const calls: Array<{ method: string; input: unknown }> = []
const runtime = new AcpxRuntime({
cwd,
stateDir,
openclawGatewayChat,
// Provide a runtime factory that would fail loudly if reached —
// image turns must NOT fall through to the ACP path.
runtimeFactory: () => {
runtimeFactory: (options) => {
calls.push({ method: 'createRuntime', input: options })
throw new Error('ACP path should not be reached for image turns')
},
})
@@ -1322,6 +1334,9 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
{ type: 'done', stopReason: 'end_turn' },
])
expect(gatewayCalls).toHaveLength(1)
expect(
calls.filter((call) => call.method === 'createRuntime'),
).toHaveLength(0)
const gatewayInput = gatewayCalls[0]?.input as {
agentId: string
sessionKey: string
@@ -1331,6 +1346,10 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
}>
}
expect(gatewayInput.agentId).toBe('img-bot')
expect(gatewayInput.messages[0]).toEqual({
role: 'user',
content: 'literal &amp; &lt;tag&gt;',
})
expect(gatewayInput.messages.at(-1)?.role).toBe('user')
const userContent = gatewayInput.messages.at(-1)?.content
expect(Array.isArray(userContent)).toBe(true)
@@ -1345,7 +1364,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
agent,
sessionId: 'main',
})
expect(history.items.map((item) => item.role)).toEqual([
expect(history.items.slice(-2).map((item) => item.role)).toEqual([
'user',
'assistant',
])

View File

@@ -1,6 +1,5 @@
export const OPENCLAW_AGENT_NAME = 'openclaw'
export const OPENCLAW_IMAGE =
'ghcr.io/browseros-ai/openclaw:2026.5.2-browseros.1'
export const OPENCLAW_IMAGE = 'ghcr.io/openclaw/openclaw:2026.4.12'
export const OPENCLAW_GATEWAY_CONTAINER_PORT = 18789
export const OPENCLAW_CONTAINER_HOME = '/home/node/.openclaw'
export const OPENCLAW_COMPOSE_PROJECT_NAME = 'browseros-openclaw'

View File

@@ -1,8 +1,12 @@
BINARY := browseros-patch
PREFIX ?= /usr/local/bin
GOBIN := $(shell go env GOBIN)
ifeq ($(GOBIN),)
GOBIN := $(shell go env GOPATH)/bin
endif
PREFIX ?= $(GOBIN)
VERSION ?= dev
.PHONY: build install clean test fmt
.PHONY: build install uninstall clean test fmt
build:
go build -ldflags "-X github.com/browseros-ai/BrowserOS/packages/browseros/tools/patch/cmd.Version=$(VERSION)" -o $(BINARY) .
@@ -17,6 +21,10 @@ else
endif
@echo "Installed $(BINARY) to $(PREFIX)/$(BINARY)"
uninstall:
rm -f $(PREFIX)/$(BINARY)
@echo "Removed $(PREFIX)/$(BINARY)"
test:
go test ./...

View File

@@ -3,6 +3,7 @@ package workspace
import (
"fmt"
"path/filepath"
"slices"
"strings"
)
@@ -15,21 +16,21 @@ func Detect(reg *Registry, cwd string) (Entry, error) {
return Entry{}, err
}
clean := filepath.Clean(abs)
realClean := canonicalPath(clean)
var best Entry
bestLen := -1
for _, ws := range reg.Workspaces {
base := filepath.Clean(ws.Path)
if clean == base || strings.HasPrefix(clean, base+string(filepath.Separator)) {
if len(base) > bestLen {
realBase := canonicalPath(base)
if containsPath(clean, base) || containsPath(realClean, realBase) {
if len(realBase) > bestLen {
best = ws
bestLen = len(base)
bestLen = len(realBase)
}
}
}
if bestLen == -1 {
return Entry{}, fmt.Errorf(
`not inside a registered workspace; run "browseros-patch list" to inspect workspaces or pass one by name`,
)
return Entry{}, detectError(clean, realClean, reg.Workspaces)
}
return best, nil
}
@@ -47,3 +48,43 @@ func Resolve(reg *Registry, name string, cwd string, src string) (Entry, error)
}
return Detect(reg, cwd)
}
func canonicalPath(path string) string {
realPath, err := filepath.EvalSymlinks(path)
if err != nil {
return filepath.Clean(path)
}
return filepath.Clean(realPath)
}
func containsPath(path string, base string) bool {
return path == base || strings.HasPrefix(path, base+string(filepath.Separator))
}
func detectError(cwd string, resolvedCWD string, workspaces []Entry) error {
var builder strings.Builder
builder.WriteString(`not inside a registered workspace; run "browseros-patch list" to inspect workspaces or pass one by name`)
builder.WriteString("\n")
builder.WriteString("cwd: ")
builder.WriteString(cwd)
if resolvedCWD != cwd {
builder.WriteString("\nresolved cwd: ")
builder.WriteString(resolvedCWD)
}
if len(workspaces) > 0 {
builder.WriteString("\nregistered workspaces:")
sorted := append([]Entry(nil), workspaces...)
slices.SortFunc(sorted, func(a, b Entry) int {
return strings.Compare(a.Name, b.Name)
})
for _, ws := range sorted {
builder.WriteString("\n ")
builder.WriteString(ws.Name)
builder.WriteString(" ")
builder.WriteString(ws.Path)
}
builder.WriteString("\nexample: browseros-patch diff ")
builder.WriteString(sorted[0].Name)
}
return fmt.Errorf("%s", builder.String())
}

View File

@@ -70,7 +70,7 @@ func NormalizeWorkspacePath(raw string) (string, error) {
if _, err := os.Stat(filepath.Join(clean, ".git")); err != nil {
return "", fmt.Errorf("workspace is not a git checkout: %s", clean)
}
return clean, nil
return canonicalPath(clean), nil
}
func (r *Registry) Get(name string) (Entry, error) {

View File

@@ -3,6 +3,7 @@ package workspace
import (
"os"
"path/filepath"
"strings"
"testing"
)
@@ -36,6 +37,10 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
t.Fatalf("mkdir: %v", err)
}
}
detectedPath := filepath.Join(child, "chrome", "browser")
if err := os.MkdirAll(detectedPath, 0o755); err != nil {
t.Fatalf("mkdir detected path: %v", err)
}
reg := &Registry{Version: 1}
if _, err := reg.Add("parent", parent); err != nil {
@@ -45,7 +50,7 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
t.Fatalf("add child: %v", err)
}
ws, err := Detect(reg, filepath.Join(child, "chrome", "browser"))
ws, err := Detect(reg, detectedPath)
if err != nil {
t.Fatalf("Detect: %v", err)
}
@@ -53,3 +58,86 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
t.Fatalf("expected child workspace, got %q", ws.Name)
}
}
func TestDetectMatchesSymlinkedWorkingDirectory(t *testing.T) {
root := t.TempDir()
workspacePath := filepath.Join(root, "chromium-1", "src")
if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
t.Fatalf("mkdir workspace: %v", err)
}
if err := os.MkdirAll(filepath.Join(workspacePath, "chrome", "browser"), 0o755); err != nil {
t.Fatalf("mkdir workspace child: %v", err)
}
linkPath := filepath.Join(root, "ch-1")
if err := os.Symlink(workspacePath, linkPath); err != nil {
t.Fatalf("symlink workspace: %v", err)
}
reg := &Registry{Version: 1}
if _, err := reg.Add("ch1", workspacePath); err != nil {
t.Fatalf("add workspace: %v", err)
}
ws, err := Detect(reg, filepath.Join(linkPath, "chrome", "browser"))
if err != nil {
t.Fatalf("Detect: %v", err)
}
if ws.Name != "ch1" {
t.Fatalf("expected ch1 workspace, got %q", ws.Name)
}
}
func TestRegistryAddStoresCanonicalWorkspacePath(t *testing.T) {
root := t.TempDir()
workspacePath := filepath.Join(root, "chromium-1", "src")
if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
t.Fatalf("mkdir workspace: %v", err)
}
linkPath := filepath.Join(root, "ch-1")
if err := os.Symlink(workspacePath, linkPath); err != nil {
t.Fatalf("symlink workspace: %v", err)
}
reg := &Registry{Version: 1}
entry, err := reg.Add("ch1", linkPath)
if err != nil {
t.Fatalf("add workspace: %v", err)
}
expectedPath := canonicalPath(workspacePath)
if entry.Path != expectedPath {
t.Fatalf("expected canonical path %q, got %q", expectedPath, entry.Path)
}
}
func TestDetectErrorIncludesPathContextAndWorkspaceHint(t *testing.T) {
root := t.TempDir()
workspacePath := filepath.Join(root, "chromium-1", "src")
if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
t.Fatalf("mkdir workspace: %v", err)
}
outsidePath := filepath.Join(root, "outside")
if err := os.MkdirAll(outsidePath, 0o755); err != nil {
t.Fatalf("mkdir outside: %v", err)
}
reg := &Registry{Version: 1}
if _, err := reg.Add("ch1", workspacePath); err != nil {
t.Fatalf("add workspace: %v", err)
}
_, err := Detect(reg, outsidePath)
if err == nil {
t.Fatalf("expected Detect to fail")
}
message := err.Error()
for _, want := range []string{
"cwd: " + outsidePath,
"registered workspaces:",
"ch1 " + canonicalPath(workspacePath),
"example: browseros-patch diff ch1",
} {
if !strings.Contains(message, want) {
t.Fatalf("expected error to contain %q, got:\n%s", want, message)
}
}
}