mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-14 16:14:28 +00:00
Compare commits
4 Commits
fix/opencl
...
fix/patch-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8e83d3a670 | ||
|
|
d61d6fc8a9 | ||
|
|
d383b5e344 | ||
|
|
ce4bb44083 |
53
.github/workflows/eval-weekly.yml
vendored
53
.github/workflows/eval-weekly.yml
vendored
@@ -44,6 +44,19 @@ jobs:
|
||||
working-directory: packages/browseros-agent
|
||||
run: bun install --ignore-scripts
|
||||
|
||||
- name: Install Claude Code CLI
|
||||
working-directory: packages/browseros-agent/apps/eval
|
||||
env:
|
||||
EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/legacy/browseros-agent-weekly.json' }}
|
||||
run: |
|
||||
if bun -e "const config = await Bun.file(process.env.EVAL_CONFIG).json(); process.exit(config.agent?.type === 'claude-code' ? 0 : 1)"; then
|
||||
npm install -g @anthropic-ai/claude-code@2.1.119
|
||||
echo "Claude Code CLI installed at $(command -v claude)"
|
||||
claude --version
|
||||
else
|
||||
echo "Eval config does not use Claude Code; skipping Claude Code CLI install"
|
||||
fi
|
||||
|
||||
- name: Install Python eval dependencies
|
||||
# agisdk pinned so silent upstream releases can't shift task definitions
|
||||
# or grader behavior. Bump intentionally with a documented re-baseline.
|
||||
@@ -67,13 +80,11 @@ jobs:
|
||||
env:
|
||||
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
|
||||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
NOPECHA_API_KEY: ${{ secrets.NOPECHA_API_KEY }}
|
||||
EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
|
||||
EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
|
||||
EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
|
||||
EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
|
||||
EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
|
||||
BROWSEROS_BINARY: /usr/bin/browseros
|
||||
WEBARENA_INFINITY_DIR: /tmp/webarena-infinity
|
||||
# OpenClaw container runtime is macOS-only; opt the Linux runner
|
||||
@@ -82,7 +93,35 @@ jobs:
|
||||
EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/legacy/browseros-agent-weekly.json' }}
|
||||
run: |
|
||||
echo "Running eval with config: $EVAL_CONFIG"
|
||||
xvfb-run --auto-servernum --server-args="-screen 0 1440x900x24" bun run src/index.ts suite --config "$EVAL_CONFIG" --publish r2
|
||||
xvfb-run --auto-servernum --server-args="-screen 0 1440x900x24" bun run src/index.ts suite --config "$EVAL_CONFIG"
|
||||
# Capture the run directory so report.html can be generated before the R2 publish step.
|
||||
SUMMARY_PATH="$(find results -name summary.json -type f -print | sort | tail -n 1)"
|
||||
if [ -z "$SUMMARY_PATH" ]; then
|
||||
echo "No eval run summary found"
|
||||
exit 1
|
||||
fi
|
||||
RUN_DIR="$(dirname "$SUMMARY_PATH")"
|
||||
echo "EVAL_RUN_DIR=$RUN_DIR" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Generate run analysis report
|
||||
if: success()
|
||||
working-directory: packages/browseros-agent/apps/eval
|
||||
env:
|
||||
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
run: |
|
||||
echo "Generating run report for $EVAL_RUN_DIR"
|
||||
bun scripts/generate-report.ts --input "$EVAL_RUN_DIR" --output "$EVAL_RUN_DIR/report.html"
|
||||
|
||||
- name: Publish eval run to R2
|
||||
if: success()
|
||||
working-directory: packages/browseros-agent/apps/eval
|
||||
env:
|
||||
EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
|
||||
EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
|
||||
EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
|
||||
EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
|
||||
EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
|
||||
run: bun run src/index.ts publish --run "$EVAL_RUN_DIR" --target r2
|
||||
|
||||
- name: Generate trend report
|
||||
if: success()
|
||||
@@ -97,7 +136,7 @@ jobs:
|
||||
EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
|
||||
run: bun apps/eval/scripts/weekly-report.ts /tmp/eval-report.html
|
||||
|
||||
- name: Upload report as artifact
|
||||
- name: Upload trend report as artifact
|
||||
if: success()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
||||
@@ -26,6 +26,7 @@ import {
|
||||
filterTurnsPersistedInHistory,
|
||||
flattenHistoryPages,
|
||||
} from './claw-chat-types'
|
||||
import { consumePendingInitialMessage } from './pending-initial-message'
|
||||
import { QueuePanel } from './QueuePanel'
|
||||
import { useAgentConversation } from './useAgentConversation'
|
||||
import { useHarnessChatHistory } from './useHarnessChatHistory'
|
||||
@@ -113,25 +114,52 @@ function AgentConversationController({
|
||||
sendRef.current = send
|
||||
|
||||
useEffect(() => {
|
||||
if (disabled || !historyReady) return
|
||||
|
||||
// Registry-first: when the user submitted at /home with
|
||||
// attachments, the rich payload is here. URL `?q=` may also be
|
||||
// present and is the text-only fallback path; the registry wins
|
||||
// when both exist because it carries the binary attachments
|
||||
// alongside the text.
|
||||
const pending = consumePendingInitialMessage(agentId)
|
||||
if (pending) {
|
||||
// Mark the dedup ref so the text-only branch below doesn't
|
||||
// re-fire on the same render.
|
||||
if (initialMessageKey) {
|
||||
initialMessageSentRef.current = initialMessageKey
|
||||
}
|
||||
onInitialMessageConsumedRef.current()
|
||||
void sendRef.current({
|
||||
text: pending.text,
|
||||
attachments: pending.attachments.map((a) => a.payload),
|
||||
attachmentPreviews: pending.attachments.map((a) => ({
|
||||
id: a.id,
|
||||
kind: a.kind,
|
||||
mediaType: a.mediaType,
|
||||
name: a.name,
|
||||
dataUrl: a.dataUrl,
|
||||
})),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const query = initialMessage?.trim()
|
||||
if (!initialMessageKey) {
|
||||
// Reset is safe even on the post-registry-fire re-run: consume
|
||||
// is destructive, so the registry is already drained — there's
|
||||
// nothing left for a third run to re-send.
|
||||
initialMessageSentRef.current = null
|
||||
return
|
||||
}
|
||||
|
||||
if (
|
||||
!query ||
|
||||
initialMessageSentRef.current === initialMessageKey ||
|
||||
disabled ||
|
||||
!historyReady
|
||||
) {
|
||||
if (!query || initialMessageSentRef.current === initialMessageKey) {
|
||||
return
|
||||
}
|
||||
|
||||
initialMessageSentRef.current = initialMessageKey
|
||||
onInitialMessageConsumedRef.current()
|
||||
void sendRef.current({ text: query })
|
||||
}, [disabled, historyReady, initialMessage, initialMessageKey])
|
||||
}, [agentId, disabled, historyReady, initialMessage, initialMessageKey])
|
||||
|
||||
const handleSelectAgent = (entry: AgentEntry) => {
|
||||
navigate(`${agentPathPrefix}/${entry.agentId}`)
|
||||
|
||||
@@ -18,8 +18,12 @@ import { SignInHint } from '@/entrypoints/newtab/index/SignInHint'
|
||||
import { useActiveHint } from '@/entrypoints/newtab/index/useActiveHint'
|
||||
import { AgentCardDock } from './AgentCardDock'
|
||||
import { useAgentCommandData } from './agent-command-layout'
|
||||
import { ConversationInput } from './ConversationInput'
|
||||
import {
|
||||
ConversationInput,
|
||||
type ConversationInputSendInput,
|
||||
} from './ConversationInput'
|
||||
import { orderHomeAgents } from './home-agent-card.helpers'
|
||||
import { setPendingInitialMessage } from './pending-initial-message'
|
||||
|
||||
function EmptyAgentsState({ onOpenAgents }: { onOpenAgents: () => void }) {
|
||||
return (
|
||||
@@ -116,8 +120,19 @@ export const AgentCommandHome: FC = () => {
|
||||
}
|
||||
}, [legacyAgents, selectedAgentId])
|
||||
|
||||
const handleSend = (input: { text: string }) => {
|
||||
const handleSend = (input: ConversationInputSendInput) => {
|
||||
if (!selectedAgentId) return
|
||||
// Stash text + attachments in the in-memory registry. Text also
|
||||
// travels in `?q=` so a hard refresh / shareable URL still works
|
||||
// for text-only prompts; attachments are registry-only because a
|
||||
// multi-megabyte dataUrl can't ride a URL search param. The chat
|
||||
// screen prefers the registry when both are present.
|
||||
setPendingInitialMessage({
|
||||
agentId: selectedAgentId,
|
||||
text: input.text,
|
||||
attachments: input.attachments,
|
||||
createdAt: Date.now(),
|
||||
})
|
||||
navigate(
|
||||
`/home/agents/${selectedAgentId}?q=${encodeURIComponent(input.text)}`,
|
||||
)
|
||||
@@ -167,7 +182,7 @@ export const AgentCommandHome: FC = () => {
|
||||
streaming={false}
|
||||
disabled={!selectedAgentReady}
|
||||
status={selectedAgentStatus}
|
||||
attachmentsEnabled={false}
|
||||
attachmentsEnabled={true}
|
||||
placeholder={
|
||||
selectedAgentReady
|
||||
? `Ask ${selectedAgentName} to handle a task...`
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
import { afterEach, describe, expect, it } from 'bun:test'
|
||||
import type { StagedAttachment } from '@/lib/attachments'
|
||||
import {
|
||||
consumePendingInitialMessage,
|
||||
peekPendingInitialMessage,
|
||||
setPendingInitialMessage,
|
||||
} from './pending-initial-message'
|
||||
|
||||
function makeAttachment(id: string): StagedAttachment {
|
||||
return {
|
||||
id,
|
||||
kind: 'image',
|
||||
mediaType: 'image/png',
|
||||
name: `${id}.png`,
|
||||
dataUrl: `data:image/png;base64,${id}`,
|
||||
payload: {
|
||||
kind: 'image',
|
||||
mediaType: 'image/png',
|
||||
name: `${id}.png`,
|
||||
dataUrl: `data:image/png;base64,${id}`,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
afterEach(() => {
|
||||
// Drain any leftover pending entry so tests don't leak into each
|
||||
// other (the module-scope state survives across `it` blocks).
|
||||
consumePendingInitialMessage('drain')
|
||||
// If still set, clear by consuming with the matching id.
|
||||
const leftover = peekPendingInitialMessage()
|
||||
if (leftover) consumePendingInitialMessage(leftover.agentId)
|
||||
})
|
||||
|
||||
describe('pending-initial-message', () => {
|
||||
it('consume returns the payload set for the same agentId', () => {
|
||||
setPendingInitialMessage({
|
||||
agentId: 'agent-a',
|
||||
text: 'hello',
|
||||
attachments: [makeAttachment('one')],
|
||||
createdAt: Date.now(),
|
||||
})
|
||||
const result = consumePendingInitialMessage('agent-a')
|
||||
expect(result?.text).toBe('hello')
|
||||
expect(result?.attachments).toHaveLength(1)
|
||||
expect(result?.attachments[0]?.id).toBe('one')
|
||||
})
|
||||
|
||||
it('consume is destructive — second call returns null', () => {
|
||||
setPendingInitialMessage({
|
||||
agentId: 'agent-a',
|
||||
text: 'hello',
|
||||
attachments: [],
|
||||
createdAt: Date.now(),
|
||||
})
|
||||
expect(consumePendingInitialMessage('agent-a')).not.toBeNull()
|
||||
expect(consumePendingInitialMessage('agent-a')).toBeNull()
|
||||
})
|
||||
|
||||
it('consume returns null and preserves entry when agentId differs', () => {
|
||||
setPendingInitialMessage({
|
||||
agentId: 'agent-a',
|
||||
text: 'hello',
|
||||
attachments: [],
|
||||
createdAt: Date.now(),
|
||||
})
|
||||
expect(consumePendingInitialMessage('agent-b')).toBeNull()
|
||||
expect(peekPendingInitialMessage()?.agentId).toBe('agent-a')
|
||||
expect(consumePendingInitialMessage('agent-a')).not.toBeNull()
|
||||
})
|
||||
|
||||
it('returns null for entries older than the TTL', () => {
|
||||
setPendingInitialMessage({
|
||||
agentId: 'agent-a',
|
||||
text: 'old',
|
||||
attachments: [],
|
||||
createdAt: Date.now() - 11_000, // older than 10 s TTL
|
||||
})
|
||||
expect(consumePendingInitialMessage('agent-a')).toBeNull()
|
||||
})
|
||||
|
||||
it('replaces a previous pending entry when set is called again', () => {
|
||||
setPendingInitialMessage({
|
||||
agentId: 'agent-a',
|
||||
text: 'first',
|
||||
attachments: [],
|
||||
createdAt: Date.now(),
|
||||
})
|
||||
setPendingInitialMessage({
|
||||
agentId: 'agent-b',
|
||||
text: 'second',
|
||||
attachments: [makeAttachment('two')],
|
||||
createdAt: Date.now(),
|
||||
})
|
||||
expect(consumePendingInitialMessage('agent-a')).toBeNull()
|
||||
const result = consumePendingInitialMessage('agent-b')
|
||||
expect(result?.text).toBe('second')
|
||||
expect(result?.attachments[0]?.id).toBe('two')
|
||||
})
|
||||
|
||||
it('no-ops when set is called with empty agentId', () => {
|
||||
setPendingInitialMessage({
|
||||
agentId: '',
|
||||
text: 'oops',
|
||||
attachments: [],
|
||||
createdAt: Date.now(),
|
||||
})
|
||||
expect(peekPendingInitialMessage()).toBeNull()
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,81 @@
|
||||
import type { StagedAttachment } from '@/lib/attachments'
|
||||
|
||||
/**
|
||||
* Same-tab in-memory handoff between the `/home` composer and the
|
||||
* chat screen at `/home/agents/:agentId`. URL search params (`?q=`)
|
||||
* carry the text fine, but cannot carry binary attachments — a multi-
|
||||
* megabyte image dataUrl would explode URL length limits and round-
|
||||
* trip badly. This module is the rich-data side channel for the same
|
||||
* navigation: the composer writes here, the chat screen reads here on
|
||||
* mount.
|
||||
*
|
||||
* Intentionally module-scope. Same render tree, same tab — no need
|
||||
* for sessionStorage (which would force JSON-serialising the dataUrls
|
||||
* and re-parsing on the read side). Cross-tab handoff is out of
|
||||
* scope: the user typing at home in tab A and switching to tab B's
|
||||
* chat would surface an empty registry there, which is the correct
|
||||
* behaviour.
|
||||
*/
|
||||
|
||||
export interface PendingInitialMessage {
|
||||
agentId: string
|
||||
text: string
|
||||
attachments: StagedAttachment[]
|
||||
createdAt: number
|
||||
}
|
||||
|
||||
/**
|
||||
* 10s TTL on the entry. A stale entry from a back-button journey
|
||||
* shouldn't fire on a future visit; if real-world latency makes 10s
|
||||
* too tight under slow harness boot, bump but never make it
|
||||
* indefinite.
|
||||
*/
|
||||
const PENDING_TTL_MS = 10_000
|
||||
|
||||
let pending: PendingInitialMessage | null = null
|
||||
let pendingTimer: ReturnType<typeof setTimeout> | null = null
|
||||
|
||||
function clearPending(): void {
|
||||
pending = null
|
||||
if (pendingTimer !== null) {
|
||||
clearTimeout(pendingTimer)
|
||||
pendingTimer = null
|
||||
}
|
||||
}
|
||||
|
||||
export function setPendingInitialMessage(payload: PendingInitialMessage): void {
|
||||
// Defensive: the home composer should never call this without an
|
||||
// agent selected. If it somehow does, no-op rather than holding a
|
||||
// payload we can't route.
|
||||
if (!payload.agentId) return
|
||||
clearPending()
|
||||
pending = payload
|
||||
pendingTimer = setTimeout(clearPending, PENDING_TTL_MS)
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructive read. Returns the entry only if `agentId` matches and
|
||||
* the entry is fresh; clears the entry on success so Strict-Mode
|
||||
* double-invokes can't double-send.
|
||||
*/
|
||||
export function consumePendingInitialMessage(
|
||||
agentId: string,
|
||||
): PendingInitialMessage | null {
|
||||
if (!pending) return null
|
||||
if (pending.agentId !== agentId) return null
|
||||
if (Date.now() - pending.createdAt >= PENDING_TTL_MS) {
|
||||
clearPending()
|
||||
return null
|
||||
}
|
||||
const entry = pending
|
||||
clearPending()
|
||||
return entry
|
||||
}
|
||||
|
||||
/**
|
||||
* Non-mutating read for tests. Production code should never need this
|
||||
* — use `consume` and own the lifecycle.
|
||||
*/
|
||||
export function peekPendingInitialMessage(): PendingInitialMessage | null {
|
||||
return pending
|
||||
}
|
||||
26
packages/browseros-agent/apps/eval/configs/legacy/browseros-agent-kimi-k2-5-agisdk-real.json
vendored
Normal file
26
packages/browseros-agent/apps/eval/configs/legacy/browseros-agent-kimi-k2-5-agisdk-real.json
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"agent": {
|
||||
"type": "single",
|
||||
"provider": "openai-compatible",
|
||||
"model": "moonshotai/kimi-k2.5",
|
||||
"apiKey": "OPENROUTER_API_KEY",
|
||||
"baseUrl": "https://openrouter.ai/api/v1",
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../../data/agisdk-real.jsonl",
|
||||
"num_workers": 3,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["agisdk_state_diff"],
|
||||
"timeout_ms": 1800000
|
||||
}
|
||||
27
packages/browseros-agent/apps/eval/configs/legacy/browseros-agent-opus-4-6-agisdk-real.json
vendored
Normal file
27
packages/browseros-agent/apps/eval/configs/legacy/browseros-agent-opus-4-6-agisdk-real.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"agent": {
|
||||
"type": "single",
|
||||
"provider": "bedrock",
|
||||
"model": "global.anthropic.claude-opus-4-6-v1",
|
||||
"region": "AWS_REGION",
|
||||
"accessKeyId": "AWS_ACCESS_KEY_ID",
|
||||
"secretAccessKey": "AWS_SECRET_ACCESS_KEY",
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../../data/agisdk-real.jsonl",
|
||||
"num_workers": 2,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["agisdk_state_diff"],
|
||||
"timeout_ms": 1800000
|
||||
}
|
||||
@@ -8,7 +8,7 @@
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../../data/agisdk-real.jsonl",
|
||||
"num_workers": 10,
|
||||
"num_workers": 3,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"agent": {
|
||||
"type": "claude-code",
|
||||
"model": "opus"
|
||||
"model": "opus",
|
||||
"extraArgs": ["--permission-mode", "bypassPermissions"]
|
||||
},
|
||||
"dataset": "../../data/agisdk-real.jsonl",
|
||||
"num_workers": 1,
|
||||
|
||||
191
packages/browseros-agent/apps/eval/scripts/generate-report.ts
vendored
Normal file
191
packages/browseros-agent/apps/eval/scripts/generate-report.ts
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
import { mkdir, stat } from 'node:fs/promises'
|
||||
import { dirname, resolve } from 'node:path'
|
||||
import { query as claudeQuery } from '@anthropic-ai/claude-agent-sdk'
|
||||
import { readRunMetricSummary } from '../src/reporting/task-metrics'
|
||||
|
||||
export const DEFAULT_REPORT_MODEL = 'claude-opus-4-6'
|
||||
export const DEFAULT_REPORT_MAX_TURNS = 300
|
||||
|
||||
type Env = Record<string, string | undefined>
|
||||
type ClaudeQuery = (input: unknown) => AsyncIterable<Record<string, unknown>>
|
||||
|
||||
export interface ReportAgentInvocation {
|
||||
inputDir: string
|
||||
outputPath: string
|
||||
prompt: string
|
||||
}
|
||||
|
||||
export interface GenerateEvalReportOptions {
|
||||
inputDir: string
|
||||
outputPath: string
|
||||
runAgent?: (invocation: ReportAgentInvocation) => Promise<void>
|
||||
}
|
||||
|
||||
interface ClaudeReportAgentDeps {
|
||||
query?: ClaudeQuery
|
||||
env?: Env
|
||||
}
|
||||
|
||||
function usage(): string {
|
||||
return `Usage: bun scripts/generate-report.ts --input <run-dir> --output <report.html>`
|
||||
}
|
||||
|
||||
function parseArgs(
|
||||
argv: string[],
|
||||
): Pick<GenerateEvalReportOptions, 'inputDir' | 'outputPath'> {
|
||||
let inputDir = ''
|
||||
let outputPath = ''
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const arg = argv[i]
|
||||
if (arg === '--input' || arg === '--run') {
|
||||
inputDir = argv[++i] ?? ''
|
||||
} else if (arg === '--output' || arg === '--out') {
|
||||
outputPath = argv[++i] ?? ''
|
||||
} else if (arg === '--help' || arg === '-h') {
|
||||
console.log(usage())
|
||||
process.exit(0)
|
||||
}
|
||||
}
|
||||
if (!inputDir || !outputPath) {
|
||||
throw new Error(usage())
|
||||
}
|
||||
return { inputDir, outputPath }
|
||||
}
|
||||
|
||||
function claudeCodeEnv(env: Env): Env {
|
||||
return {
|
||||
CLAUDE_CODE_OAUTH_TOKEN: env.CLAUDE_CODE_OAUTH_TOKEN,
|
||||
ANTHROPIC_API_KEY: env.ANTHROPIC_API_KEY,
|
||||
HOME: env.HOME,
|
||||
PATH: env.PATH,
|
||||
SHELL: env.SHELL,
|
||||
TMPDIR: env.TMPDIR,
|
||||
TMP: env.TMP,
|
||||
TEMP: env.TEMP,
|
||||
USER: env.USER,
|
||||
CLAUDECODE: '',
|
||||
}
|
||||
}
|
||||
|
||||
async function buildReportPrompt(
|
||||
inputDir: string,
|
||||
outputPath: string,
|
||||
): Promise<string> {
|
||||
const metrics = await readRunMetricSummary(inputDir)
|
||||
|
||||
return `Analyze this BrowserOS eval run and write a shareable HTML report.
|
||||
|
||||
Run directory: ${inputDir}
|
||||
Output file to write: ${outputPath}
|
||||
|
||||
You are running with the run directory as cwd. Inspect the local artifacts:
|
||||
- summary.json for run totals and pass rate
|
||||
- each task directory's metadata.json for query, final answer, timing, screenshots, and grader results
|
||||
- each task directory's messages.jsonl for tool calls, tool errors, and recent trajectory
|
||||
- screenshots/ for visual evidence
|
||||
- grader-artifacts/ when present for grader-specific context
|
||||
|
||||
Write the final report directly to the output file path above. Do not print the
|
||||
report instead of writing it. Do not modify any input artifacts. The only file
|
||||
you should create or overwrite is the requested report.html.
|
||||
|
||||
The report should follow the style and density of the Shadowfax AGI SDK report:
|
||||
- Title like "AGI SDK Random-10 Failure Report" or a run-specific equivalent
|
||||
- Run directory and note that screenshots are embedded as data URIs
|
||||
- Summary cards for total tasks, passed, failed, pass rate, average duration, average steps, and average tool calls
|
||||
- A Metrics section with compact charts for Duration by task, Steps by task, Tool calls by task, and Tool errors by task
|
||||
- Task Summary table with task id, status, score, duration, steps, and prompt
|
||||
- Include tool calls and tool errors in the Task Summary table
|
||||
- Failure sections with stable anchors using each task id, for example <section id="agisdk-networkin-10">
|
||||
- For each failed task: Diagnosis, Evidence, Next Check, final screenshot, AGI SDK / grader criteria, final answer, and recent trajectory events
|
||||
- Make failure links in the summary table point to the task anchors
|
||||
- Keep the HTML self-contained: inline CSS and embedded final screenshots as data:image/png;base64 URIs
|
||||
- Escape user/model text correctly so task outputs cannot break the page
|
||||
|
||||
Analysis guidance:
|
||||
- Focus on why the model failed: task understanding, browser/tool usage, missing verification, tool errors, max-step/timeout, bad final answer, or grader ambiguity
|
||||
- Use messages.jsonl strategically. Do not paste huge DOM outputs into the report. Summarize only the relevant recent trajectory and evidence.
|
||||
- Limit trajectory analysis to the most relevant 200-300 events/calls across the run. Prefer failed tasks and the final/key actions for each failure.
|
||||
- If a grader criterion is boolean-only or ambiguous, say so and identify what additional artifact would make it debuggable.
|
||||
|
||||
Deterministic run metrics computed from metadata.json and messages.jsonl:
|
||||
\`\`\`json
|
||||
${JSON.stringify(metrics, null, 2)}
|
||||
\`\`\`
|
||||
|
||||
After writing the file, verify that ${outputPath} exists and is non-empty.`
|
||||
}
|
||||
|
||||
async function assertRunDir(inputDir: string): Promise<void> {
|
||||
const inputStat = await stat(inputDir).catch(() => null)
|
||||
if (!inputStat?.isDirectory()) {
|
||||
throw new Error(`Not a run directory: ${inputDir}`)
|
||||
}
|
||||
}
|
||||
|
||||
async function assertReportWritten(outputPath: string): Promise<void> {
|
||||
const outputStat = await stat(outputPath).catch(() => null)
|
||||
if (!outputStat?.isFile() || outputStat.size === 0) {
|
||||
throw new Error(`Report was not written: ${outputPath}`)
|
||||
}
|
||||
}
|
||||
|
||||
export async function runClaudeCodeReportAgent(
|
||||
invocation: ReportAgentInvocation,
|
||||
deps: ClaudeReportAgentDeps = {},
|
||||
): Promise<void> {
|
||||
const query = deps.query ?? (claudeQuery as unknown as ClaudeQuery)
|
||||
let resultSubtype: string | undefined
|
||||
|
||||
for await (const message of query({
|
||||
prompt: invocation.prompt,
|
||||
options: {
|
||||
cwd: invocation.inputDir,
|
||||
model: DEFAULT_REPORT_MODEL,
|
||||
systemPrompt:
|
||||
'You are an eval failure analyst. Produce a concise, evidence-backed, self-contained HTML report from local run artifacts.',
|
||||
permissionMode: 'bypassPermissions',
|
||||
allowDangerouslySkipPermissions: true,
|
||||
maxTurns: DEFAULT_REPORT_MAX_TURNS,
|
||||
env: claudeCodeEnv(deps.env ?? process.env),
|
||||
},
|
||||
})) {
|
||||
if (message.type === 'result') {
|
||||
resultSubtype =
|
||||
typeof message.subtype === 'string' ? message.subtype : undefined
|
||||
}
|
||||
}
|
||||
|
||||
if (resultSubtype && resultSubtype !== 'success') {
|
||||
throw new Error(`Claude Code report agent failed: ${resultSubtype}`)
|
||||
}
|
||||
}
|
||||
|
||||
export async function generateEvalReport(
|
||||
options: GenerateEvalReportOptions,
|
||||
): Promise<void> {
|
||||
const inputDir = resolve(options.inputDir)
|
||||
const outputPath = resolve(options.outputPath)
|
||||
|
||||
await assertRunDir(inputDir)
|
||||
await mkdir(dirname(outputPath), { recursive: true })
|
||||
|
||||
const invocation = {
|
||||
inputDir,
|
||||
outputPath,
|
||||
prompt: await buildReportPrompt(inputDir, outputPath),
|
||||
}
|
||||
await (options.runAgent ?? runClaudeCodeReportAgent)(invocation)
|
||||
await assertReportWritten(outputPath)
|
||||
}
|
||||
|
||||
if (import.meta.main) {
|
||||
try {
|
||||
await generateEvalReport(parseArgs(Bun.argv.slice(2)))
|
||||
} catch (error) {
|
||||
console.error(error instanceof Error ? error.message : String(error))
|
||||
process.exit(1)
|
||||
}
|
||||
}
|
||||
@@ -134,7 +134,10 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
|
||||
|
||||
// Connect to Chrome via CDP — same per-worker offset used by app-manager.
|
||||
const cdpPort = config.browseros.base_cdp_port + workerIndex
|
||||
const cdp = new CdpBackend({ port: cdpPort })
|
||||
const cdp = new CdpBackend({
|
||||
port: cdpPort,
|
||||
exitOnReconnectFailure: false,
|
||||
})
|
||||
await cdp.connect()
|
||||
const browser = new Browser(cdp)
|
||||
capture.screenshot.setBrowser(browser)
|
||||
|
||||
@@ -43,7 +43,10 @@ export class SingleAgentEvaluator implements AgentEvaluator {
|
||||
|
||||
// Connect to Chrome via CDP — same per-worker offset used by app-manager.
|
||||
const cdpPort = config.browseros.base_cdp_port + workerIndex
|
||||
const cdp = new CdpBackend({ port: cdpPort })
|
||||
const cdp = new CdpBackend({
|
||||
port: cdpPort,
|
||||
exitOnReconnectFailure: false,
|
||||
})
|
||||
await cdp.connect()
|
||||
|
||||
const browser = new Browser(cdp)
|
||||
|
||||
@@ -536,6 +536,12 @@ export interface DashboardConfig {
|
||||
configMode?: boolean
|
||||
}
|
||||
|
||||
export function shouldAutoOpenDashboard(
|
||||
env: Record<string, string | undefined> = process.env,
|
||||
): boolean {
|
||||
return env.CI !== 'true'
|
||||
}
|
||||
|
||||
export function startDashboard(config: DashboardConfig) {
|
||||
const port = config.port ?? 9900
|
||||
dashboardConfigMode = config.configMode ?? false
|
||||
@@ -558,10 +564,12 @@ export function startDashboard(config: DashboardConfig) {
|
||||
console.log(` Dashboard: ${url}`)
|
||||
|
||||
// Auto-open browser
|
||||
try {
|
||||
Bun.spawn(['open', url], { stdout: 'ignore', stderr: 'ignore' })
|
||||
} catch {
|
||||
/* ignore if open command fails */
|
||||
if (shouldAutoOpenDashboard()) {
|
||||
try {
|
||||
Bun.spawn(['open', url], { stdout: 'ignore', stderr: 'ignore' })
|
||||
} catch {
|
||||
/* ignore if open command fails */
|
||||
}
|
||||
}
|
||||
|
||||
return { url, port }
|
||||
|
||||
@@ -61,6 +61,17 @@
|
||||
.header-stats .stat-pass { color: #3fb950; }
|
||||
.header-stats .stat-fail { color: #f85149; }
|
||||
.header-stats .stat-score { color: #f0883e; }
|
||||
.header-report {
|
||||
color: #58a6ff;
|
||||
text-decoration: none;
|
||||
font-size: 12px;
|
||||
font-weight: 600;
|
||||
border: 1px solid #30363d;
|
||||
border-radius: 6px;
|
||||
padding: 5px 9px;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.header-report:hover { border-color: #58a6ff; background: #1c2333; }
|
||||
|
||||
/* ── 3-column layout ─────────────────────────────────────────── */
|
||||
.layout {
|
||||
@@ -84,6 +95,7 @@
|
||||
background: #161b22;
|
||||
border-bottom: 1px solid #30363d;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 12px;
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
@@ -93,6 +105,80 @@
|
||||
}
|
||||
.sidebar-stats .s-pass { color: #3fb950; }
|
||||
.sidebar-stats .s-fail { color: #f85149; }
|
||||
.sidebar-metrics {
|
||||
padding: 12px 16px;
|
||||
background: #0d1117;
|
||||
border-bottom: 1px solid #21262d;
|
||||
}
|
||||
.metric-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(3, minmax(0, 1fr));
|
||||
gap: 8px;
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
.metric-cell {
|
||||
min-width: 0;
|
||||
}
|
||||
.metric-label {
|
||||
display: block;
|
||||
font-size: 9px;
|
||||
font-weight: 600;
|
||||
color: #6e7681;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.04em;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.metric-value {
|
||||
display: block;
|
||||
font-size: 13px;
|
||||
font-weight: 700;
|
||||
color: #e6edf3;
|
||||
margin-top: 2px;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.mini-chart {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 6px;
|
||||
}
|
||||
.mini-chart-title {
|
||||
font-size: 10px;
|
||||
font-weight: 700;
|
||||
color: #8b949e;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.04em;
|
||||
}
|
||||
.mini-bar-row {
|
||||
display: grid;
|
||||
grid-template-columns: minmax(60px, 1fr) 70px 28px;
|
||||
gap: 8px;
|
||||
align-items: center;
|
||||
font-size: 10px;
|
||||
color: #8b949e;
|
||||
}
|
||||
.mini-bar-name {
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
font-family: 'SF Mono', SFMono-Regular, Consolas, 'Liberation Mono', Menlo, monospace;
|
||||
}
|
||||
.mini-bar-track {
|
||||
height: 6px;
|
||||
background: #21262d;
|
||||
border-radius: 999px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.mini-bar-fill {
|
||||
height: 100%;
|
||||
background: #58a6ff;
|
||||
border-radius: 999px;
|
||||
}
|
||||
.mini-bar-value {
|
||||
color: #e6edf3;
|
||||
font-variant-numeric: tabular-nums;
|
||||
text-align: right;
|
||||
}
|
||||
.sidebar-filter {
|
||||
padding: 8px 12px;
|
||||
border-bottom: 1px solid #21262d;
|
||||
@@ -526,6 +612,7 @@
|
||||
<div class="header-sep"></div>
|
||||
<span class="header-run" id="header-run"></span>
|
||||
<span class="header-date" id="header-date"></span>
|
||||
<a class="header-report" id="header-report" target="_blank" rel="noopener" style="display: none;">Run Report</a>
|
||||
<div class="header-stats" id="header-stats"></div>
|
||||
</div>
|
||||
|
||||
@@ -533,6 +620,7 @@
|
||||
<!-- Left sidebar -->
|
||||
<div class="sidebar" id="sidebar">
|
||||
<div class="sidebar-stats" id="sidebar-stats"></div>
|
||||
<div class="sidebar-metrics" id="sidebar-metrics"></div>
|
||||
<div class="sidebar-filter">
|
||||
<input type="text" id="filter-input" placeholder="Search tasks..." autocomplete="off" spellcheck="false" />
|
||||
</div>
|
||||
@@ -627,7 +715,23 @@
|
||||
if (stats.avgScore !== null) {
|
||||
parts.push(`<span class="stat-score">avg ${stats.avgScore}%</span>`);
|
||||
}
|
||||
if (stats.avgDurationMs !== null) {
|
||||
parts.push(`<span>${fmtDuration(stats.avgDurationMs)} avg</span>`);
|
||||
}
|
||||
if (stats.avgToolCalls !== null) {
|
||||
parts.push(`<span>${fmtCompact(stats.avgToolCalls)} tools/task</span>`);
|
||||
}
|
||||
el.innerHTML = parts.join('');
|
||||
|
||||
const reportLink = document.getElementById('header-report');
|
||||
const url = reportUrl(manifest);
|
||||
if (url) {
|
||||
reportLink.href = url;
|
||||
reportLink.style.display = '';
|
||||
} else {
|
||||
reportLink.removeAttribute('href');
|
||||
reportLink.style.display = 'none';
|
||||
}
|
||||
}
|
||||
|
||||
// ── Sidebar rendering ─────────────────────────────────────────
|
||||
@@ -639,11 +743,49 @@
|
||||
statsEl.innerHTML =
|
||||
'<span>' + stats.total + ' total</span>' +
|
||||
'<span class="s-pass">' + stats.passed + ' pass</span>' +
|
||||
'<span class="s-fail">' + stats.failed + ' fail</span>';
|
||||
'<span class="s-fail">' + stats.failed + ' fail</span>' +
|
||||
(stats.avgSteps !== null ? '<span>' + fmtCompact(stats.avgSteps) + ' steps/task</span>' : '') +
|
||||
(stats.avgToolCalls !== null ? '<span>' + fmtCompact(stats.avgToolCalls) + ' tools/task</span>' : '');
|
||||
|
||||
renderSidebarMetrics(tasks, stats);
|
||||
|
||||
renderTaskList('');
|
||||
}
|
||||
|
||||
function renderSidebarMetrics(tasks, stats) {
|
||||
const el = document.getElementById('sidebar-metrics');
|
||||
if (!el) return;
|
||||
|
||||
const chartTasks = tasks
|
||||
.slice()
|
||||
.sort((a, b) => taskMetrics(b).toolCalls - taskMetrics(a).toolCalls)
|
||||
.slice(0, 5);
|
||||
const maxCalls = Math.max(1, ...chartTasks.map((task) => taskMetrics(task).toolCalls));
|
||||
|
||||
const bars = chartTasks.map((task) => {
|
||||
const calls = taskMetrics(task).toolCalls;
|
||||
const width = Math.max(4, Math.round((calls / maxCalls) * 100));
|
||||
return (
|
||||
'<div class="mini-bar-row">' +
|
||||
'<span class="mini-bar-name" title="' + escAttr(task.queryId || task.id || 'Untitled') + '">' + esc(task.queryId || task.id || 'Untitled') + '</span>' +
|
||||
'<span class="mini-bar-track"><span class="mini-bar-fill" style="width: ' + width + '%"></span></span>' +
|
||||
'<span class="mini-bar-value">' + fmtCompact(calls) + '</span>' +
|
||||
'</div>'
|
||||
);
|
||||
}).join('');
|
||||
|
||||
el.innerHTML =
|
||||
'<div class="metric-grid">' +
|
||||
'<div class="metric-cell"><span class="metric-label">Avg Time</span><span class="metric-value">' + (stats.avgDurationMs !== null ? fmtDuration(stats.avgDurationMs) : '-') + '</span></div>' +
|
||||
'<div class="metric-cell"><span class="metric-label">Avg Steps</span><span class="metric-value">' + (stats.avgSteps !== null ? fmtCompact(stats.avgSteps) : '-') + '</span></div>' +
|
||||
'<div class="metric-cell"><span class="metric-label">Avg Tools</span><span class="metric-value">' + (stats.avgToolCalls !== null ? fmtCompact(stats.avgToolCalls) : '-') + '</span></div>' +
|
||||
'</div>' +
|
||||
'<div class="mini-chart">' +
|
||||
'<div class="mini-chart-title">Tool Calls by Task</div>' +
|
||||
(bars || '<div class="task-meta-line"><span>No tool calls recorded</span></div>') +
|
||||
'</div>';
|
||||
}
|
||||
|
||||
function renderTaskList(filter) {
|
||||
const list = document.getElementById('task-list');
|
||||
list.innerHTML = '';
|
||||
@@ -668,8 +810,11 @@
|
||||
}
|
||||
|
||||
const metaParts = [];
|
||||
if (task.durationMs) metaParts.push(fmtDuration(task.durationMs));
|
||||
if (task.screenshotCount) metaParts.push(`${task.screenshotCount} steps`);
|
||||
const metrics = taskMetrics(task);
|
||||
if (metrics.durationMs) metaParts.push(fmtDuration(metrics.durationMs));
|
||||
if (metrics.steps) metaParts.push(`${fmtCompact(metrics.steps)} steps`);
|
||||
if (metrics.toolCalls) metaParts.push(`${fmtCompact(metrics.toolCalls)} tools`);
|
||||
if (metrics.toolErrors) metaParts.push(`${fmtCompact(metrics.toolErrors)} errors`);
|
||||
|
||||
item.innerHTML =
|
||||
'<div class="task-row">' +
|
||||
@@ -714,7 +859,7 @@
|
||||
}
|
||||
|
||||
function artifactPath(task, artifact) {
|
||||
const manifestPath = task.paths && task.paths[artifact];
|
||||
const manifestPath = task.paths?.[artifact];
|
||||
if (typeof manifestPath === 'string' && manifestPath.length > 0) {
|
||||
return manifestPath.replace(/^\/+/, '');
|
||||
}
|
||||
@@ -725,6 +870,17 @@
|
||||
return `${basePath}/${artifactPath(task, artifact)}`;
|
||||
}
|
||||
|
||||
function runArtifactUrl(path) {
|
||||
if (typeof path !== 'string' || path.length === 0) return null;
|
||||
return `${basePath}/${path.replace(/^\/+/, '')}`;
|
||||
}
|
||||
|
||||
function reportUrl(manifest, task) {
|
||||
const url = runArtifactUrl(manifest?.reportPath);
|
||||
if (!url || !task) return url;
|
||||
return `${url}#${encodeURIComponent(task.queryId || task.id || '')}`;
|
||||
}
|
||||
|
||||
function metadataUrl(task) {
|
||||
return artifactUrl(task, 'metadata');
|
||||
}
|
||||
@@ -905,10 +1061,38 @@
|
||||
}
|
||||
|
||||
// Duration
|
||||
if (task.durationMs) {
|
||||
const metrics = taskMetrics(task);
|
||||
if (metrics.durationMs) {
|
||||
html += '<div class="db-section">';
|
||||
html += '<span class="db-label">Duration</span>';
|
||||
html += `<span class="db-value">${fmtDuration(task.durationMs)}</span>`;
|
||||
html += `<span class="db-value">${fmtDuration(metrics.durationMs)}</span>`;
|
||||
html += '</div>';
|
||||
}
|
||||
|
||||
if (metrics.steps) {
|
||||
html += '<div class="db-section">';
|
||||
html += '<span class="db-label">Steps</span>';
|
||||
html += `<span class="db-value">${fmtCompact(metrics.steps)}</span>`;
|
||||
html += '</div>';
|
||||
}
|
||||
|
||||
html += '<div class="db-section">';
|
||||
html += '<span class="db-label">Tool Calls</span>';
|
||||
html += `<span class="db-value">${fmtCompact(metrics.toolCalls)}</span>`;
|
||||
html += '</div>';
|
||||
|
||||
if (metrics.toolErrors) {
|
||||
html += '<div class="db-section">';
|
||||
html += '<span class="db-label">Tool Errors</span>';
|
||||
html += `<span class="db-value">${fmtCompact(metrics.toolErrors)}</span>`;
|
||||
html += '</div>';
|
||||
}
|
||||
|
||||
const reportLink = reportUrl(manifest, task);
|
||||
if (reportLink) {
|
||||
html += '<div class="db-section">';
|
||||
html += '<span class="db-label">Report</span>';
|
||||
html += `<span class="db-value"><a href="${escAttr(reportLink)}" target="_blank" rel="noopener">Open task analysis</a></span>`;
|
||||
html += '</div>';
|
||||
}
|
||||
|
||||
@@ -1234,8 +1418,25 @@
|
||||
function computeStats(tasks) {
|
||||
const total = tasks.length;
|
||||
let passed = 0, failed = 0, totalScore = 0, scoredCount = 0;
|
||||
let totalDurationMs = 0, durationCount = 0;
|
||||
let totalSteps = 0, stepsCount = 0;
|
||||
let totalToolCalls = 0, toolCount = 0;
|
||||
let totalToolErrors = 0;
|
||||
|
||||
tasks.forEach((t) => {
|
||||
const metrics = taskMetrics(t);
|
||||
if (metrics.durationMs > 0) {
|
||||
totalDurationMs += metrics.durationMs;
|
||||
durationCount++;
|
||||
}
|
||||
if (metrics.steps > 0) {
|
||||
totalSteps += metrics.steps;
|
||||
stepsCount++;
|
||||
}
|
||||
totalToolCalls += metrics.toolCalls;
|
||||
totalToolErrors += metrics.toolErrors;
|
||||
toolCount++;
|
||||
|
||||
const graders = t.graderResults || {};
|
||||
const keys = Object.keys(graders);
|
||||
if (keys.length > 0) {
|
||||
@@ -1254,7 +1455,34 @@
|
||||
total: total,
|
||||
passed: passed,
|
||||
failed: failed,
|
||||
avgScore: scoredCount > 0 ? Math.round((totalScore / scoredCount) * 100) : null
|
||||
avgScore: scoredCount > 0 ? Math.round((totalScore / scoredCount) * 100) : null,
|
||||
avgDurationMs: durationCount > 0 ? totalDurationMs / durationCount : null,
|
||||
avgSteps: stepsCount > 0 ? totalSteps / stepsCount : null,
|
||||
avgToolCalls: toolCount > 0 ? totalToolCalls / toolCount : null,
|
||||
totalToolCalls: totalToolCalls,
|
||||
totalToolErrors: totalToolErrors
|
||||
};
|
||||
}
|
||||
|
||||
function taskMetrics(task) {
|
||||
const metrics = task.metrics || {};
|
||||
const screenshots = Number.isFinite(Number(metrics.screenshots))
|
||||
? Number(metrics.screenshots)
|
||||
: Number(task.screenshotCount || 0);
|
||||
return {
|
||||
durationMs: Number.isFinite(Number(metrics.durationMs))
|
||||
? Number(metrics.durationMs)
|
||||
: Number(task.durationMs || 0),
|
||||
steps: Number.isFinite(Number(metrics.steps))
|
||||
? Number(metrics.steps)
|
||||
: screenshots,
|
||||
screenshots: screenshots,
|
||||
toolCalls: Number.isFinite(Number(metrics.toolCalls))
|
||||
? Number(metrics.toolCalls)
|
||||
: 0,
|
||||
toolErrors: Number.isFinite(Number(metrics.toolErrors))
|
||||
? Number(metrics.toolErrors)
|
||||
: 0
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1310,6 +1538,13 @@
|
||||
return `${h}h ${remM}m`;
|
||||
}
|
||||
|
||||
function fmtCompact(value) {
|
||||
const num = Number(value);
|
||||
if (!Number.isFinite(num)) return '0';
|
||||
if (Number.isInteger(num)) return String(num);
|
||||
return num.toFixed(1);
|
||||
}
|
||||
|
||||
function showFatalError(msgHtml) {
|
||||
document.getElementById('center-panel').innerHTML =
|
||||
'<div class="placeholder error">' +
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
PutObjectCommand,
|
||||
S3Client,
|
||||
} from '@aws-sdk/client-s3'
|
||||
import { readTaskMetrics } from '../reporting/task-metrics'
|
||||
import {
|
||||
buildViewerManifest,
|
||||
type ViewerManifestTaskInput,
|
||||
@@ -315,6 +316,7 @@ export class R2Publisher {
|
||||
graderResults:
|
||||
(meta.grader_results as ViewerManifestTaskInput['graderResults']) ||
|
||||
{},
|
||||
metrics: await readTaskMetrics(taskPath, meta, screenshotCount),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -379,10 +381,12 @@ export class R2Publisher {
|
||||
await readFile(join(runDir, 'summary.json'), 'utf-8'),
|
||||
) as Record<string, unknown>
|
||||
} catch {}
|
||||
const reportStat = await stat(join(runDir, 'report.html')).catch(() => null)
|
||||
|
||||
return buildViewerManifest({
|
||||
runId,
|
||||
uploadedAt: this.now().toISOString(),
|
||||
reportPath: reportStat?.isFile() ? 'report.html' : undefined,
|
||||
agentConfig,
|
||||
dataset,
|
||||
summary: summaryData
|
||||
|
||||
188
packages/browseros-agent/apps/eval/src/reporting/task-metrics.ts
vendored
Normal file
188
packages/browseros-agent/apps/eval/src/reporting/task-metrics.ts
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
import { readdir, readFile, stat } from 'node:fs/promises'
|
||||
import { join } from 'node:path'
|
||||
|
||||
export interface EvalTaskMetrics {
|
||||
durationMs: number
|
||||
steps: number
|
||||
screenshots: number
|
||||
toolCalls: number
|
||||
toolErrors: number
|
||||
}
|
||||
|
||||
export interface EvalRunMetrics {
|
||||
taskCount: number
|
||||
totalDurationMs: number
|
||||
avgDurationMs: number
|
||||
totalSteps: number
|
||||
avgSteps: number
|
||||
totalToolCalls: number
|
||||
avgToolCalls: number
|
||||
totalToolErrors: number
|
||||
avgToolErrors: number
|
||||
}
|
||||
|
||||
export interface EvalTaskMetricSummary {
|
||||
queryId: string
|
||||
status: string
|
||||
score?: number
|
||||
pass?: boolean
|
||||
metrics: EvalTaskMetrics
|
||||
}
|
||||
|
||||
export interface EvalRunMetricSummary {
|
||||
run: EvalRunMetrics
|
||||
tasks: EvalTaskMetricSummary[]
|
||||
}
|
||||
|
||||
interface TaskDirEntry {
|
||||
taskId: string
|
||||
taskPath: string
|
||||
}
|
||||
|
||||
function numberValue(value: unknown): number {
|
||||
return typeof value === 'number' && Number.isFinite(value) ? value : 0
|
||||
}
|
||||
|
||||
export function countMessageMetrics(messagesJsonl: string): {
|
||||
toolCalls: number
|
||||
toolErrors: number
|
||||
} {
|
||||
let toolCalls = 0
|
||||
let toolErrors = 0
|
||||
|
||||
for (const line of messagesJsonl.split('\n')) {
|
||||
const trimmed = line.trim()
|
||||
if (!trimmed) continue
|
||||
try {
|
||||
const event = JSON.parse(trimmed) as { type?: unknown }
|
||||
if (event.type === 'tool-input-available') toolCalls++
|
||||
if (event.type === 'tool-output-error') toolErrors++
|
||||
} catch {
|
||||
// Ignore malformed telemetry lines; the raw artifact is still uploaded.
|
||||
}
|
||||
}
|
||||
|
||||
return { toolCalls, toolErrors }
|
||||
}
|
||||
|
||||
export function buildTaskMetrics(
|
||||
metadata: Record<string, unknown>,
|
||||
messageMetrics: { toolCalls: number; toolErrors: number },
|
||||
screenshotCount = 0,
|
||||
): EvalTaskMetrics {
|
||||
const screenshots = numberValue(metadata.screenshot_count) || screenshotCount
|
||||
return {
|
||||
durationMs: numberValue(metadata.total_duration_ms),
|
||||
steps: numberValue(metadata.total_steps) || screenshots,
|
||||
screenshots,
|
||||
toolCalls: messageMetrics.toolCalls,
|
||||
toolErrors: messageMetrics.toolErrors,
|
||||
}
|
||||
}
|
||||
|
||||
export function buildRunMetrics(metrics: EvalTaskMetrics[]): EvalRunMetrics {
|
||||
const taskCount = metrics.length
|
||||
const totalDurationMs = metrics.reduce((sum, metric) => {
|
||||
return sum + metric.durationMs
|
||||
}, 0)
|
||||
const totalSteps = metrics.reduce((sum, metric) => sum + metric.steps, 0)
|
||||
const totalToolCalls = metrics.reduce((sum, metric) => {
|
||||
return sum + metric.toolCalls
|
||||
}, 0)
|
||||
const totalToolErrors = metrics.reduce((sum, metric) => {
|
||||
return sum + metric.toolErrors
|
||||
}, 0)
|
||||
|
||||
return {
|
||||
taskCount,
|
||||
totalDurationMs,
|
||||
avgDurationMs: taskCount > 0 ? totalDurationMs / taskCount : 0,
|
||||
totalSteps,
|
||||
avgSteps: taskCount > 0 ? totalSteps / taskCount : 0,
|
||||
totalToolCalls,
|
||||
avgToolCalls: taskCount > 0 ? totalToolCalls / taskCount : 0,
|
||||
totalToolErrors,
|
||||
avgToolErrors: taskCount > 0 ? totalToolErrors / taskCount : 0,
|
||||
}
|
||||
}
|
||||
|
||||
export async function readTaskMetrics(
|
||||
taskPath: string,
|
||||
metadata: Record<string, unknown>,
|
||||
screenshotCount = 0,
|
||||
): Promise<EvalTaskMetrics> {
|
||||
const messages = await readFile(join(taskPath, 'messages.jsonl'), 'utf-8')
|
||||
.then(countMessageMetrics)
|
||||
.catch(() => ({ toolCalls: 0, toolErrors: 0 }))
|
||||
return buildTaskMetrics(metadata, messages, screenshotCount)
|
||||
}
|
||||
|
||||
function statusFromMetadata(metadata: Record<string, unknown>): string {
|
||||
const termination = metadata.termination_reason
|
||||
if (termination === 'timeout') return 'timeout'
|
||||
if (Array.isArray(metadata.errors) && metadata.errors.length > 0) {
|
||||
return 'failed'
|
||||
}
|
||||
return 'completed'
|
||||
}
|
||||
|
||||
function primaryGrade(metadata: Record<string, unknown>): {
|
||||
score?: number
|
||||
pass?: boolean
|
||||
} {
|
||||
const graders = metadata.grader_results as
|
||||
| Record<string, { score?: unknown; pass?: unknown }>
|
||||
| undefined
|
||||
const first = graders ? Object.values(graders)[0] : undefined
|
||||
return {
|
||||
...(typeof first?.score === 'number' ? { score: first.score } : {}),
|
||||
...(typeof first?.pass === 'boolean' ? { pass: first.pass } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
async function readTaskDirs(runDir: string): Promise<TaskDirEntry[]> {
|
||||
const canonicalTasksDir = join(runDir, 'tasks')
|
||||
const canonicalStat = await stat(canonicalTasksDir).catch(() => null)
|
||||
const baseDir = canonicalStat?.isDirectory() ? canonicalTasksDir : runDir
|
||||
const entries = await readdir(baseDir, { withFileTypes: true }).catch(
|
||||
() => [],
|
||||
)
|
||||
|
||||
return entries
|
||||
.filter((entry) => entry.isDirectory())
|
||||
.filter((entry) => entry.name !== 'screenshots')
|
||||
.filter((entry) => entry.name !== 'tasks')
|
||||
.map((entry) => ({
|
||||
taskId: entry.name,
|
||||
taskPath: join(baseDir, entry.name),
|
||||
}))
|
||||
}
|
||||
|
||||
export async function readRunMetricSummary(
|
||||
runDir: string,
|
||||
): Promise<EvalRunMetricSummary> {
|
||||
const tasks: EvalTaskMetricSummary[] = []
|
||||
|
||||
for (const entry of await readTaskDirs(runDir)) {
|
||||
const metadata = await readFile(
|
||||
join(entry.taskPath, 'metadata.json'),
|
||||
'utf-8',
|
||||
)
|
||||
.then((text) => JSON.parse(text) as Record<string, unknown>)
|
||||
.catch(() => null)
|
||||
if (!metadata) continue
|
||||
|
||||
const metrics = await readTaskMetrics(entry.taskPath, metadata)
|
||||
tasks.push({
|
||||
queryId: (metadata.query_id as string | undefined) || entry.taskId,
|
||||
status: statusFromMetadata(metadata),
|
||||
...primaryGrade(metadata),
|
||||
metrics,
|
||||
})
|
||||
}
|
||||
|
||||
return {
|
||||
run: buildRunMetrics(tasks.map((task) => task.metrics)),
|
||||
tasks,
|
||||
}
|
||||
}
|
||||
@@ -36,5 +36,6 @@ export async function resolveProviderConfig(
|
||||
accessKeyId: resolveEnvValue(agent.accessKeyId),
|
||||
secretAccessKey: resolveEnvValue(agent.secretAccessKey),
|
||||
sessionToken: resolveEnvValue(agent.sessionToken),
|
||||
region: resolveEnvValue(agent.region),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
import {
|
||||
buildRunMetrics,
|
||||
type EvalRunMetrics,
|
||||
type EvalTaskMetrics,
|
||||
} from '../reporting/task-metrics'
|
||||
import type { GraderResult } from '../types'
|
||||
|
||||
export const VIEWER_MANIFEST_SCHEMA_VERSION = 2
|
||||
@@ -20,6 +25,7 @@ export interface ViewerManifestTaskInput {
|
||||
status: string
|
||||
durationMs: number
|
||||
screenshotCount: number
|
||||
metrics?: EvalTaskMetrics
|
||||
graderResults: Record<string, GraderResult>
|
||||
}
|
||||
|
||||
@@ -35,9 +41,11 @@ export interface ViewerManifest {
|
||||
suiteId?: string
|
||||
variantId?: string
|
||||
uploadedAt?: string
|
||||
reportPath?: string
|
||||
agentConfig?: Record<string, unknown>
|
||||
dataset?: string
|
||||
summary?: Record<string, unknown>
|
||||
metrics?: EvalRunMetrics
|
||||
tasks: ViewerManifestTask[]
|
||||
}
|
||||
|
||||
@@ -46,6 +54,7 @@ export interface BuildViewerManifestInput {
|
||||
suiteId?: string
|
||||
variantId?: string
|
||||
uploadedAt?: string
|
||||
reportPath?: string
|
||||
agentConfig?: Record<string, unknown>
|
||||
dataset?: string
|
||||
summary?: Record<string, unknown>
|
||||
@@ -68,22 +77,37 @@ function taskPaths(queryId: string): ViewerManifestTaskPaths {
|
||||
export function buildViewerManifest(
|
||||
input: BuildViewerManifestInput,
|
||||
): ViewerManifest {
|
||||
const tasks = input.tasks.map((task) => {
|
||||
const { artifactId, ...publicTask } = task
|
||||
const metrics =
|
||||
publicTask.metrics ??
|
||||
({
|
||||
durationMs: publicTask.durationMs,
|
||||
steps: publicTask.screenshotCount,
|
||||
screenshots: publicTask.screenshotCount,
|
||||
toolCalls: 0,
|
||||
toolErrors: 0,
|
||||
} satisfies EvalTaskMetrics)
|
||||
|
||||
return {
|
||||
...publicTask,
|
||||
metrics,
|
||||
startUrl: publicTask.startUrl ?? '',
|
||||
paths: taskPaths(artifactId ?? publicTask.queryId),
|
||||
}
|
||||
})
|
||||
|
||||
return {
|
||||
schemaVersion: VIEWER_MANIFEST_SCHEMA_VERSION,
|
||||
runId: input.runId,
|
||||
...(input.suiteId ? { suiteId: input.suiteId } : {}),
|
||||
...(input.variantId ? { variantId: input.variantId } : {}),
|
||||
...(input.uploadedAt ? { uploadedAt: input.uploadedAt } : {}),
|
||||
...(input.reportPath ? { reportPath: input.reportPath } : {}),
|
||||
...(input.agentConfig ? { agentConfig: input.agentConfig } : {}),
|
||||
...(input.dataset ? { dataset: input.dataset } : {}),
|
||||
...(input.summary ? { summary: input.summary } : {}),
|
||||
tasks: input.tasks.map((task) => {
|
||||
const { artifactId, ...publicTask } = task
|
||||
return {
|
||||
...publicTask,
|
||||
startUrl: publicTask.startUrl ?? '',
|
||||
paths: taskPaths(artifactId ?? publicTask.queryId),
|
||||
}
|
||||
}),
|
||||
metrics: buildRunMetrics(tasks.map((task) => task.metrics)),
|
||||
tasks,
|
||||
}
|
||||
}
|
||||
|
||||
12
packages/browseros-agent/apps/eval/tests/dashboard/server.test.ts
vendored
Normal file
12
packages/browseros-agent/apps/eval/tests/dashboard/server.test.ts
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { shouldAutoOpenDashboard } from '../../src/dashboard/server'
|
||||
|
||||
describe('dashboard server', () => {
|
||||
it('does not auto-open the dashboard in CI', () => {
|
||||
expect(shouldAutoOpenDashboard({ CI: 'true' })).toBe(false)
|
||||
})
|
||||
|
||||
it('auto-opens the dashboard outside CI by default', () => {
|
||||
expect(shouldAutoOpenDashboard({})).toBe(true)
|
||||
})
|
||||
})
|
||||
@@ -40,6 +40,7 @@ async function writeRunFixture(
|
||||
start_url: 'https://example.test',
|
||||
termination_reason: 'completed',
|
||||
total_duration_ms: 1200,
|
||||
total_steps: 4,
|
||||
screenshot_count: 1,
|
||||
agent_config: { type: 'single', model: 'kimi' },
|
||||
grader_results: {
|
||||
@@ -47,13 +48,22 @@ async function writeRunFixture(
|
||||
},
|
||||
}),
|
||||
)
|
||||
await writeFile(join(taskDir, 'messages.jsonl'), '{"type":"user"}\n')
|
||||
await writeFile(
|
||||
join(taskDir, 'messages.jsonl'),
|
||||
[
|
||||
'{"type":"user"}',
|
||||
'{"type":"tool-input-available","toolName":"click"}',
|
||||
'{"type":"tool-input-available","toolName":"take_snapshot"}',
|
||||
'{"type":"tool-output-error","toolName":"click"}',
|
||||
].join('\n'),
|
||||
)
|
||||
await writeFile(join(taskDir, 'grades.json'), '{"ok":true}')
|
||||
await writeFile(join(taskDir, 'screenshots', '1.png'), 'png')
|
||||
await writeFile(
|
||||
join(runDir, 'summary.json'),
|
||||
JSON.stringify({ passRate: 1, avgDurationMs: 1200 }),
|
||||
)
|
||||
await writeFile(join(runDir, 'report.html'), '<html>report</html>')
|
||||
return { runDir, runId: `${configName}-${timestamp}` }
|
||||
}
|
||||
|
||||
@@ -110,6 +120,9 @@ describe('R2Publisher', () => {
|
||||
expect(byKey.get(`runs/${runId}/summary.json`)?.ContentType).toBe(
|
||||
'application/json',
|
||||
)
|
||||
expect(byKey.get(`runs/${runId}/report.html`)?.ContentType).toBe(
|
||||
'text/html',
|
||||
)
|
||||
expect(byKey.get('viewer.html')?.ContentType).toBe('text/html')
|
||||
expect(result.viewerUrl).toBe(
|
||||
`https://eval.example.test/viewer.html?run=${runId}`,
|
||||
@@ -126,12 +139,28 @@ describe('R2Publisher', () => {
|
||||
uploadedAt: '2026-04-29T12:00:00.000Z',
|
||||
agentConfig: { type: 'single', model: 'kimi' },
|
||||
dataset: 'webbench',
|
||||
reportPath: 'report.html',
|
||||
summary: { passRate: 1, avgDurationMs: 1200 },
|
||||
metrics: {
|
||||
taskCount: 1,
|
||||
avgDurationMs: 1200,
|
||||
avgSteps: 4,
|
||||
avgToolCalls: 2,
|
||||
totalToolCalls: 2,
|
||||
totalToolErrors: 1,
|
||||
},
|
||||
tasks: [
|
||||
{
|
||||
queryId: 'task-1',
|
||||
status: 'completed',
|
||||
screenshotCount: 1,
|
||||
metrics: {
|
||||
durationMs: 1200,
|
||||
steps: 4,
|
||||
screenshots: 1,
|
||||
toolCalls: 2,
|
||||
toolErrors: 1,
|
||||
},
|
||||
paths: {
|
||||
attempt: 'tasks/task-1/attempt.json',
|
||||
metadata: 'tasks/task-1/metadata.json',
|
||||
|
||||
@@ -6,6 +6,7 @@ interface ViewerPathResolvers {
|
||||
artifactUrl(task: Record<string, unknown>, artifact: string): string
|
||||
metadataUrl(task: Record<string, unknown>): string
|
||||
messagesUrl(task: Record<string, unknown>): string
|
||||
reportUrl(manifest: Record<string, unknown>): string | null
|
||||
screenshotUrl(task: Record<string, unknown>, step: number): string
|
||||
}
|
||||
|
||||
@@ -24,7 +25,7 @@ async function loadViewerPathResolvers(): Promise<ViewerPathResolvers> {
|
||||
`
|
||||
const basePath = 'runs/run-1';
|
||||
${block}
|
||||
return { artifactUrl, metadataUrl, messagesUrl, screenshotUrl };
|
||||
return { artifactUrl, metadataUrl, messagesUrl, reportUrl, screenshotUrl };
|
||||
`,
|
||||
) as () => ViewerPathResolvers
|
||||
return createResolvers()
|
||||
@@ -60,6 +61,35 @@ async function runAutoSelectFromHash(hash: string): Promise<unknown> {
|
||||
return runAutoSelect()
|
||||
}
|
||||
|
||||
async function runComputeStats(): Promise<unknown> {
|
||||
const html = await readFile(
|
||||
join(import.meta.dir, '..', '..', 'src', 'dashboard', 'viewer.html'),
|
||||
'utf-8',
|
||||
)
|
||||
const start = html.indexOf('function computeStats(tasks)')
|
||||
const end = html.indexOf('function resolveStatus(task)', start)
|
||||
expect(start).toBeGreaterThan(-1)
|
||||
expect(end).toBeGreaterThan(start)
|
||||
|
||||
const block = html.slice(start, end)
|
||||
const compute = new Function(
|
||||
`
|
||||
${block}
|
||||
return computeStats([
|
||||
{
|
||||
graderResults: { agisdk_state_diff: { pass: true, score: 1 } },
|
||||
metrics: { durationMs: 1000, steps: 4, toolCalls: 3, toolErrors: 0 }
|
||||
},
|
||||
{
|
||||
graderResults: { agisdk_state_diff: { pass: false, score: 0 } },
|
||||
metrics: { durationMs: 3000, steps: 8, toolCalls: 5, toolErrors: 2 }
|
||||
}
|
||||
]);
|
||||
`,
|
||||
) as () => unknown
|
||||
return compute()
|
||||
}
|
||||
|
||||
describe('R2 viewer artifact path compatibility', () => {
|
||||
it('uses explicit manifest paths for new uploaded runs', async () => {
|
||||
const resolvers = await loadViewerPathResolvers()
|
||||
@@ -95,6 +125,15 @@ describe('R2 viewer artifact path compatibility', () => {
|
||||
)
|
||||
})
|
||||
|
||||
it('resolves manifest-level run report links', async () => {
|
||||
const resolvers = await loadViewerPathResolvers()
|
||||
|
||||
expect(resolvers.reportUrl({ reportPath: 'report.html' })).toBe(
|
||||
'runs/run-1/report.html',
|
||||
)
|
||||
expect(resolvers.reportUrl({})).toBe(null)
|
||||
})
|
||||
|
||||
it('falls back to legacy inferred paths for old uploaded runs', async () => {
|
||||
const resolvers = await loadViewerPathResolvers()
|
||||
const task = { queryId: 'legacy-task' }
|
||||
@@ -127,4 +166,17 @@ describe('R2 viewer artifact path compatibility', () => {
|
||||
queryId: 'legacy-task',
|
||||
})
|
||||
})
|
||||
|
||||
it('computes run-level timing and tool metrics for the viewer', async () => {
|
||||
expect(await runComputeStats()).toMatchObject({
|
||||
total: 2,
|
||||
passed: 1,
|
||||
failed: 1,
|
||||
avgDurationMs: 2000,
|
||||
avgSteps: 6,
|
||||
avgToolCalls: 4,
|
||||
totalToolCalls: 8,
|
||||
totalToolErrors: 2,
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
159
packages/browseros-agent/apps/eval/tests/reporting/generate-report-script.test.ts
vendored
Normal file
159
packages/browseros-agent/apps/eval/tests/reporting/generate-report-script.test.ts
vendored
Normal file
@@ -0,0 +1,159 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import {
|
||||
DEFAULT_REPORT_MAX_TURNS,
|
||||
DEFAULT_REPORT_MODEL,
|
||||
generateEvalReport,
|
||||
runClaudeCodeReportAgent,
|
||||
} from '../../scripts/generate-report'
|
||||
|
||||
async function writeRunFixture(): Promise<string> {
|
||||
const runDir = await mkdtemp(join(tmpdir(), 'eval-report-script-'))
|
||||
const taskDir = join(runDir, 'agisdk-networkin-10')
|
||||
await mkdir(join(taskDir, 'screenshots'), { recursive: true })
|
||||
await writeFile(
|
||||
join(runDir, 'summary.json'),
|
||||
JSON.stringify({
|
||||
total: 1,
|
||||
completed: 1,
|
||||
passRate: 0,
|
||||
avgDurationMs: 1234,
|
||||
}),
|
||||
)
|
||||
await writeFile(
|
||||
join(taskDir, 'metadata.json'),
|
||||
JSON.stringify({
|
||||
query_id: 'agisdk-networkin-10',
|
||||
dataset: 'agisdk-real',
|
||||
query: 'Send a follow-up message starting with "Following up on".',
|
||||
termination_reason: 'completed',
|
||||
total_duration_ms: 1234,
|
||||
total_steps: 2,
|
||||
screenshot_count: 1,
|
||||
final_answer: 'No app action was taken.',
|
||||
errors: [],
|
||||
warnings: [],
|
||||
agent_config: { type: 'single', model: 'kimi' },
|
||||
grader_results: {
|
||||
agisdk_state_diff: {
|
||||
score: 0,
|
||||
pass: false,
|
||||
reasoning: 'Some criteria failed',
|
||||
details: {
|
||||
per_criterion: [
|
||||
{ passed: true, detail: 'message starts correctly' },
|
||||
{ passed: false, detail: 'message was not sent' },
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
)
|
||||
await writeFile(
|
||||
join(taskDir, 'messages.jsonl'),
|
||||
[
|
||||
JSON.stringify({
|
||||
type: 'tool-input-available',
|
||||
timestamp: '2026-04-30T00:00:00.000Z',
|
||||
toolCallId: 'call-1',
|
||||
toolName: 'memory_search',
|
||||
input: { q: 'chat' },
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: 'tool-output-error',
|
||||
timestamp: '2026-04-30T00:00:01.000Z',
|
||||
toolCallId: 'call-1',
|
||||
errorText: 'memory unavailable',
|
||||
}),
|
||||
].join('\n'),
|
||||
)
|
||||
await writeFile(join(taskDir, 'screenshots', '1.png'), 'png')
|
||||
return runDir
|
||||
}
|
||||
|
||||
describe('generate-report script', () => {
|
||||
it('delegates report.html creation to Claude Code', async () => {
|
||||
const runDir = await writeRunFixture()
|
||||
const outputPath = join(runDir, 'report.html')
|
||||
let prompt = ''
|
||||
|
||||
await generateEvalReport({
|
||||
inputDir: runDir,
|
||||
outputPath,
|
||||
runAgent: async (invocation) => {
|
||||
prompt = invocation.prompt
|
||||
await writeFile(
|
||||
invocation.outputPath,
|
||||
'<!doctype html><h1>Claude-written report</h1>',
|
||||
)
|
||||
},
|
||||
})
|
||||
|
||||
expect(await readFile(outputPath, 'utf-8')).toContain(
|
||||
'Claude-written report',
|
||||
)
|
||||
expect(prompt).toContain('AGI SDK Random-10 Failure Report')
|
||||
expect(prompt).toContain('summary.json')
|
||||
expect(prompt).toContain('messages.jsonl')
|
||||
expect(prompt).toContain('screenshots')
|
||||
expect(prompt).toContain('Deterministic run metrics')
|
||||
expect(prompt).toContain('"queryId": "agisdk-networkin-10"')
|
||||
expect(prompt).toContain('"toolCalls": 1')
|
||||
expect(prompt).toContain('"toolErrors": 1')
|
||||
expect(prompt).toContain('Duration by task')
|
||||
expect(prompt).toContain('Tool calls by task')
|
||||
expect(prompt).toContain(outputPath)
|
||||
})
|
||||
|
||||
it('fails when the Claude Code agent does not write the report', async () => {
|
||||
const runDir = await writeRunFixture()
|
||||
|
||||
await expect(
|
||||
generateEvalReport({
|
||||
inputDir: runDir,
|
||||
outputPath: join(runDir, 'missing-report.html'),
|
||||
runAgent: async () => {},
|
||||
}),
|
||||
).rejects.toThrow('Report was not written')
|
||||
})
|
||||
|
||||
it('runs Claude Code with Opus 4.6, full bypass, and bounded turns', async () => {
|
||||
const runDir = await writeRunFixture()
|
||||
const calls: unknown[] = []
|
||||
|
||||
await runClaudeCodeReportAgent(
|
||||
{
|
||||
inputDir: runDir,
|
||||
outputPath: join(runDir, 'report.html'),
|
||||
prompt: 'write the report',
|
||||
},
|
||||
{
|
||||
query: async function* (call: unknown) {
|
||||
calls.push(call)
|
||||
yield { type: 'result', subtype: 'success', result: 'done' }
|
||||
},
|
||||
env: {
|
||||
CLAUDE_CODE_OAUTH_TOKEN: 'token',
|
||||
EVAL_R2_SECRET_ACCESS_KEY: 'secret',
|
||||
HOME: '/tmp/home',
|
||||
PATH: '/bin',
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
expect(calls).toHaveLength(1)
|
||||
expect(calls[0]).toMatchObject({
|
||||
prompt: 'write the report',
|
||||
options: {
|
||||
cwd: runDir,
|
||||
model: DEFAULT_REPORT_MODEL,
|
||||
maxTurns: DEFAULT_REPORT_MAX_TURNS,
|
||||
permissionMode: 'bypassPermissions',
|
||||
allowDangerouslySkipPermissions: true,
|
||||
},
|
||||
})
|
||||
expect(JSON.stringify(calls[0])).not.toContain('secret')
|
||||
})
|
||||
})
|
||||
@@ -13,10 +13,10 @@ describe('adaptEvalConfigFile', () => {
|
||||
expect(adapted.suite.id).toBe('browseros-agent-weekly')
|
||||
expect(adapted.suite.dataset).toBe('../../data/agisdk-real.jsonl')
|
||||
expect(adapted.suite.graders).toEqual(['agisdk_state_diff'])
|
||||
expect(adapted.suite.workers).toBe(10)
|
||||
expect(adapted.suite.workers).toBe(3)
|
||||
expect(adapted.suite.restartBrowserPerTask).toBe(true)
|
||||
expect(adapted.suite.timeoutMs).toBe(1_800_000)
|
||||
expect(adapted.evalConfig.num_workers).toBe(10)
|
||||
expect(adapted.evalConfig.num_workers).toBe(3)
|
||||
expect(adapted.evalConfig.browseros.server_url).toBe(
|
||||
'http://127.0.0.1:9110',
|
||||
)
|
||||
@@ -38,6 +38,34 @@ describe('adaptEvalConfigFile', () => {
|
||||
)
|
||||
})
|
||||
|
||||
it('adapts BrowserOS AGI SDK comparison configs', async () => {
|
||||
const kimi = await adaptEvalConfigFile(
|
||||
'apps/eval/configs/legacy/browseros-agent-kimi-k2-5-agisdk-real.json',
|
||||
)
|
||||
const opus = await adaptEvalConfigFile(
|
||||
'apps/eval/configs/legacy/browseros-agent-opus-4-6-agisdk-real.json',
|
||||
)
|
||||
|
||||
expect(kimi.suite.id).toBe('browseros-agent-kimi-k2-5-agisdk-real')
|
||||
expect(kimi.evalConfig.agent).toMatchObject({
|
||||
type: 'single',
|
||||
provider: 'openai-compatible',
|
||||
model: 'moonshotai/kimi-k2.5',
|
||||
})
|
||||
expect(kimi.evalConfig.num_workers).toBe(3)
|
||||
|
||||
expect(opus.suite.id).toBe('browseros-agent-opus-4-6-agisdk-real')
|
||||
expect(opus.evalConfig.agent).toMatchObject({
|
||||
type: 'single',
|
||||
provider: 'bedrock',
|
||||
model: 'global.anthropic.claude-opus-4-6-v1',
|
||||
region: 'AWS_REGION',
|
||||
accessKeyId: 'AWS_ACCESS_KEY_ID',
|
||||
secretAccessKey: 'AWS_SECRET_ACCESS_KEY',
|
||||
})
|
||||
expect(opus.evalConfig.num_workers).toBe(2)
|
||||
})
|
||||
|
||||
it('adapts claude-code configs without provider credentials', async () => {
|
||||
const dir = await mkdtemp(join(tmpdir(), 'claude-code-config-'))
|
||||
const configPath = join(dir, 'claude-code-agisdk.json')
|
||||
|
||||
38
packages/browseros-agent/apps/eval/tests/utils/resolve-provider-config.test.ts
vendored
Normal file
38
packages/browseros-agent/apps/eval/tests/utils/resolve-provider-config.test.ts
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { resolveProviderConfig } from '../../src/utils/resolve-provider-config'
|
||||
|
||||
describe('resolveProviderConfig', () => {
|
||||
it('resolves Bedrock region from environment variables', async () => {
|
||||
const previous = {
|
||||
AWS_REGION: process.env.AWS_REGION,
|
||||
AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID,
|
||||
AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY,
|
||||
}
|
||||
process.env.AWS_REGION = 'us-west-2'
|
||||
process.env.AWS_ACCESS_KEY_ID = 'test-access-key'
|
||||
process.env.AWS_SECRET_ACCESS_KEY = 'test-secret-key'
|
||||
|
||||
try {
|
||||
const resolved = await resolveProviderConfig({
|
||||
provider: 'bedrock',
|
||||
model: 'global.anthropic.claude-opus-4-6-v1',
|
||||
region: 'AWS_REGION',
|
||||
accessKeyId: 'AWS_ACCESS_KEY_ID',
|
||||
secretAccessKey: 'AWS_SECRET_ACCESS_KEY',
|
||||
})
|
||||
|
||||
expect(resolved).toMatchObject({
|
||||
provider: 'bedrock',
|
||||
model: 'global.anthropic.claude-opus-4-6-v1',
|
||||
region: process.env.AWS_REGION,
|
||||
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
|
||||
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
|
||||
})
|
||||
} finally {
|
||||
for (const [key, value] of Object.entries(previous)) {
|
||||
if (value === undefined) delete process.env[key]
|
||||
else process.env[key] = value
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
@@ -9,6 +9,7 @@ describe('buildViewerManifest', () => {
|
||||
suiteId: 'agisdk-daily-10',
|
||||
variantId: 'kimi',
|
||||
uploadedAt: '2026-04-29T06:00:00.000Z',
|
||||
reportPath: 'report.html',
|
||||
summary: { total: 1, passRate: 0 },
|
||||
tasks: [
|
||||
{
|
||||
@@ -18,6 +19,13 @@ describe('buildViewerManifest', () => {
|
||||
status: 'completed',
|
||||
durationMs: 353_000,
|
||||
screenshotCount: 42,
|
||||
metrics: {
|
||||
durationMs: 353_000,
|
||||
steps: 47,
|
||||
screenshots: 42,
|
||||
toolCalls: 19,
|
||||
toolErrors: 2,
|
||||
},
|
||||
graderResults: {
|
||||
agisdk_state_diff: {
|
||||
score: 0,
|
||||
@@ -32,6 +40,7 @@ describe('buildViewerManifest', () => {
|
||||
|
||||
const publishManifest: R2RunManifest = manifest
|
||||
expect(publishManifest.schemaVersion).toBe(2)
|
||||
expect(manifest.reportPath).toBe('report.html')
|
||||
expect(manifest.tasks[0].paths.messages).toBe(
|
||||
'tasks/agisdk-dashdish-4/messages.jsonl',
|
||||
)
|
||||
@@ -41,6 +50,21 @@ describe('buildViewerManifest', () => {
|
||||
expect(manifest.tasks[0].paths.graderArtifacts).toBe(
|
||||
'tasks/agisdk-dashdish-4/grader-artifacts',
|
||||
)
|
||||
expect(manifest.metrics).toMatchObject({
|
||||
taskCount: 1,
|
||||
avgDurationMs: 353_000,
|
||||
avgSteps: 47,
|
||||
avgToolCalls: 19,
|
||||
totalToolCalls: 19,
|
||||
totalToolErrors: 2,
|
||||
})
|
||||
expect(manifest.tasks[0].metrics).toEqual({
|
||||
durationMs: 353_000,
|
||||
steps: 47,
|
||||
screenshots: 42,
|
||||
toolCalls: 19,
|
||||
toolErrors: 2,
|
||||
})
|
||||
expect(manifest.tasks[0].graderResults.agisdk_state_diff.details).toEqual({
|
||||
missing: ['checkout item'],
|
||||
})
|
||||
|
||||
@@ -143,8 +143,9 @@ export async function createHttpServer(config: HttpServerConfig) {
|
||||
getLimactlPath: () => resolveBundledLimactl(resourcesDir),
|
||||
getVmName: () => VM_NAME,
|
||||
},
|
||||
openclawGatewayChat: new OpenClawGatewayChatClient(() =>
|
||||
getOpenClawService().getPort(),
|
||||
openclawGatewayChat: new OpenClawGatewayChatClient(
|
||||
() => getOpenClawService().getPort(),
|
||||
async () => getOpenClawService().getGatewayToken(),
|
||||
),
|
||||
openclawProvisioner: {
|
||||
createAgent: (input) => getOpenClawService().createAgent(input),
|
||||
|
||||
@@ -53,7 +53,6 @@ export type GatewayContainerSpec = {
|
||||
hostHome: string
|
||||
envFilePath: string
|
||||
gatewayToken?: string
|
||||
privateIngressNoAuth?: boolean
|
||||
timezone: string
|
||||
}
|
||||
|
||||
@@ -418,9 +417,6 @@ export class ContainerRuntime {
|
||||
...(input.gatewayToken
|
||||
? { OPENCLAW_GATEWAY_TOKEN: input.gatewayToken }
|
||||
: {}),
|
||||
...(input.privateIngressNoAuth
|
||||
? { OPENCLAW_GATEWAY_PRIVATE_INGRESS_NO_AUTH: '1' }
|
||||
: {}),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,23 +35,22 @@ export interface GatewayChatTurnInput {
|
||||
signal?: AbortSignal
|
||||
}
|
||||
|
||||
type GatewayTokenProvider = () => Promise<string | null | undefined>
|
||||
|
||||
export class OpenClawGatewayChatClient {
|
||||
constructor(
|
||||
private readonly getHostPort: () => number,
|
||||
private readonly getToken?: GatewayTokenProvider,
|
||||
private readonly getToken: () => Promise<string>,
|
||||
) {}
|
||||
|
||||
async streamTurn(
|
||||
input: GatewayChatTurnInput,
|
||||
): Promise<ReadableStream<AgentStreamEvent>> {
|
||||
const token = await this.getToken()
|
||||
const response = await fetch(
|
||||
`http://127.0.0.1:${this.getHostPort()}/v1/chat/completions`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: {
|
||||
...(await this.authHeaders()),
|
||||
Authorization: `Bearer ${token}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
@@ -81,12 +80,6 @@ export class OpenClawGatewayChatClient {
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
private async authHeaders(): Promise<Record<string, string>> {
|
||||
const token = await this.getToken?.()
|
||||
const trimmed = token?.trim()
|
||||
return trimmed ? { Authorization: `Bearer ${trimmed}` } : {}
|
||||
}
|
||||
}
|
||||
|
||||
function resolveAgentModel(agentId: string): string {
|
||||
|
||||
@@ -73,12 +73,10 @@ export type OpenClawSessionHistoryEvent =
|
||||
}
|
||||
| { type: 'error'; data: { message: string } }
|
||||
|
||||
type GatewayTokenProvider = () => Promise<string | null | undefined>
|
||||
|
||||
export class OpenClawHttpClient {
|
||||
constructor(
|
||||
private readonly hostPort: number,
|
||||
private readonly getToken?: GatewayTokenProvider,
|
||||
private readonly getToken: () => Promise<string>,
|
||||
) {}
|
||||
|
||||
async getSessionHistory(
|
||||
@@ -105,11 +103,14 @@ export class OpenClawHttpClient {
|
||||
|
||||
async isAuthenticated(): Promise<boolean> {
|
||||
try {
|
||||
const token = await this.getToken()
|
||||
const response = await fetch(
|
||||
`http://127.0.0.1:${this.hostPort}/v1/models`,
|
||||
{
|
||||
method: 'GET',
|
||||
headers: await this.authHeaders(),
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
},
|
||||
)
|
||||
return response.ok
|
||||
@@ -123,12 +124,13 @@ export class OpenClawHttpClient {
|
||||
input: OpenClawSessionHistoryInput,
|
||||
extraHeaders: Record<string, string>,
|
||||
): Promise<Response> {
|
||||
const token = await this.getToken()
|
||||
const response = await fetch(
|
||||
`http://127.0.0.1:${this.hostPort}${buildHistoryPath(sessionKey, input)}`,
|
||||
{
|
||||
method: 'GET',
|
||||
headers: {
|
||||
...(await this.authHeaders()),
|
||||
Authorization: `Bearer ${token}`,
|
||||
...extraHeaders,
|
||||
},
|
||||
signal: input.signal,
|
||||
@@ -147,12 +149,6 @@ export class OpenClawHttpClient {
|
||||
}
|
||||
return response
|
||||
}
|
||||
|
||||
private async authHeaders(): Promise<Record<string, string>> {
|
||||
const token = await this.getToken?.()
|
||||
const trimmed = token?.trim()
|
||||
return trimmed ? { Authorization: `Bearer ${trimmed}` } : {}
|
||||
}
|
||||
}
|
||||
|
||||
function buildHistoryPath(
|
||||
|
||||
@@ -54,10 +54,10 @@ export class OpenClawObserver {
|
||||
|
||||
constructor(private readonly session: ClawSession) {}
|
||||
|
||||
/** Start observing the gateway at the given URL. */
|
||||
connect(gatewayUrl: string, token?: string | null): void {
|
||||
/** Start observing the gateway at the given URL with the given token. */
|
||||
connect(gatewayUrl: string, token: string): void {
|
||||
this.gatewayUrl = gatewayUrl
|
||||
this.gatewayToken = token?.trim() || null
|
||||
this.gatewayToken = token
|
||||
this.closed = false
|
||||
this.doConnect()
|
||||
}
|
||||
@@ -83,7 +83,7 @@ export class OpenClawObserver {
|
||||
// ── Private ─────────────────────────────────────────────────────────
|
||||
|
||||
private doConnect(): void {
|
||||
if (this.closed || !this.gatewayUrl) return
|
||||
if (this.closed || !this.gatewayUrl || !this.gatewayToken) return
|
||||
|
||||
const wsUrl = this.gatewayUrl
|
||||
.replace(/^http:\/\//, 'ws://')
|
||||
@@ -101,37 +101,6 @@ export class OpenClawObserver {
|
||||
|
||||
let handshakeSent = false
|
||||
|
||||
/**
|
||||
* Send the gateway protocol connect frame. BrowserOS no-auth gateways omit
|
||||
* auth entirely; legacy token-mode gateways can still pass a token in.
|
||||
*/
|
||||
const sendConnectRequest = () => {
|
||||
if (handshakeSent) return
|
||||
handshakeSent = true
|
||||
const connectReq: RequestFrame = {
|
||||
type: 'req',
|
||||
id: HANDSHAKE_REQUEST_ID,
|
||||
method: 'connect',
|
||||
params: {
|
||||
minProtocol: PROTOCOL_VERSION,
|
||||
maxProtocol: PROTOCOL_VERSION,
|
||||
client: {
|
||||
id: 'openclaw-tui',
|
||||
displayName: 'browseros-observer',
|
||||
version: '1.0.0',
|
||||
platform: 'node',
|
||||
mode: 'ui',
|
||||
},
|
||||
role: 'operator',
|
||||
scopes: ['operator.read'],
|
||||
...(this.gatewayToken ? { auth: { token: this.gatewayToken } } : {}),
|
||||
},
|
||||
}
|
||||
ws.send(JSON.stringify(connectReq))
|
||||
}
|
||||
|
||||
ws.on('open', sendConnectRequest)
|
||||
|
||||
ws.on('message', (raw) => {
|
||||
let frame: IncomingFrame
|
||||
try {
|
||||
@@ -140,14 +109,34 @@ export class OpenClawObserver {
|
||||
return
|
||||
}
|
||||
|
||||
// Older gateway builds emit connect.challenge before the connect
|
||||
// response; keep this path so the observer tolerates both flows.
|
||||
// The gateway sends a connect.challenge event before accepting
|
||||
// the connect request. Send the handshake after receiving it.
|
||||
if (
|
||||
frame.type === 'event' &&
|
||||
frame.event === 'connect.challenge' &&
|
||||
!handshakeSent
|
||||
) {
|
||||
sendConnectRequest()
|
||||
handshakeSent = true
|
||||
const connectReq: RequestFrame = {
|
||||
type: 'req',
|
||||
id: HANDSHAKE_REQUEST_ID,
|
||||
method: 'connect',
|
||||
params: {
|
||||
minProtocol: PROTOCOL_VERSION,
|
||||
maxProtocol: PROTOCOL_VERSION,
|
||||
client: {
|
||||
id: 'openclaw-tui',
|
||||
displayName: 'browseros-observer',
|
||||
version: '1.0.0',
|
||||
platform: 'node',
|
||||
mode: 'ui',
|
||||
},
|
||||
role: 'operator',
|
||||
scopes: ['operator.read'],
|
||||
auth: { token: this.gatewayToken },
|
||||
},
|
||||
}
|
||||
ws.send(JSON.stringify(connectReq))
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -262,7 +262,6 @@ export class OpenClawService {
|
||||
private hostPort = OPENCLAW_GATEWAY_CONTAINER_PORT
|
||||
private token: string
|
||||
private tokenLoaded = false
|
||||
private gatewayAuthMode: 'unknown' | 'none' | 'token' | 'password' = 'unknown'
|
||||
private lastError: string | null = null
|
||||
private browserosServerPort: number
|
||||
private resourcesDir: string | null
|
||||
@@ -285,8 +284,9 @@ export class OpenClawService {
|
||||
this.token = crypto.randomUUID()
|
||||
this.cliClient = new OpenClawCliClient(this.runtime)
|
||||
this.bootstrapCliClient = this.buildBootstrapCliClient()
|
||||
this.httpClient = new OpenClawHttpClient(this.hostPort, () =>
|
||||
this.getGatewayHttpToken(),
|
||||
this.httpClient = new OpenClawHttpClient(
|
||||
this.hostPort,
|
||||
async () => this.token,
|
||||
)
|
||||
this.browserosServerPort =
|
||||
config.browserosServerPort ?? DEFAULT_PORTS.server
|
||||
@@ -324,9 +324,13 @@ export class OpenClawService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy gateway auth token accessor. BrowserOS configures new bundled
|
||||
* gateways with `gateway.auth.mode=none`; this remains for older token-auth
|
||||
* gateway clients that still ask the service for a token.
|
||||
* Current gateway auth token. The token string is loaded from
|
||||
* `gateway.auth.token` in the persisted openclaw.json during setup,
|
||||
* with a freshly generated UUID as fallback. Exposed so the ACPx
|
||||
* harness can pass it to spawned `openclaw acp` child processes via
|
||||
* the documented `OPENCLAW_GATEWAY_TOKEN` env var (avoids both the
|
||||
* `--token` process-listing leak and reliance on a token-file path
|
||||
* that doesn't exist as a discrete file inside the container).
|
||||
*/
|
||||
getGatewayToken(): string {
|
||||
return this.token
|
||||
@@ -397,7 +401,7 @@ export class OpenClawService {
|
||||
await this.bootstrapCliClient.runOnboard({
|
||||
acceptRisk: true,
|
||||
authChoice: 'skip',
|
||||
gatewayAuth: 'none',
|
||||
gatewayAuth: 'token',
|
||||
gatewayBind: 'lan',
|
||||
gatewayPort: OPENCLAW_GATEWAY_CONTAINER_PORT,
|
||||
installDaemon: false,
|
||||
@@ -997,8 +1001,9 @@ export class OpenClawService {
|
||||
private setPort(hostPort: number): void {
|
||||
if (hostPort === this.hostPort) return
|
||||
this.hostPort = hostPort
|
||||
this.httpClient = new OpenClawHttpClient(this.hostPort, () =>
|
||||
this.getGatewayHttpToken(),
|
||||
this.httpClient = new OpenClawHttpClient(
|
||||
this.hostPort,
|
||||
async () => this.token,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1032,15 +1037,24 @@ export class OpenClawService {
|
||||
}
|
||||
|
||||
private async isGatewayAuthenticated(hostPort: number): Promise<boolean> {
|
||||
if (!this.tokenLoaded) {
|
||||
logger.debug(
|
||||
'OpenClaw gateway port is ready before auth token is loaded',
|
||||
{
|
||||
hostPort,
|
||||
},
|
||||
)
|
||||
return false
|
||||
}
|
||||
|
||||
const client =
|
||||
hostPort === this.hostPort
|
||||
? this.httpClient
|
||||
: new OpenClawHttpClient(hostPort, () => this.getGatewayHttpToken())
|
||||
: new OpenClawHttpClient(hostPort, async () => this.token)
|
||||
const authenticated = await client.isAuthenticated()
|
||||
if (!authenticated) {
|
||||
logger.warn('OpenClaw gateway readiness probe failed', {
|
||||
logger.warn('OpenClaw gateway port rejected current auth token', {
|
||||
hostPort,
|
||||
authMode: this.gatewayAuthMode,
|
||||
})
|
||||
}
|
||||
return authenticated
|
||||
@@ -1104,9 +1118,7 @@ export class OpenClawService {
|
||||
// ClawSession starts empty after the JSONL seed was removed; the WS
|
||||
// observer fills in agent status as events arrive.
|
||||
const url = `http://127.0.0.1:${this.hostPort}`
|
||||
const token =
|
||||
this.gatewayAuthMode === 'token' && this.tokenLoaded ? this.token : null
|
||||
this.observer.connect(url, token)
|
||||
this.observer.connect(url, this.token)
|
||||
}
|
||||
|
||||
private classifyControlPlaneError(
|
||||
@@ -1342,11 +1354,7 @@ export class OpenClawService {
|
||||
hostPort: this.hostPort,
|
||||
hostHome: this.openclawDir,
|
||||
envFilePath: this.getStateEnvPath(),
|
||||
gatewayToken:
|
||||
this.gatewayAuthMode === 'token' && this.tokenLoaded
|
||||
? this.token
|
||||
: undefined,
|
||||
privateIngressNoAuth: this.gatewayAuthMode === 'none',
|
||||
gatewayToken: this.tokenLoaded ? this.token : undefined,
|
||||
timezone: Intl.DateTimeFormat().resolvedOptions().timeZone,
|
||||
}
|
||||
}
|
||||
@@ -1452,7 +1460,7 @@ export class OpenClawService {
|
||||
}
|
||||
|
||||
private async ensureTokenLoaded(): Promise<void> {
|
||||
if (this.gatewayAuthMode !== 'unknown') {
|
||||
if (this.tokenLoaded) {
|
||||
return
|
||||
}
|
||||
if (!existsSync(this.getStateConfigPath())) {
|
||||
@@ -1464,7 +1472,6 @@ export class OpenClawService {
|
||||
|
||||
private async refreshGatewayAuthToken(): Promise<void> {
|
||||
this.tokenLoaded = false
|
||||
this.gatewayAuthMode = 'unknown'
|
||||
if (!existsSync(this.getStateConfigPath())) {
|
||||
return
|
||||
}
|
||||
@@ -1479,28 +1486,16 @@ export class OpenClawService {
|
||||
) as {
|
||||
gateway?: {
|
||||
auth?: {
|
||||
mode?: unknown
|
||||
token?: unknown
|
||||
}
|
||||
}
|
||||
}
|
||||
const auth = config.gateway?.auth
|
||||
const mode = auth?.mode
|
||||
if (mode === 'none') {
|
||||
this.gatewayAuthMode = 'none'
|
||||
logger.debug('OpenClaw gateway config uses no auth')
|
||||
return
|
||||
}
|
||||
|
||||
const token = auth?.token
|
||||
const token = config.gateway?.auth?.token
|
||||
if (typeof token === 'string' && token) {
|
||||
this.token = token
|
||||
this.tokenLoaded = true
|
||||
this.gatewayAuthMode = 'token'
|
||||
logger.info('Loaded OpenClaw gateway token from mounted config')
|
||||
return
|
||||
}
|
||||
this.gatewayAuthMode = mode === 'password' ? 'password' : 'none'
|
||||
} catch (err) {
|
||||
logger.warn('Failed to load OpenClaw gateway token from mounted config', {
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
@@ -1508,13 +1503,6 @@ export class OpenClawService {
|
||||
}
|
||||
}
|
||||
|
||||
private async getGatewayHttpToken(): Promise<string | null> {
|
||||
await this.ensureTokenLoaded()
|
||||
return this.gatewayAuthMode === 'token' && this.tokenLoaded
|
||||
? this.token
|
||||
: null
|
||||
}
|
||||
|
||||
private createProgressLogger(
|
||||
onLog?: (msg: string) => void,
|
||||
): (msg: string) => void {
|
||||
|
||||
@@ -23,11 +23,17 @@ interface CdpVersion {
|
||||
const LOOPBACK_DISCOVERY_HOSTS = ['127.0.0.1', 'localhost', '[::1]'] as const
|
||||
type LoopbackDiscoveryHost = (typeof LOOPBACK_DISCOVERY_HOSTS)[number]
|
||||
|
||||
interface CdpBackendConfig {
|
||||
port: number
|
||||
exitOnReconnectFailure?: boolean
|
||||
}
|
||||
|
||||
// biome-ignore lint/correctness/noUnusedVariables: declaration merging adds ProtocolApi properties to the class
|
||||
interface CdpBackend extends ProtocolApi {}
|
||||
// biome-ignore lint/suspicious/noUnsafeDeclarationMerging: intentional — Object.assign fills these at runtime
|
||||
class CdpBackend implements ICdpBackend {
|
||||
private port: number
|
||||
private exitOnReconnectFailure: boolean
|
||||
private ws: WebSocket | null = null
|
||||
private messageId = 0
|
||||
private pending = new Map<number, PendingRequest>()
|
||||
@@ -44,8 +50,9 @@ class CdpBackend implements ICdpBackend {
|
||||
private keepaliveTimer: ReturnType<typeof setInterval> | null = null
|
||||
private preferredDiscoveryHost: LoopbackDiscoveryHost | null = null
|
||||
|
||||
constructor(config: { port: number }) {
|
||||
constructor(config: CdpBackendConfig) {
|
||||
this.port = config.port
|
||||
this.exitOnReconnectFailure = config.exitOnReconnectFailure ?? true
|
||||
|
||||
const rawSend: RawSend = (method, params) => this.rawSend(method, params)
|
||||
const rawOn: RawOn = (event, handler) => this.rawOn(event, handler)
|
||||
@@ -293,7 +300,8 @@ class CdpBackend implements ICdpBackend {
|
||||
private async reconnectLoop(): Promise<void> {
|
||||
do {
|
||||
this.reconnectRequested = false
|
||||
await this.reconnectWithRetries()
|
||||
const reconnected = await this.reconnectWithRetries()
|
||||
if (!reconnected) return
|
||||
} while (
|
||||
!this.disconnecting &&
|
||||
(this.reconnectRequested || !this.connected)
|
||||
@@ -309,12 +317,12 @@ class CdpBackend implements ICdpBackend {
|
||||
this.pending.clear()
|
||||
}
|
||||
|
||||
private async reconnectWithRetries(): Promise<void> {
|
||||
private async reconnectWithRetries(): Promise<boolean> {
|
||||
const maxRetries = CDP_LIMITS.RECONNECT_MAX_RETRIES
|
||||
const delay = TIMEOUTS.CDP_RECONNECT_DELAY
|
||||
|
||||
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
||||
if (this.disconnecting) return
|
||||
if (this.disconnecting) return false
|
||||
|
||||
try {
|
||||
logger.info(`CDP reconnection attempt ${attempt}/${maxRetries}...`)
|
||||
@@ -322,7 +330,7 @@ class CdpBackend implements ICdpBackend {
|
||||
await this.attemptConnect()
|
||||
this.startKeepalive()
|
||||
logger.info('CDP reconnected successfully')
|
||||
return
|
||||
return true
|
||||
} catch (error) {
|
||||
const msg = error instanceof Error ? error.message : String(error)
|
||||
logger.warn(
|
||||
@@ -331,10 +339,14 @@ class CdpBackend implements ICdpBackend {
|
||||
}
|
||||
}
|
||||
|
||||
logger.error(
|
||||
`CDP reconnection failed after ${maxRetries} attempts, exiting for restart`,
|
||||
)
|
||||
process.exit(EXIT_CODES.GENERAL_ERROR)
|
||||
if (this.exitOnReconnectFailure) {
|
||||
logger.error(
|
||||
`CDP reconnection failed after ${maxRetries} attempts, exiting for restart`,
|
||||
)
|
||||
process.exit(EXIT_CODES.GENERAL_ERROR)
|
||||
}
|
||||
logger.error(`CDP reconnection failed after ${maxRetries} attempts`)
|
||||
return false
|
||||
}
|
||||
|
||||
async disconnect(): Promise<void> {
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import type { createRuntimeStore } from 'acpx/runtime'
|
||||
import type { OpenClawGatewayChatClient } from '../../api/services/openclaw/openclaw-gateway-chat-client'
|
||||
import type { AgentDefinition } from './agent-types'
|
||||
import { prepareClaudeCodeContext } from './claude-code/prepare'
|
||||
import { prepareCodexContext } from './codex/prepare'
|
||||
import {
|
||||
maybeHandleOpenClawTurn,
|
||||
prepareOpenClawContext,
|
||||
} from './openclaw/prepare'
|
||||
import type { AgentPromptInput, AgentStreamEvent } from './types'
|
||||
|
||||
export interface PreparedAcpxAgentContext {
|
||||
cwd: string
|
||||
runtimeSessionKey: string
|
||||
runPrompt: string
|
||||
commandEnv: Record<string, string>
|
||||
commandIdentity: string
|
||||
useBrowserosMcp: boolean
|
||||
openclawSessionKey: string | null
|
||||
}
|
||||
|
||||
export interface PrepareAcpxAgentContextInput {
|
||||
browserosDir: string
|
||||
agent: AgentDefinition
|
||||
sessionId: 'main'
|
||||
sessionKey: string
|
||||
cwdOverride: string | null
|
||||
isSelectedCwd: boolean
|
||||
message: string
|
||||
}
|
||||
|
||||
export interface AcpxAdapterTurnInput {
|
||||
prompt: AgentPromptInput
|
||||
prepared: PreparedAcpxAgentContext
|
||||
sessionStore: ReturnType<typeof createRuntimeStore>
|
||||
openclawGatewayChat: OpenClawGatewayChatClient | null
|
||||
}
|
||||
|
||||
export interface AcpxAgentAdapter {
|
||||
prepare(
|
||||
input: PrepareAcpxAgentContextInput,
|
||||
): Promise<PreparedAcpxAgentContext>
|
||||
maybeHandleTurn?(
|
||||
input: AcpxAdapterTurnInput,
|
||||
): Promise<ReadableStream<AgentStreamEvent> | null>
|
||||
}
|
||||
|
||||
const ADAPTERS: Record<AgentDefinition['adapter'], AcpxAgentAdapter> = {
|
||||
claude: { prepare: prepareClaudeCodeContext },
|
||||
codex: { prepare: prepareCodexContext },
|
||||
openclaw: {
|
||||
prepare: prepareOpenClawContext,
|
||||
maybeHandleTurn: maybeHandleOpenClawTurn,
|
||||
},
|
||||
}
|
||||
|
||||
export function getAcpxAgentAdapter(
|
||||
adapter: AgentDefinition['adapter'],
|
||||
): AcpxAgentAdapter {
|
||||
return ADAPTERS[adapter]
|
||||
}
|
||||
|
||||
/** Prepares adapter-specific filesystem, prompt, env, and session identity for one ACPX turn. */
|
||||
export async function prepareAcpxAgentContext(
|
||||
input: PrepareAcpxAgentContextInput,
|
||||
): Promise<PreparedAcpxAgentContext> {
|
||||
return getAcpxAgentAdapter(input.agent.adapter).prepare(input)
|
||||
}
|
||||
@@ -0,0 +1,95 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import type {
|
||||
PrepareAcpxAgentContextInput,
|
||||
PreparedAcpxAgentContext,
|
||||
} from './acpx-agent-adapter'
|
||||
import type { AgentRuntimePaths } from './acpx-runtime-context'
|
||||
import {
|
||||
BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
|
||||
buildAcpxRuntimePromptPrefix,
|
||||
buildBrowserosAcpPrompt,
|
||||
ensureAgentHome,
|
||||
ensureRuntimeSkills,
|
||||
ensureUsableCwd,
|
||||
resolveAgentRuntimePaths,
|
||||
} from './acpx-runtime-context'
|
||||
import {
|
||||
deriveRuntimeSessionKey,
|
||||
saveLatestRuntimeState,
|
||||
} from './acpx-runtime-state'
|
||||
|
||||
export interface BrowserosManagedContext {
|
||||
input: PrepareAcpxAgentContextInput
|
||||
paths: AgentRuntimePaths
|
||||
skillNames: string[]
|
||||
promptPrefix: string
|
||||
}
|
||||
|
||||
/** Builds the common BrowserOS-managed home, skills, cwd, and prompt prefix for Claude/Codex. */
|
||||
export async function prepareBrowserosManagedContext(
|
||||
input: PrepareAcpxAgentContextInput,
|
||||
): Promise<BrowserosManagedContext> {
|
||||
const paths = resolveAgentRuntimePaths({
|
||||
browserosDir: input.browserosDir,
|
||||
agentId: input.agent.id,
|
||||
cwd: input.cwdOverride,
|
||||
})
|
||||
await ensureUsableCwd(paths.effectiveCwd, !input.isSelectedCwd)
|
||||
await ensureAgentHome(paths)
|
||||
const skillNames = await ensureRuntimeSkills(paths.runtimeSkillsDir)
|
||||
const promptPrefix = buildAcpxRuntimePromptPrefix({
|
||||
agent: input.agent,
|
||||
paths,
|
||||
skillNames,
|
||||
})
|
||||
return { input, paths, skillNames, promptPrefix }
|
||||
}
|
||||
|
||||
/** Finalizes BrowserOS-managed prep into the uniform adapter context consumed by AcpxRuntime. */
|
||||
export async function finishBrowserosManagedContext(input: {
|
||||
input: PrepareAcpxAgentContextInput
|
||||
paths: AgentRuntimePaths
|
||||
skillNames: string[]
|
||||
promptPrefix: string
|
||||
commandEnv: Record<string, string>
|
||||
}): Promise<PreparedAcpxAgentContext> {
|
||||
const commandIdentity = stableCommandIdentity(input.commandEnv)
|
||||
const runtimeSessionKey = deriveRuntimeSessionKey({
|
||||
agentId: input.input.agent.id,
|
||||
sessionId: input.input.sessionId,
|
||||
adapter: input.input.agent.adapter,
|
||||
cwd: input.paths.effectiveCwd,
|
||||
agentHome: input.paths.agentHome,
|
||||
promptVersion: BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
|
||||
skillIdentity: input.skillNames.join(','),
|
||||
commandIdentity,
|
||||
})
|
||||
await saveLatestRuntimeState(input.paths.runtimeStatePath, {
|
||||
sessionId: input.input.sessionId,
|
||||
runtimeSessionKey,
|
||||
cwd: input.paths.effectiveCwd,
|
||||
agentHome: input.paths.agentHome,
|
||||
updatedAt: Date.now(),
|
||||
})
|
||||
return {
|
||||
cwd: input.paths.effectiveCwd,
|
||||
runtimeSessionKey,
|
||||
runPrompt: buildBrowserosAcpPrompt(input.promptPrefix, input.input.message),
|
||||
commandEnv: input.commandEnv,
|
||||
commandIdentity,
|
||||
useBrowserosMcp: true,
|
||||
openclawSessionKey: null,
|
||||
}
|
||||
}
|
||||
|
||||
export function stableCommandIdentity(env: Record<string, string>): string {
|
||||
return Object.entries(env)
|
||||
.sort(([left], [right]) => left.localeCompare(right))
|
||||
.map(([key, value]) => `${key}=${value}`)
|
||||
.join('\n')
|
||||
}
|
||||
@@ -35,6 +35,7 @@ export interface AgentRuntimePaths {
|
||||
effectiveCwd: string
|
||||
runtimeStatePath: string
|
||||
runtimeSkillsDir: string
|
||||
runtimeRoot: string
|
||||
codexHome: string
|
||||
}
|
||||
|
||||
@@ -45,6 +46,7 @@ export function resolveAgentRuntimePaths(input: {
|
||||
}): AgentRuntimePaths {
|
||||
const harnessDir = join(input.browserosDir, 'agents', 'harness')
|
||||
const defaultWorkspaceCwd = join(harnessDir, 'workspace')
|
||||
const runtimeRoot = join(harnessDir, input.agentId, 'runtime')
|
||||
return {
|
||||
browserosDir: input.browserosDir,
|
||||
harnessDir,
|
||||
@@ -57,7 +59,8 @@ export function resolveAgentRuntimePaths(input: {
|
||||
`${input.agentId}.json`,
|
||||
),
|
||||
runtimeSkillsDir: join(harnessDir, 'runtime-skills'),
|
||||
codexHome: join(harnessDir, input.agentId, 'runtime', 'codex-home'),
|
||||
runtimeRoot,
|
||||
codexHome: join(runtimeRoot, 'codex-home'),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,7 +113,7 @@ export async function materializeCodexHome(input: {
|
||||
}
|
||||
}
|
||||
|
||||
/** Builds the stable BrowserOS operating instructions prepended to ACP turns. */
|
||||
/** Builds stable BrowserOS-managed instructions for Claude/Codex ACP turns. */
|
||||
export function buildAcpxRuntimePromptPrefix(input: {
|
||||
agent: AgentDefinition
|
||||
paths: AgentRuntimePaths
|
||||
@@ -134,6 +137,12 @@ BrowserOS has made runtime skills available for this ACPX session.
|
||||
Skill root: ${input.paths.runtimeSkillsDir}
|
||||
Available skills: ${input.skillNames.join(', ')}
|
||||
When a task calls for one of these skills, read its SKILL.md from that root and follow it.
|
||||
|
||||
When the user asks you to remember, save feedback, store a preference, or update memory in this BrowserOS ACPX context, use the BrowserOS memory skill.
|
||||
Write BrowserOS memory only under AGENT_HOME:
|
||||
- AGENT_HOME/MEMORY.md for durable promoted preferences and operating patterns.
|
||||
- AGENT_HOME/memory/YYYY-MM-DD.md for daily notes and candidate memories.
|
||||
Do not use native Claude project memory, native CLI memory, or workspace files for BrowserOS memory.
|
||||
</browseros_acpx_runtime>`
|
||||
}
|
||||
|
||||
@@ -148,6 +157,40 @@ export function wrapCommandWithEnv(
|
||||
return prefix ? `env ${prefix} ${command}` : command
|
||||
}
|
||||
|
||||
/** Ensures the runtime cwd exists, creating only the managed default workspace. */
|
||||
export async function ensureUsableCwd(
|
||||
cwd: string,
|
||||
isDefaultWorkspace: boolean,
|
||||
): Promise<void> {
|
||||
if (isDefaultWorkspace) {
|
||||
await mkdir(cwd, { recursive: true })
|
||||
return
|
||||
}
|
||||
let info: Stats
|
||||
try {
|
||||
info = await stat(cwd)
|
||||
} catch (err) {
|
||||
if (isNotFoundError(err)) {
|
||||
throw new Error(`Selected workspace does not exist: ${cwd}`)
|
||||
}
|
||||
throw err
|
||||
}
|
||||
if (!info.isDirectory()) {
|
||||
throw new Error(`Selected workspace is not a directory: ${cwd}`)
|
||||
}
|
||||
}
|
||||
|
||||
export function buildBrowserosAcpPrompt(
|
||||
prefix: string,
|
||||
message: string,
|
||||
): string {
|
||||
return `${prefix}
|
||||
|
||||
<user_request>
|
||||
${escapePromptTagText(message)}
|
||||
</user_request>`
|
||||
}
|
||||
|
||||
async function writeFileIfMissing(
|
||||
path: string,
|
||||
content: string,
|
||||
@@ -207,7 +250,7 @@ async function sourceFileExists(path: string): Promise<boolean> {
|
||||
throw err
|
||||
}
|
||||
if (!info.isFile()) {
|
||||
throw new Error(`Expected Codex source file to be a file: ${path}`)
|
||||
throw new Error(`Expected source file to be a file: ${path}`)
|
||||
}
|
||||
return true
|
||||
}
|
||||
@@ -216,6 +259,13 @@ function shellQuote(value: string): string {
|
||||
return `'${value.replace(/'/g, "'\\''")}'`
|
||||
}
|
||||
|
||||
function escapePromptTagText(value: string): string {
|
||||
return value
|
||||
.replace(/&/g, '&')
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
}
|
||||
|
||||
function isNotFoundError(err: unknown): boolean {
|
||||
return (
|
||||
typeof err === 'object' &&
|
||||
|
||||
@@ -114,6 +114,11 @@ Do not store memory files in the project workspace.
|
||||
|
||||
## Write
|
||||
|
||||
- When the user explicitly asks you to remember, save feedback, store a preference, or update memory, use this skill.
|
||||
- Write BrowserOS memory only under $AGENT_HOME.
|
||||
- Use $AGENT_HOME/MEMORY.md for durable promoted preferences and operating patterns.
|
||||
- Use $AGENT_HOME/memory/YYYY-MM-DD.md for daily notes and candidate memories.
|
||||
- Do not use native Claude project memory, native CLI memory, or workspace files for BrowserOS memory.
|
||||
- Put observations and task breadcrumbs in today's daily note first.
|
||||
- Promote only stable patterns into MEMORY.md.
|
||||
- Do not promote one-off facts, raw transcripts, temporary state, secrets, or credentials.
|
||||
|
||||
@@ -4,9 +4,6 @@
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import type { Stats } from 'node:fs'
|
||||
import { mkdir, stat } from 'node:fs/promises'
|
||||
import { join } from 'node:path'
|
||||
import { OPENCLAW_GATEWAY_CONTAINER_PORT } from '@browseros/shared/constants/openclaw'
|
||||
import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
|
||||
@@ -22,28 +19,18 @@ import {
|
||||
createAgentRegistry,
|
||||
createRuntimeStore,
|
||||
} from 'acpx/runtime'
|
||||
import type {
|
||||
OpenAIChatMessage,
|
||||
OpenAIContentPart,
|
||||
OpenClawGatewayChatClient,
|
||||
} from '../../api/services/openclaw/openclaw-gateway-chat-client'
|
||||
import type { OpenClawGatewayChatClient } from '../../api/services/openclaw/openclaw-gateway-chat-client'
|
||||
import { getBrowserosDir } from '../browseros-dir'
|
||||
import { logger } from '../logger'
|
||||
import type { AgentRuntimePaths } from './acpx-runtime-context'
|
||||
import {
|
||||
BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
|
||||
buildAcpxRuntimePromptPrefix,
|
||||
ensureAgentHome,
|
||||
ensureRuntimeSkills,
|
||||
materializeCodexHome,
|
||||
getAcpxAgentAdapter,
|
||||
prepareAcpxAgentContext,
|
||||
} from './acpx-agent-adapter'
|
||||
import {
|
||||
resolveAgentRuntimePaths,
|
||||
wrapCommandWithEnv,
|
||||
} from './acpx-runtime-context'
|
||||
import {
|
||||
deriveRuntimeSessionKey,
|
||||
loadLatestRuntimeState,
|
||||
saveLatestRuntimeState,
|
||||
} from './acpx-runtime-state'
|
||||
import { loadLatestRuntimeState } from './acpx-runtime-state'
|
||||
import type {
|
||||
AgentDefinition,
|
||||
AgentHistoryEntry,
|
||||
@@ -67,7 +54,7 @@ import type {
|
||||
* current token and VM/container paths at spawn time.
|
||||
*/
|
||||
export interface OpenclawGatewayAccessor {
|
||||
/** Current gateway auth token. Kept for legacy token-auth gateway clients. */
|
||||
/** Current gateway auth token. Passed to `openclaw acp --token`. */
|
||||
getGatewayToken(): string
|
||||
/** Container name e.g. browseros-openclaw-openclaw-gateway-1. */
|
||||
getContainerName(): string
|
||||
@@ -107,6 +94,8 @@ interface PreparedRuntimeContext {
|
||||
runPrompt: string
|
||||
agentCommandEnv: Record<string, string>
|
||||
commandIdentity: string
|
||||
useBrowserosMcp: boolean
|
||||
openclawSessionKey: string | null
|
||||
}
|
||||
|
||||
const BROWSEROS_ACP_AGENT_INSTRUCTIONS = `<role>
|
||||
@@ -194,16 +183,11 @@ export class AcpxRuntime implements AgentRuntime {
|
||||
async send(
|
||||
input: AgentPromptInput,
|
||||
): Promise<ReadableStream<AgentStreamEvent>> {
|
||||
const prepared =
|
||||
input.agent.adapter === 'openclaw'
|
||||
? null
|
||||
: await this.prepareRuntimeContext(input, input.cwd ?? this.defaultCwd)
|
||||
const cwd =
|
||||
prepared?.cwd ??
|
||||
(await this.resolveNonManagedCwd(
|
||||
input.cwd ?? this.defaultCwd,
|
||||
!!input.cwd,
|
||||
))
|
||||
const prepared = await this.prepareRuntimeContext(
|
||||
input,
|
||||
input.cwd ?? this.defaultCwd,
|
||||
)
|
||||
const cwd = prepared.cwd
|
||||
const imageAttachments = (input.attachments ?? []).filter((a) =>
|
||||
a.mediaType.startsWith('image/'),
|
||||
)
|
||||
@@ -221,42 +205,38 @@ export class AcpxRuntime implements AgentRuntime {
|
||||
imageAttachmentCount: imageAttachments.length,
|
||||
})
|
||||
|
||||
// Image carve-out for OpenClaw: the openclaw `acp` bridge silently
|
||||
// drops ACP `image` content blocks, so the model never sees the
|
||||
// attachment. Divert image-bearing turns to the gateway's HTTP
|
||||
// /v1/chat/completions endpoint (which accepts OpenAI-style
|
||||
// `image_url` parts) and pipe its SSE back through the same
|
||||
// AgentStreamEvent shape callers already consume.
|
||||
if (
|
||||
input.agent.adapter === 'openclaw' &&
|
||||
imageAttachments.length > 0 &&
|
||||
this.openclawGatewayChat
|
||||
) {
|
||||
return this.sendOpenclawViaGateway(input, imageAttachments, cwd)
|
||||
}
|
||||
const adapter = getAcpxAgentAdapter(input.agent.adapter)
|
||||
const adapterStream =
|
||||
(await adapter.maybeHandleTurn?.({
|
||||
prompt: input,
|
||||
prepared: {
|
||||
cwd: prepared.cwd,
|
||||
runtimeSessionKey: prepared.runtimeSessionKey,
|
||||
runPrompt: prepared.runPrompt,
|
||||
commandEnv: prepared.agentCommandEnv,
|
||||
commandIdentity: prepared.commandIdentity,
|
||||
useBrowserosMcp: prepared.useBrowserosMcp,
|
||||
openclawSessionKey: prepared.openclawSessionKey,
|
||||
},
|
||||
sessionStore: this.sessionStore,
|
||||
openclawGatewayChat: this.openclawGatewayChat,
|
||||
})) ?? null
|
||||
if (adapterStream) return adapterStream
|
||||
|
||||
const runtime = this.getRuntime({
|
||||
cwd,
|
||||
permissionMode: input.permissionMode,
|
||||
nonInteractivePermissions: 'fail',
|
||||
commandEnv: prepared?.agentCommandEnv ?? {},
|
||||
commandIdentity: prepared?.commandIdentity ?? 'openclaw',
|
||||
// OpenClaw agents need their gateway sessionKey baked into the
|
||||
// spawn command (acpx does not forward sessionKey to newSession);
|
||||
// claude/codex don't, and including it would split their cache.
|
||||
openclawSessionKey:
|
||||
input.agent.adapter === 'openclaw' ? input.sessionKey : null,
|
||||
commandEnv: prepared.agentCommandEnv,
|
||||
commandIdentity: prepared.commandIdentity,
|
||||
useBrowserosMcp: prepared.useBrowserosMcp,
|
||||
openclawSessionKey: prepared.openclawSessionKey,
|
||||
})
|
||||
|
||||
return createAcpxEventStream(runtime, input, {
|
||||
cwd,
|
||||
runtimeSessionKey: prepared?.runtimeSessionKey ?? input.sessionKey,
|
||||
runPrompt:
|
||||
prepared?.runPrompt ??
|
||||
buildBrowserosAcpPrompt(
|
||||
BROWSEROS_ACP_AGENT_INSTRUCTIONS,
|
||||
input.message,
|
||||
),
|
||||
runtimeSessionKey: prepared.runtimeSessionKey,
|
||||
runPrompt: prepared.runPrompt,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -277,64 +257,27 @@ export class AcpxRuntime implements AgentRuntime {
|
||||
return (await this.sessionStore.load(agent.sessionKey)) ?? null
|
||||
}
|
||||
|
||||
private async resolveNonManagedCwd(
|
||||
cwdOverride: string | null,
|
||||
isSelectedCwd: boolean,
|
||||
): Promise<string> {
|
||||
const paths = resolveAgentRuntimePaths({
|
||||
browserosDir: this.browserosDir,
|
||||
agentId: 'openclaw',
|
||||
cwd: cwdOverride,
|
||||
})
|
||||
await ensureUsableCwd(paths.effectiveCwd, !isSelectedCwd)
|
||||
return paths.effectiveCwd
|
||||
}
|
||||
|
||||
private async prepareRuntimeContext(
|
||||
input: AgentPromptInput,
|
||||
cwdOverride: string | null,
|
||||
): Promise<PreparedRuntimeContext> {
|
||||
const paths = resolveAgentRuntimePaths({
|
||||
const prepared = await prepareAcpxAgentContext({
|
||||
browserosDir: this.browserosDir,
|
||||
agentId: input.agent.id,
|
||||
cwd: cwdOverride,
|
||||
})
|
||||
await ensureUsableCwd(paths.effectiveCwd, !input.cwd)
|
||||
await ensureAgentHome(paths)
|
||||
const skillNames = await ensureRuntimeSkills(paths.runtimeSkillsDir)
|
||||
if (input.agent.adapter === 'codex') {
|
||||
await materializeCodexHome({ paths, skillNames })
|
||||
}
|
||||
const promptPrefix = buildAcpxRuntimePromptPrefix({
|
||||
agent: input.agent,
|
||||
paths,
|
||||
skillNames,
|
||||
})
|
||||
const agentCommandEnv = buildAgentCommandEnv(input.agent, paths)
|
||||
const commandIdentity = stableCommandIdentity(agentCommandEnv)
|
||||
const runtimeSessionKey = deriveRuntimeSessionKey({
|
||||
agentId: input.agent.id,
|
||||
sessionId: input.sessionId,
|
||||
adapter: input.agent.adapter,
|
||||
cwd: paths.effectiveCwd,
|
||||
agentHome: paths.agentHome,
|
||||
promptVersion: BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
|
||||
skillIdentity: skillNames.join(','),
|
||||
commandIdentity,
|
||||
})
|
||||
await saveLatestRuntimeState(paths.runtimeStatePath, {
|
||||
sessionId: input.sessionId,
|
||||
runtimeSessionKey,
|
||||
cwd: paths.effectiveCwd,
|
||||
agentHome: paths.agentHome,
|
||||
updatedAt: Date.now(),
|
||||
sessionKey: input.sessionKey,
|
||||
cwdOverride,
|
||||
isSelectedCwd: !!input.cwd,
|
||||
message: input.message,
|
||||
})
|
||||
return {
|
||||
cwd: paths.effectiveCwd,
|
||||
runtimeSessionKey,
|
||||
runPrompt: buildBrowserosAcpPrompt(promptPrefix, input.message),
|
||||
agentCommandEnv,
|
||||
commandIdentity,
|
||||
cwd: prepared.cwd,
|
||||
runtimeSessionKey: prepared.runtimeSessionKey,
|
||||
runPrompt: prepared.runPrompt,
|
||||
agentCommandEnv: prepared.commandEnv,
|
||||
commandIdentity: prepared.commandIdentity,
|
||||
useBrowserosMcp: prepared.useBrowserosMcp,
|
||||
openclawSessionKey: prepared.openclawSessionKey,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,6 +287,7 @@ export class AcpxRuntime implements AgentRuntime {
|
||||
nonInteractivePermissions: AcpRuntimeOptions['nonInteractivePermissions']
|
||||
commandEnv: Record<string, string>
|
||||
commandIdentity: string
|
||||
useBrowserosMcp: boolean
|
||||
openclawSessionKey: string | null
|
||||
}): AcpxCoreRuntime {
|
||||
const key = JSON.stringify({
|
||||
@@ -351,16 +295,12 @@ export class AcpxRuntime implements AgentRuntime {
|
||||
permissionMode: input.permissionMode,
|
||||
nonInteractivePermissions: input.nonInteractivePermissions,
|
||||
commandIdentity: input.commandIdentity,
|
||||
useBrowserosMcp: input.useBrowserosMcp,
|
||||
openclawSessionKey: input.openclawSessionKey,
|
||||
})
|
||||
const existing = this.runtimes.get(key)
|
||||
if (existing) return existing
|
||||
|
||||
// OpenClaw exposes its provider tools through the gateway, not through
|
||||
// ACP-side MCP servers. Forwarding the BrowserOS HTTP MCP to its bridge
|
||||
// makes newSession fail because openclaw rejects unsupported transports.
|
||||
// Claude/codex still need the BrowserOS MCP for browser tooling.
|
||||
const isOpenclaw = input.openclawSessionKey !== null
|
||||
const runtime = this.runtimeFactory({
|
||||
cwd: input.cwd,
|
||||
sessionStore: this.sessionStore,
|
||||
@@ -369,9 +309,9 @@ export class AcpxRuntime implements AgentRuntime {
|
||||
openclawSessionKey: input.openclawSessionKey,
|
||||
commandEnv: input.commandEnv,
|
||||
}),
|
||||
mcpServers: isOpenclaw
|
||||
? []
|
||||
: createBrowserosMcpServers(this.browserosServerPort),
|
||||
mcpServers: input.useBrowserosMcp
|
||||
? createBrowserosMcpServers(this.browserosServerPort)
|
||||
: [],
|
||||
permissionMode: input.permissionMode,
|
||||
nonInteractivePermissions: input.nonInteractivePermissions,
|
||||
})
|
||||
@@ -383,195 +323,11 @@ export class AcpxRuntime implements AgentRuntime {
|
||||
nonInteractivePermissions: input.nonInteractivePermissions,
|
||||
browserosServerPort: this.browserosServerPort,
|
||||
commandIdentity: input.commandIdentity,
|
||||
useBrowserosMcp: input.useBrowserosMcp,
|
||||
openclawSessionKey: input.openclawSessionKey,
|
||||
})
|
||||
return runtime
|
||||
}
|
||||
|
||||
/**
|
||||
* Drives an OpenClaw turn that includes image attachments through the
|
||||
* gateway HTTP endpoint, which translates OpenAI-style `image_url`
|
||||
* content parts into provider-native multimodal calls. Streams back
|
||||
* `AgentStreamEvent` so the chat panel renders identically to ACP
|
||||
* turns. On natural completion, appends a synthetic user+assistant
|
||||
* pair to the acpx session record so the turn shows up in
|
||||
* `getHistory()` after a reload.
|
||||
*
|
||||
* Persistence is best-effort: when no session record exists yet (e.g.
|
||||
* the very first turn for a fresh agent is image-only), the live
|
||||
* stream still works but the turn is absent from history on reload.
|
||||
* Subsequent text turns through ACP create/update the record normally.
|
||||
*/
|
||||
private async sendOpenclawViaGateway(
|
||||
input: AgentPromptInput,
|
||||
imageAttachments: ReadonlyArray<{ mediaType: string; data: string }>,
|
||||
cwd: string,
|
||||
): Promise<ReadableStream<AgentStreamEvent>> {
|
||||
if (!this.openclawGatewayChat) {
|
||||
throw new Error(
|
||||
'OpenClaw gateway chat client is not wired into AcpxRuntime',
|
||||
)
|
||||
}
|
||||
|
||||
const existingRecord = await this.sessionStore.load(input.sessionKey)
|
||||
const priorMessages = existingRecord
|
||||
? recordToOpenAIMessages(existingRecord)
|
||||
: []
|
||||
const userContent: OpenAIContentPart[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: buildBrowserosAcpPrompt(
|
||||
BROWSEROS_ACP_AGENT_INSTRUCTIONS,
|
||||
input.message,
|
||||
),
|
||||
},
|
||||
...imageAttachments.map(
|
||||
(a): OpenAIContentPart => ({
|
||||
type: 'image_url',
|
||||
image_url: { url: `data:${a.mediaType};base64,${a.data}` },
|
||||
}),
|
||||
),
|
||||
]
|
||||
const messages: OpenAIChatMessage[] = [
|
||||
...priorMessages,
|
||||
{ role: 'user', content: userContent },
|
||||
]
|
||||
|
||||
logger.info('Agent harness gateway image turn dispatched', {
|
||||
agentId: input.agent.id,
|
||||
sessionKey: input.sessionKey,
|
||||
cwd,
|
||||
priorMessageCount: priorMessages.length,
|
||||
imageAttachmentCount: imageAttachments.length,
|
||||
})
|
||||
|
||||
const upstream = await this.openclawGatewayChat.streamTurn({
|
||||
agentId: input.agent.id,
|
||||
sessionKey: input.sessionKey,
|
||||
messages,
|
||||
signal: input.signal,
|
||||
})
|
||||
|
||||
const sessionStore = this.sessionStore
|
||||
const sessionKey = input.sessionKey
|
||||
const userMessageText = input.message
|
||||
let accumulated = ''
|
||||
|
||||
return new ReadableStream<AgentStreamEvent>({
|
||||
start: (controller) => {
|
||||
const reader = upstream.getReader()
|
||||
const persist = async () => {
|
||||
if (!existingRecord || !accumulated) return
|
||||
try {
|
||||
await persistGatewayTurn(
|
||||
sessionStore,
|
||||
sessionKey,
|
||||
userMessageText,
|
||||
imageAttachments,
|
||||
accumulated,
|
||||
)
|
||||
} catch (err) {
|
||||
logger.warn(
|
||||
'Failed to persist gateway image turn to acpx session record',
|
||||
{
|
||||
sessionKey,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
;(async () => {
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
if (value.type === 'text_delta') accumulated += value.text
|
||||
controller.enqueue(value)
|
||||
}
|
||||
await persist()
|
||||
controller.close()
|
||||
} catch (err) {
|
||||
controller.enqueue({
|
||||
type: 'error',
|
||||
message: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
controller.close()
|
||||
}
|
||||
})().catch(() => {})
|
||||
},
|
||||
cancel: () => {
|
||||
// Best-effort: cancel propagation to the gateway is its own
|
||||
// upstream issue (see plan), but at least drop our reader so
|
||||
// the OpenAI SSE parse loop exits.
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async function persistGatewayTurn(
|
||||
sessionStore: ReturnType<typeof createRuntimeStore>,
|
||||
sessionKey: string,
|
||||
userMessageText: string,
|
||||
imageAttachments: ReadonlyArray<{ mediaType: string; data: string }>,
|
||||
assistantText: string,
|
||||
): Promise<void> {
|
||||
const record = await sessionStore.load(sessionKey)
|
||||
if (!record) return
|
||||
const userContent: AcpxUserContent[] = [
|
||||
{
|
||||
Text: buildBrowserosAcpPrompt(
|
||||
BROWSEROS_ACP_AGENT_INSTRUCTIONS,
|
||||
userMessageText,
|
||||
),
|
||||
} as AcpxUserContent,
|
||||
]
|
||||
for (const _image of imageAttachments) {
|
||||
// The history mapper's `userContentToText` reads `Image.source` and
|
||||
// emits `[image]` for any non-empty value — we just need a truthy
|
||||
// marker so the placeholder renders. We don't store the base64 in
|
||||
// the record (it's already in the gateway's transcript and would
|
||||
// bloat the JSON file).
|
||||
userContent.push({ Image: { source: 'base64' } } as AcpxUserContent)
|
||||
}
|
||||
// The acpx persistence layer requires User messages to carry an `id`
|
||||
// and Agent messages to carry a `tool_results` object — without them
|
||||
// the record fails to round-trip through `parseSessionRecord` on next
|
||||
// load. See acpx/dist/prompt-turn-... `isUserMessage`/`isAgentMessage`.
|
||||
const turnId = randomUUID()
|
||||
const updated = {
|
||||
...record,
|
||||
messages: [
|
||||
...record.messages,
|
||||
{ User: { id: `user-${turnId}`, content: userContent } },
|
||||
{ Agent: { content: [{ Text: assistantText }], tool_results: {} } },
|
||||
],
|
||||
lastUsedAt: new Date().toISOString(),
|
||||
} as AcpSessionRecord
|
||||
await sessionStore.save(updated)
|
||||
}
|
||||
|
||||
function recordToOpenAIMessages(record: AcpSessionRecord): OpenAIChatMessage[] {
|
||||
const messages: OpenAIChatMessage[] = []
|
||||
for (const message of record.messages) {
|
||||
if (message === 'Resume') continue
|
||||
if ('User' in message) {
|
||||
const text = message.User.content
|
||||
.map(userContentToText)
|
||||
.filter(Boolean)
|
||||
.join('\n\n')
|
||||
.trim()
|
||||
if (text) messages.push({ role: 'user', content: text })
|
||||
continue
|
||||
}
|
||||
if ('Agent' in message) {
|
||||
const text = message.Agent.content
|
||||
.map((part) => ('Text' in part ? part.Text : ''))
|
||||
.join('')
|
||||
.trim()
|
||||
if (text) messages.push({ role: 'assistant', content: text })
|
||||
}
|
||||
}
|
||||
return messages
|
||||
}
|
||||
|
||||
type AcpxSessionMessage = AcpSessionRecord['messages'][number]
|
||||
@@ -1000,8 +756,8 @@ function createBrowserosAgentRegistry(input: {
|
||||
* already installed alongside the gateway is reused; BrowserOS does
|
||||
* not require a host-side openclaw install.
|
||||
*
|
||||
* Auth: BrowserOS configures the bundled gateway with `gateway.auth.mode=none`,
|
||||
* so no gateway token flag is needed for the local ACP bridge.
|
||||
* Auth: `openclaw acp --url ...` deliberately does not reuse implicit
|
||||
* env/config credentials, so pass the gateway token explicitly.
|
||||
*
|
||||
* Banner output: OPENCLAW_HIDE_BANNER and OPENCLAW_SUPPRESS_NOTES
|
||||
* suppress non-JSON-RPC chatter on stdout that would otherwise corrupt
|
||||
@@ -1011,6 +767,7 @@ function resolveOpenclawAcpCommand(
|
||||
gateway: OpenclawGatewayAccessor,
|
||||
sessionKey: string | null,
|
||||
): string {
|
||||
const token = gateway.getGatewayToken()
|
||||
const limactl = gateway.getLimactlPath()
|
||||
const vm = gateway.getVmName()
|
||||
const container = gateway.getContainerName()
|
||||
@@ -1059,6 +816,8 @@ function resolveOpenclawAcpCommand(
|
||||
'acp',
|
||||
'--url',
|
||||
gatewayUrlInsideContainer,
|
||||
'--token',
|
||||
token,
|
||||
]
|
||||
if (bridgeSessionKey) {
|
||||
argv.push('--session', bridgeSessionKey)
|
||||
@@ -1066,77 +825,6 @@ function resolveOpenclawAcpCommand(
|
||||
return argv.join(' ')
|
||||
}
|
||||
|
||||
async function ensureUsableCwd(
|
||||
cwd: string,
|
||||
isDefaultWorkspace: boolean,
|
||||
): Promise<void> {
|
||||
if (isDefaultWorkspace) {
|
||||
await mkdir(cwd, { recursive: true })
|
||||
return
|
||||
}
|
||||
let info: Stats
|
||||
try {
|
||||
info = await stat(cwd)
|
||||
} catch (err) {
|
||||
if (isNotFoundError(err)) {
|
||||
throw new Error(`Selected workspace does not exist: ${cwd}`)
|
||||
}
|
||||
throw err
|
||||
}
|
||||
if (!info.isDirectory()) {
|
||||
throw new Error(`Selected workspace is not a directory: ${cwd}`)
|
||||
}
|
||||
}
|
||||
|
||||
function isNotFoundError(err: unknown): boolean {
|
||||
return (
|
||||
typeof err === 'object' &&
|
||||
err !== null &&
|
||||
'code' in err &&
|
||||
err.code === 'ENOENT'
|
||||
)
|
||||
}
|
||||
|
||||
function buildAgentCommandEnv(
|
||||
agent: AgentDefinition,
|
||||
paths: AgentRuntimePaths,
|
||||
): Record<string, string> {
|
||||
if (agent.adapter === 'codex') {
|
||||
return {
|
||||
AGENT_HOME: paths.agentHome,
|
||||
CODEX_HOME: paths.codexHome,
|
||||
}
|
||||
}
|
||||
if (agent.adapter === 'claude') {
|
||||
return {
|
||||
AGENT_HOME: paths.agentHome,
|
||||
}
|
||||
}
|
||||
return {}
|
||||
}
|
||||
|
||||
function stableCommandIdentity(env: Record<string, string>): string {
|
||||
return Object.entries(env)
|
||||
.sort(([left], [right]) => left.localeCompare(right))
|
||||
.map(([key, value]) => `${key}=${value}`)
|
||||
.join('\n')
|
||||
}
|
||||
|
||||
function buildBrowserosAcpPrompt(prefix: string, message: string): string {
|
||||
return `${prefix}
|
||||
|
||||
<user_request>
|
||||
${escapePromptTagText(message)}
|
||||
</user_request>`
|
||||
}
|
||||
|
||||
function escapePromptTagText(value: string): string {
|
||||
return value
|
||||
.replace(/&/g, '&')
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
}
|
||||
|
||||
async function applyRuntimeControls(
|
||||
runtime: AcpxCoreRuntime,
|
||||
handle: AcpRuntimeHandle,
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import type {
|
||||
PrepareAcpxAgentContextInput,
|
||||
PreparedAcpxAgentContext,
|
||||
} from '../acpx-agent-adapter'
|
||||
import {
|
||||
finishBrowserosManagedContext,
|
||||
prepareBrowserosManagedContext,
|
||||
} from '../acpx-agent-common'
|
||||
|
||||
/** Prepares Claude Code with BrowserOS agent home while preserving host Claude auth. */
|
||||
export async function prepareClaudeCodeContext(
|
||||
input: PrepareAcpxAgentContextInput,
|
||||
): Promise<PreparedAcpxAgentContext> {
|
||||
const common = await prepareBrowserosManagedContext(input)
|
||||
return finishBrowserosManagedContext({
|
||||
...common,
|
||||
commandEnv: {
|
||||
AGENT_HOME: common.paths.agentHome,
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import type {
|
||||
PrepareAcpxAgentContextInput,
|
||||
PreparedAcpxAgentContext,
|
||||
} from '../acpx-agent-adapter'
|
||||
import {
|
||||
finishBrowserosManagedContext,
|
||||
prepareBrowserosManagedContext,
|
||||
} from '../acpx-agent-common'
|
||||
import { materializeCodexHome } from '../acpx-runtime-context'
|
||||
|
||||
/** Prepares Codex with a contained CODEX_HOME and BrowserOS agent home. */
|
||||
export async function prepareCodexContext(
|
||||
input: PrepareAcpxAgentContextInput,
|
||||
): Promise<PreparedAcpxAgentContext> {
|
||||
const common = await prepareBrowserosManagedContext(input)
|
||||
await materializeCodexHome({
|
||||
paths: common.paths,
|
||||
skillNames: common.skillNames,
|
||||
})
|
||||
return finishBrowserosManagedContext({
|
||||
...common,
|
||||
commandEnv: {
|
||||
AGENT_HOME: common.paths.agentHome,
|
||||
CODEX_HOME: common.paths.codexHome,
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,219 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import type { AcpSessionRecord, createRuntimeStore } from 'acpx/runtime'
|
||||
import type {
|
||||
OpenAIChatMessage,
|
||||
OpenAIContentPart,
|
||||
} from '../../../api/services/openclaw/openclaw-gateway-chat-client'
|
||||
import { logger } from '../../logger'
|
||||
import type { AcpxAdapterTurnInput } from '../acpx-agent-adapter'
|
||||
import type { AgentStreamEvent } from '../types'
|
||||
|
||||
type ImageAttachment = Readonly<{ mediaType: string; data: string }>
|
||||
|
||||
export async function maybeHandleOpenClawTurn(
|
||||
input: AcpxAdapterTurnInput,
|
||||
): Promise<ReadableStream<AgentStreamEvent> | null> {
|
||||
const imageAttachments = (input.prompt.attachments ?? []).filter((a) =>
|
||||
a.mediaType.startsWith('image/'),
|
||||
)
|
||||
if (imageAttachments.length === 0 || !input.openclawGatewayChat) {
|
||||
return null
|
||||
}
|
||||
return sendOpenclawViaGateway({
|
||||
prompt: input.prompt,
|
||||
sessionStore: input.sessionStore,
|
||||
openclawGatewayChat: input.openclawGatewayChat,
|
||||
imageAttachments,
|
||||
cwd: input.prepared.cwd,
|
||||
runPrompt: input.prepared.runPrompt,
|
||||
})
|
||||
}
|
||||
|
||||
/** Handles OpenClaw image turns through the gateway HTTP chat endpoint. */
|
||||
async function sendOpenclawViaGateway(input: {
|
||||
prompt: AcpxAdapterTurnInput['prompt']
|
||||
sessionStore: AcpxAdapterTurnInput['sessionStore']
|
||||
openclawGatewayChat: NonNullable<AcpxAdapterTurnInput['openclawGatewayChat']>
|
||||
imageAttachments: ReadonlyArray<ImageAttachment>
|
||||
cwd: string
|
||||
runPrompt: string
|
||||
}): Promise<ReadableStream<AgentStreamEvent>> {
|
||||
const existingRecord = await input.sessionStore.load(input.prompt.sessionKey)
|
||||
const priorMessages = existingRecord
|
||||
? recordToOpenAIMessages(existingRecord)
|
||||
: []
|
||||
const userContent: OpenAIContentPart[] = [
|
||||
{
|
||||
type: 'text',
|
||||
text: input.runPrompt,
|
||||
},
|
||||
...input.imageAttachments.map(
|
||||
(a): OpenAIContentPart => ({
|
||||
type: 'image_url',
|
||||
image_url: { url: `data:${a.mediaType};base64,${a.data}` },
|
||||
}),
|
||||
),
|
||||
]
|
||||
const messages: OpenAIChatMessage[] = [
|
||||
...priorMessages,
|
||||
{ role: 'user', content: userContent },
|
||||
]
|
||||
|
||||
logger.info('Agent harness gateway image turn dispatched', {
|
||||
agentId: input.prompt.agent.id,
|
||||
sessionKey: input.prompt.sessionKey,
|
||||
cwd: input.cwd,
|
||||
priorMessageCount: priorMessages.length,
|
||||
imageAttachmentCount: input.imageAttachments.length,
|
||||
})
|
||||
|
||||
const upstream = await input.openclawGatewayChat.streamTurn({
|
||||
agentId: input.prompt.agent.id,
|
||||
sessionKey: input.prompt.sessionKey,
|
||||
messages,
|
||||
signal: input.prompt.signal,
|
||||
})
|
||||
|
||||
const sessionStore = input.sessionStore
|
||||
const sessionKey = input.prompt.sessionKey
|
||||
const userMessageText = input.prompt.message
|
||||
const imageAttachments = input.imageAttachments
|
||||
let accumulated = ''
|
||||
|
||||
return new ReadableStream<AgentStreamEvent>({
|
||||
start: (controller) => {
|
||||
const reader = upstream.getReader()
|
||||
const persist = async () => {
|
||||
if (!existingRecord || !accumulated) return
|
||||
try {
|
||||
await persistGatewayTurn(
|
||||
sessionStore,
|
||||
sessionKey,
|
||||
userMessageText,
|
||||
imageAttachments,
|
||||
accumulated,
|
||||
)
|
||||
} catch (err) {
|
||||
logger.warn(
|
||||
'Failed to persist gateway image turn to acpx session record',
|
||||
{
|
||||
sessionKey,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
;(async () => {
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
if (value.type === 'text_delta') accumulated += value.text
|
||||
controller.enqueue(value)
|
||||
}
|
||||
await persist()
|
||||
controller.close()
|
||||
} catch (err) {
|
||||
controller.enqueue({
|
||||
type: 'error',
|
||||
message: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
controller.close()
|
||||
}
|
||||
})().catch(() => {})
|
||||
},
|
||||
cancel: () => {
|
||||
// Best-effort: cancel propagation to the gateway is tracked separately.
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
async function persistGatewayTurn(
|
||||
sessionStore: ReturnType<typeof createRuntimeStore>,
|
||||
sessionKey: string,
|
||||
userMessageText: string,
|
||||
imageAttachments: ReadonlyArray<ImageAttachment>,
|
||||
assistantText: string,
|
||||
): Promise<void> {
|
||||
const record = await sessionStore.load(sessionKey)
|
||||
if (!record) return
|
||||
const userContent: AcpxUserContent[] = [
|
||||
{ Text: userMessageText } as AcpxUserContent,
|
||||
]
|
||||
for (const _image of imageAttachments) {
|
||||
userContent.push({ Image: { source: 'base64' } } as AcpxUserContent)
|
||||
}
|
||||
const turnId = randomUUID()
|
||||
const updated = {
|
||||
...record,
|
||||
messages: [
|
||||
...record.messages,
|
||||
{ User: { id: `user-${turnId}`, content: userContent } },
|
||||
{ Agent: { content: [{ Text: assistantText }], tool_results: {} } },
|
||||
],
|
||||
lastUsedAt: new Date().toISOString(),
|
||||
} as AcpSessionRecord
|
||||
await sessionStore.save(updated)
|
||||
}
|
||||
|
||||
function recordToOpenAIMessages(record: AcpSessionRecord): OpenAIChatMessage[] {
|
||||
const messages: OpenAIChatMessage[] = []
|
||||
for (const message of record.messages) {
|
||||
if (message === 'Resume') continue
|
||||
if ('User' in message) {
|
||||
const text = message.User.content
|
||||
.map(userContentToText)
|
||||
.filter(Boolean)
|
||||
.join('\n\n')
|
||||
.trim()
|
||||
if (text) messages.push({ role: 'user', content: text })
|
||||
continue
|
||||
}
|
||||
if ('Agent' in message) {
|
||||
const text = message.Agent.content
|
||||
.map((part) => ('Text' in part ? part.Text : ''))
|
||||
.join('')
|
||||
.trim()
|
||||
if (text) messages.push({ role: 'assistant', content: text })
|
||||
}
|
||||
}
|
||||
return messages
|
||||
}
|
||||
|
||||
type AcpxSessionMessage = AcpSessionRecord['messages'][number]
|
||||
type AcpxUserContent = Extract<
|
||||
Exclude<AcpxSessionMessage, 'Resume'>,
|
||||
{ User: unknown }
|
||||
>['User']['content'][number]
|
||||
|
||||
function userContentToText(content: AcpxUserContent): string {
|
||||
if ('Text' in content) return unwrapPromptText(content.Text)
|
||||
if ('Mention' in content) return content.Mention.content
|
||||
if ('Image' in content) return content.Image.source ? '[image]' : ''
|
||||
return ''
|
||||
}
|
||||
|
||||
function unwrapPromptText(raw: string): string {
|
||||
const runtimeMatch = raw.match(
|
||||
/^<browseros_acpx_runtime\b[\s\S]*?<\/browseros_acpx_runtime>\n\n<user_request>\n([\s\S]*?)\n<\/user_request>$/,
|
||||
)
|
||||
if (runtimeMatch) return decodeBasicEntities(runtimeMatch[1]).trim()
|
||||
const roleMatch = raw.match(
|
||||
/^<role>[\s\S]*?<\/role>\n\n<user_request>\n([\s\S]*?)\n<\/user_request>$/,
|
||||
)
|
||||
if (roleMatch) return decodeBasicEntities(roleMatch[1]).trim()
|
||||
return raw.trim()
|
||||
}
|
||||
|
||||
function decodeBasicEntities(value: string): string {
|
||||
return value
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
.replace(/&/g, '&')
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import type {
|
||||
PrepareAcpxAgentContextInput,
|
||||
PreparedAcpxAgentContext,
|
||||
} from '../acpx-agent-adapter'
|
||||
import {
|
||||
buildBrowserosAcpPrompt,
|
||||
ensureUsableCwd,
|
||||
resolveAgentRuntimePaths,
|
||||
} from '../acpx-runtime-context'
|
||||
|
||||
export { maybeHandleOpenClawTurn } from './image-turn'
|
||||
|
||||
const OPENCLAW_BROWSEROS_ACP_INSTRUCTIONS =
|
||||
'<role>You are running inside BrowserOS through the OpenClaw ACP adapter. Use your OpenClaw identity, memory, and browser tools.</role>'
|
||||
|
||||
/**
|
||||
* Prepares OpenClaw without BrowserOS SOUL/MEMORY or BrowserOS MCP.
|
||||
* OpenClaw runs inside the gateway VM/container, so a selected host cwd is not visible there.
|
||||
*/
|
||||
export async function prepareOpenClawContext(
|
||||
input: PrepareAcpxAgentContextInput,
|
||||
): Promise<PreparedAcpxAgentContext> {
|
||||
const paths = resolveAgentRuntimePaths({
|
||||
browserosDir: input.browserosDir,
|
||||
agentId: input.agent.id,
|
||||
})
|
||||
await ensureUsableCwd(paths.effectiveCwd, true)
|
||||
return {
|
||||
cwd: paths.effectiveCwd,
|
||||
runtimeSessionKey: input.sessionKey,
|
||||
runPrompt: buildBrowserosAcpPrompt(
|
||||
OPENCLAW_BROWSEROS_ACP_INSTRUCTIONS,
|
||||
input.message,
|
||||
),
|
||||
commandEnv: {},
|
||||
commandIdentity: 'openclaw',
|
||||
useBrowserosMcp: false,
|
||||
openclawSessionKey: input.sessionKey,
|
||||
}
|
||||
}
|
||||
@@ -159,31 +159,6 @@ describe('ContainerRuntime', () => {
|
||||
)
|
||||
})
|
||||
|
||||
it('passes private-ingress no-auth only when requested', async () => {
|
||||
const deps = createDeps()
|
||||
const runtime = new ContainerRuntime({
|
||||
vm: deps.vm,
|
||||
shell: deps.shell,
|
||||
loader: deps.loader,
|
||||
projectDir: PROJECT_DIR,
|
||||
})
|
||||
|
||||
await runtime.startGateway({
|
||||
...defaultSpec,
|
||||
gatewayToken: undefined,
|
||||
privateIngressNoAuth: true,
|
||||
})
|
||||
|
||||
expect(deps.shell.createContainer).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
env: expect.objectContaining({
|
||||
OPENCLAW_GATEWAY_PRIVATE_INGRESS_NO_AUTH: '1',
|
||||
}),
|
||||
}),
|
||||
undefined,
|
||||
)
|
||||
})
|
||||
|
||||
it('delegates ensureReady and stopVm to VmRuntime', async () => {
|
||||
const deps = createDeps()
|
||||
const runtime = new ContainerRuntime({
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
*/
|
||||
|
||||
import { afterEach, describe, expect, it, mock } from 'bun:test'
|
||||
import { OpenClawGatewayChatClient } from '../../../../src/api/services/openclaw/openclaw-gateway-chat-client'
|
||||
|
||||
describe('OpenClawGatewayChatClient', () => {
|
||||
const originalFetch = globalThis.fetch
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch
|
||||
})
|
||||
|
||||
it('posts chat completions without Authorization when no token provider is configured', async () => {
|
||||
const fetchMock = mock(() =>
|
||||
Promise.resolve(
|
||||
new Response(emptyStream(), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'text/event-stream' },
|
||||
}),
|
||||
),
|
||||
)
|
||||
globalThis.fetch = fetchMock as typeof globalThis.fetch
|
||||
const client = new OpenClawGatewayChatClient(() => 18794)
|
||||
|
||||
await client.streamTurn({
|
||||
agentId: 'main',
|
||||
sessionKey: 'main',
|
||||
messages: [{ role: 'user', content: 'hi' }],
|
||||
})
|
||||
|
||||
expect(fetchMock.mock.calls[0]?.[0]).toBe(
|
||||
'http://127.0.0.1:18794/v1/chat/completions',
|
||||
)
|
||||
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
})
|
||||
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
|
||||
})
|
||||
|
||||
it('keeps bearer auth for legacy token-auth gateways', async () => {
|
||||
const fetchMock = mock(() =>
|
||||
Promise.resolve(
|
||||
new Response(emptyStream(), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'text/event-stream' },
|
||||
}),
|
||||
),
|
||||
)
|
||||
globalThis.fetch = fetchMock as typeof globalThis.fetch
|
||||
const client = new OpenClawGatewayChatClient(
|
||||
() => 18794,
|
||||
async () => 'gateway-token',
|
||||
)
|
||||
|
||||
await client.streamTurn({
|
||||
agentId: 'ops',
|
||||
sessionKey: 'main',
|
||||
messages: [{ role: 'user', content: 'hi' }],
|
||||
})
|
||||
|
||||
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
|
||||
headers: {
|
||||
Authorization: 'Bearer gateway-token',
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
function emptyStream(): ReadableStream<Uint8Array> {
|
||||
return new ReadableStream({
|
||||
start(controller) {
|
||||
controller.close()
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
function fetchHeaders(
|
||||
fetchMock: ReturnType<typeof mock>,
|
||||
): Record<string, string> {
|
||||
return ((fetchMock.mock.calls[0]?.[1] as RequestInit | undefined)?.headers ??
|
||||
{}) as Record<string, string>
|
||||
}
|
||||
@@ -32,22 +32,6 @@ describe('OpenClawHttpClient', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('checks no-auth gateway availability without an Authorization header', async () => {
|
||||
const fetchMock = mock(() => Promise.resolve(new Response('{}')))
|
||||
globalThis.fetch = fetchMock as typeof globalThis.fetch
|
||||
const client = new OpenClawHttpClient(18789)
|
||||
|
||||
await expect(client.isAuthenticated()).resolves.toBe(true)
|
||||
|
||||
expect(fetchMock.mock.calls[0]?.[0]).toBe(
|
||||
'http://127.0.0.1:18789/v1/models',
|
||||
)
|
||||
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
|
||||
method: 'GET',
|
||||
})
|
||||
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
|
||||
})
|
||||
|
||||
it('treats rejected gateway authentication as unavailable', async () => {
|
||||
globalThis.fetch = mock(() =>
|
||||
Promise.resolve(new Response('Unauthorized', { status: 401 })),
|
||||
@@ -110,25 +94,6 @@ describe('OpenClawHttpClient', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('sends no Authorization header when no token provider is configured', async () => {
|
||||
const fetchMock = mock(() =>
|
||||
Promise.resolve(
|
||||
new Response(JSON.stringify({ sessionKey: 'k', messages: [] }), {
|
||||
status: 200,
|
||||
}),
|
||||
),
|
||||
)
|
||||
globalThis.fetch = fetchMock as typeof globalThis.fetch
|
||||
const client = new OpenClawHttpClient(18789)
|
||||
|
||||
await client.getSessionHistory('k')
|
||||
|
||||
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
|
||||
method: 'GET',
|
||||
})
|
||||
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
|
||||
})
|
||||
|
||||
it('omits limit and cursor from the query when undefined', async () => {
|
||||
const fetchMock = mock(() =>
|
||||
Promise.resolve(
|
||||
@@ -250,33 +215,6 @@ describe('OpenClawHttpClient', () => {
|
||||
])
|
||||
})
|
||||
|
||||
it('keeps SSE Accept without Authorization when no token provider is configured', async () => {
|
||||
const fetchMock = mock(() =>
|
||||
Promise.resolve(
|
||||
new Response(
|
||||
new ReadableStream({
|
||||
start(controller) {
|
||||
controller.close()
|
||||
},
|
||||
}),
|
||||
{ status: 200 },
|
||||
),
|
||||
),
|
||||
)
|
||||
globalThis.fetch = fetchMock as typeof globalThis.fetch
|
||||
const client = new OpenClawHttpClient(18789)
|
||||
|
||||
await client.streamSessionHistory('k')
|
||||
|
||||
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
|
||||
method: 'GET',
|
||||
headers: {
|
||||
Accept: 'text/event-stream',
|
||||
},
|
||||
})
|
||||
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
|
||||
})
|
||||
|
||||
it('forwards upstream error frames and closes', async () => {
|
||||
globalThis.fetch = mock(() =>
|
||||
Promise.resolve(
|
||||
@@ -377,10 +315,3 @@ async function readEvents(
|
||||
|
||||
return events
|
||||
}
|
||||
|
||||
function fetchHeaders(
|
||||
fetchMock: ReturnType<typeof mock>,
|
||||
): Record<string, string> {
|
||||
return ((fetchMock.mock.calls[0]?.[1] as RequestInit | undefined)?.headers ??
|
||||
{}) as Record<string, string>
|
||||
}
|
||||
|
||||
@@ -338,7 +338,7 @@ describe('OpenClawService', () => {
|
||||
expect(runOnboard).toHaveBeenCalledWith({
|
||||
acceptRisk: true,
|
||||
authChoice: 'skip',
|
||||
gatewayAuth: 'none',
|
||||
gatewayAuth: 'token',
|
||||
gatewayBind: 'lan',
|
||||
gatewayPort: 18789,
|
||||
installDaemon: false,
|
||||
@@ -680,49 +680,6 @@ describe('OpenClawService', () => {
|
||||
expect(probe).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('start ignores stale gateway tokens when config auth mode is none', async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
|
||||
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
|
||||
await writeFile(
|
||||
join(tempDir, '.openclaw', 'openclaw.json'),
|
||||
JSON.stringify({
|
||||
gateway: {
|
||||
auth: {
|
||||
mode: 'none',
|
||||
token: 'stale-token',
|
||||
},
|
||||
},
|
||||
}),
|
||||
)
|
||||
const ensureReady = mock(async () => {})
|
||||
const startGateway = mock(async () => {})
|
||||
const waitForReady = mock(async () => true)
|
||||
const probe = mock(async () => {})
|
||||
const service = new OpenClawService() as MutableOpenClawService
|
||||
|
||||
service.openclawDir = tempDir
|
||||
service.runtime = {
|
||||
ensureReady,
|
||||
isReady: async () => false,
|
||||
startGateway,
|
||||
waitForReady,
|
||||
}
|
||||
service.cliClient = {
|
||||
probe,
|
||||
}
|
||||
|
||||
await service.start()
|
||||
|
||||
expect(startGateway).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
gatewayToken: undefined,
|
||||
privateIngressNoAuth: true,
|
||||
}),
|
||||
expect.any(Function),
|
||||
)
|
||||
expect(service.token).not.toBe('stale-token')
|
||||
})
|
||||
|
||||
it('serializes concurrent start calls and only starts the gateway once', async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
|
||||
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
|
||||
@@ -1179,53 +1136,6 @@ describe('OpenClawService', () => {
|
||||
expect(probe).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('tryAutoStart reuses a ready no-auth gateway without Authorization', async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
|
||||
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
|
||||
await writeFile(
|
||||
join(tempDir, '.openclaw', 'openclaw.json'),
|
||||
JSON.stringify({
|
||||
gateway: {
|
||||
auth: {
|
||||
mode: 'none',
|
||||
token: 'stale-token',
|
||||
},
|
||||
},
|
||||
}),
|
||||
)
|
||||
const ensureReady = mock(async () => {})
|
||||
const isReady = mock(async () => true)
|
||||
const isGatewayCurrent = mock(async () => true)
|
||||
const startGateway = mock(async () => {})
|
||||
const probe = mock(async () => {})
|
||||
const fetchMock = mock(() =>
|
||||
Promise.resolve(new Response('', { status: 200 })),
|
||||
)
|
||||
globalThis.fetch = fetchMock as typeof globalThis.fetch
|
||||
const service = new OpenClawService() as MutableOpenClawService
|
||||
|
||||
service.openclawDir = tempDir
|
||||
service.runtime = {
|
||||
ensureReady,
|
||||
isReady,
|
||||
isGatewayCurrent,
|
||||
startGateway,
|
||||
}
|
||||
service.cliClient = { probe }
|
||||
|
||||
await service.tryAutoStart()
|
||||
|
||||
expect(startGateway).not.toHaveBeenCalled()
|
||||
expect(fetchMock.mock.calls[0]?.[0]).toBe(
|
||||
'http://127.0.0.1:18789/v1/models',
|
||||
)
|
||||
expect(fetchMock.mock.calls[0]?.[1]).toMatchObject({
|
||||
method: 'GET',
|
||||
})
|
||||
expect(fetchHeaders(fetchMock)).not.toHaveProperty('Authorization')
|
||||
expect(probe).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
|
||||
it('tryAutoStart recreates a ready gateway when the image is stale', async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
|
||||
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
|
||||
@@ -1810,10 +1720,3 @@ function mockGatewayAuth(status = 200): ReturnType<typeof mock> {
|
||||
globalThis.fetch = fetchMock as typeof globalThis.fetch
|
||||
return fetchMock
|
||||
}
|
||||
|
||||
function fetchHeaders(
|
||||
fetchMock: ReturnType<typeof mock>,
|
||||
): Record<string, string> {
|
||||
return ((fetchMock.mock.calls[0]?.[1] as RequestInit | undefined)?.headers ??
|
||||
{}) as Record<string, string>
|
||||
}
|
||||
|
||||
@@ -51,13 +51,17 @@ describe('CdpBackend', () => {
|
||||
const originalReconnectDelay = TIMEOUTS.CDP_RECONNECT_DELAY
|
||||
let fetchUrls: string[] = []
|
||||
let failIpv4Discovery = false
|
||||
let failAllDiscovery = false
|
||||
let wsHost = '127.0.0.1'
|
||||
let originalExit: typeof process.exit
|
||||
|
||||
beforeEach(() => {
|
||||
MockWebSocket.instances = []
|
||||
fetchUrls = []
|
||||
failIpv4Discovery = false
|
||||
failAllDiscovery = false
|
||||
wsHost = '127.0.0.1'
|
||||
originalExit = process.exit
|
||||
|
||||
;(TIMEOUTS as unknown as { CDP_CONNECT: number }).CDP_CONNECT = 200
|
||||
;(
|
||||
@@ -67,6 +71,9 @@ describe('CdpBackend', () => {
|
||||
globalThis.fetch = (async (input: string | URL | Request) => {
|
||||
const url = String(input)
|
||||
fetchUrls.push(url)
|
||||
if (failAllDiscovery) {
|
||||
throw new Error('Unable to connect')
|
||||
}
|
||||
if (failIpv4Discovery && url.includes('127.0.0.1')) {
|
||||
throw new Error('Unable to connect')
|
||||
}
|
||||
@@ -87,6 +94,7 @@ describe('CdpBackend', () => {
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch
|
||||
globalThis.WebSocket = originalWebSocket
|
||||
process.exit = originalExit
|
||||
;(TIMEOUTS as unknown as { CDP_CONNECT: number }).CDP_CONNECT =
|
||||
originalConnectTimeout
|
||||
;(
|
||||
@@ -160,4 +168,31 @@ describe('CdpBackend', () => {
|
||||
assert(fetchUrls.length >= 3)
|
||||
await cdp.disconnect()
|
||||
})
|
||||
|
||||
it('can disable process exit when reconnect retries are exhausted', async () => {
|
||||
let exitCalled = false
|
||||
process.exit = (() => {
|
||||
exitCalled = true
|
||||
throw new Error('process.exit should not be called')
|
||||
}) as unknown as typeof process.exit
|
||||
|
||||
const cdp = new CdpBackend({ port: 9222, exitOnReconnectFailure: false })
|
||||
const connectPromise = cdp.connect()
|
||||
|
||||
await waitFor(() => MockWebSocket.instances.length === 1)
|
||||
const ws1 = MockWebSocket.instances[0]
|
||||
ws1?.open()
|
||||
await connectPromise
|
||||
assert.strictEqual(cdp.isConnected(), true)
|
||||
|
||||
failAllDiscovery = true
|
||||
ws1?.close()
|
||||
|
||||
await waitFor(() => fetchUrls.length >= 10)
|
||||
await Bun.sleep(5)
|
||||
|
||||
assert.strictEqual(exitCalled, false)
|
||||
assert.strictEqual(cdp.isConnected(), false)
|
||||
await cdp.disconnect()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
*/
|
||||
|
||||
import { afterEach, describe, expect, it } from 'bun:test'
|
||||
import { mkdtemp, readFile, rm } from 'node:fs/promises'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { join } from 'node:path'
|
||||
import { prepareAcpxAgentContext } from '../../../src/lib/agents/acpx-agent-adapter'
|
||||
import type { AgentDefinition } from '../../../src/lib/agents/agent-types'
|
||||
|
||||
describe('prepareAcpxAgentContext', () => {
|
||||
const tempDirs: string[] = []
|
||||
|
||||
afterEach(async () => {
|
||||
await Promise.all(
|
||||
tempDirs.map((dir) => rm(dir, { recursive: true, force: true })),
|
||||
)
|
||||
tempDirs.length = 0
|
||||
})
|
||||
|
||||
function makeAgent(adapter: AgentDefinition['adapter']): AgentDefinition {
|
||||
return {
|
||||
id: `${adapter}-agent`,
|
||||
name: `${adapter} agent`,
|
||||
adapter,
|
||||
permissionMode: 'approve-all',
|
||||
sessionKey: `agent:${adapter}-agent:main`,
|
||||
createdAt: 1000,
|
||||
updatedAt: 1000,
|
||||
}
|
||||
}
|
||||
|
||||
it('prepares Claude with BrowserOS memory, host auth, BrowserOS MCP, and fingerprinted session', async () => {
|
||||
const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
|
||||
tempDirs.push(browserosDir)
|
||||
const prepared = await prepareAcpxAgentContext({
|
||||
browserosDir,
|
||||
agent: makeAgent('claude'),
|
||||
sessionId: 'main',
|
||||
sessionKey: 'agent:claude-agent:main',
|
||||
cwdOverride: null,
|
||||
isSelectedCwd: false,
|
||||
message: 'remember this',
|
||||
})
|
||||
|
||||
expect(prepared.commandEnv.AGENT_HOME).toContain('/claude-agent/home')
|
||||
expect(prepared.commandEnv).not.toHaveProperty('CLAUDE_CONFIG_DIR')
|
||||
expect(prepared.commandEnv).not.toHaveProperty('CODEX_HOME')
|
||||
expect(prepared.useBrowserosMcp).toBe(true)
|
||||
expect(prepared.openclawSessionKey).toBeNull()
|
||||
expect(prepared.runtimeSessionKey).toMatch(
|
||||
/^agent:claude-agent:main:[a-f0-9]{16}$/,
|
||||
)
|
||||
expect(prepared.runPrompt).toContain(
|
||||
'Available skills: browseros, memory, soul',
|
||||
)
|
||||
expect(
|
||||
await readFile(`${prepared.commandEnv.AGENT_HOME}/MEMORY.md`, 'utf8'),
|
||||
).toContain('# MEMORY.md')
|
||||
})
|
||||
|
||||
it('prepares Codex with CODEX_HOME and BrowserOS MCP', async () => {
|
||||
const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
|
||||
tempDirs.push(browserosDir)
|
||||
const prepared = await prepareAcpxAgentContext({
|
||||
browserosDir,
|
||||
agent: makeAgent('codex'),
|
||||
sessionId: 'main',
|
||||
sessionKey: 'agent:codex-agent:main',
|
||||
cwdOverride: null,
|
||||
isSelectedCwd: false,
|
||||
message: 'hi',
|
||||
})
|
||||
|
||||
expect(prepared.commandEnv.AGENT_HOME).toContain('/codex-agent/home')
|
||||
expect(prepared.commandEnv.CODEX_HOME).toContain(
|
||||
'/codex-agent/runtime/codex-home',
|
||||
)
|
||||
expect(prepared.commandEnv).not.toHaveProperty('CLAUDE_CONFIG_DIR')
|
||||
expect(prepared.useBrowserosMcp).toBe(true)
|
||||
expect(prepared.openclawSessionKey).toBeNull()
|
||||
expect(prepared.runPrompt).toContain('AGENT_HOME=')
|
||||
})
|
||||
|
||||
it('prepares OpenClaw without BrowserOS memory, host cwd, skills, or MCP', async () => {
|
||||
const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
|
||||
tempDirs.push(browserosDir)
|
||||
const ignoredSelectedCwd = join(browserosDir, 'missing-selected-workspace')
|
||||
const prepared = await prepareAcpxAgentContext({
|
||||
browserosDir,
|
||||
agent: makeAgent('openclaw'),
|
||||
sessionId: 'main',
|
||||
sessionKey: 'agent:openclaw-agent:main',
|
||||
cwdOverride: ignoredSelectedCwd,
|
||||
isSelectedCwd: true,
|
||||
message: 'browse',
|
||||
})
|
||||
|
||||
expect(prepared.cwd).toBe(
|
||||
join(browserosDir, 'agents', 'harness', 'workspace'),
|
||||
)
|
||||
expect(prepared.commandEnv).toEqual({})
|
||||
expect(prepared.useBrowserosMcp).toBe(false)
|
||||
expect(prepared.openclawSessionKey).toBe('agent:openclaw-agent:main')
|
||||
expect(prepared.runtimeSessionKey).toBe('agent:openclaw-agent:main')
|
||||
expect(prepared.runPrompt).not.toContain('SOUL.md stores')
|
||||
expect(prepared.runPrompt).not.toContain('BrowserOS memory skill')
|
||||
expect(prepared.runPrompt).not.toContain('AGENT_HOME/MEMORY.md')
|
||||
expect(prepared.runPrompt).not.toContain('Available skills:')
|
||||
})
|
||||
})
|
||||
@@ -55,6 +55,9 @@ describe('acpx runtime context helpers', () => {
|
||||
expect(paths.runtimeSkillsDir).toBe(
|
||||
join(browserosDir, 'agents', 'harness', 'runtime-skills'),
|
||||
)
|
||||
expect(paths.runtimeRoot).toBe(
|
||||
join(browserosDir, 'agents', 'harness', 'agent-1', 'runtime'),
|
||||
)
|
||||
expect(paths.codexHome).toBe(
|
||||
join(
|
||||
browserosDir,
|
||||
@@ -257,4 +260,33 @@ describe('acpx runtime context helpers', () => {
|
||||
)
|
||||
expect(prompt).toContain('Available skills: browseros, memory, soul')
|
||||
})
|
||||
|
||||
it('routes explicit memory requests to BrowserOS AGENT_HOME files', () => {
|
||||
const agent: AgentDefinition = {
|
||||
id: 'agent-1',
|
||||
name: 'Researcher',
|
||||
adapter: 'claude',
|
||||
permissionMode: 'approve-all',
|
||||
sessionKey: 'agent:agent-1:main',
|
||||
createdAt: 1000,
|
||||
updatedAt: 1000,
|
||||
}
|
||||
const paths = resolveAgentRuntimePaths({
|
||||
browserosDir: '/tmp/browseros',
|
||||
agentId: agent.id,
|
||||
cwd: '/tmp/workspace',
|
||||
})
|
||||
|
||||
const prompt = buildAcpxRuntimePromptPrefix({
|
||||
agent,
|
||||
paths,
|
||||
skillNames: ['browseros', 'memory', 'soul'],
|
||||
})
|
||||
|
||||
expect(prompt).toContain('When the user asks you to remember')
|
||||
expect(prompt).toContain('use the BrowserOS memory skill')
|
||||
expect(prompt).toContain('AGENT_HOME/MEMORY.md')
|
||||
expect(prompt).toContain('AGENT_HOME/memory/YYYY-MM-DD.md')
|
||||
expect(prompt).toContain('Do not use native Claude project memory')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -868,7 +868,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
)
|
||||
})
|
||||
|
||||
it('injects AGENT_HOME into Claude ACP command resolution', async () => {
|
||||
it('injects AGENT_HOME without CLAUDE_CONFIG_DIR into Claude ACP command resolution', async () => {
|
||||
const browserosDir = await mkdtemp(
|
||||
join(tmpdir(), 'browseros-acpx-browseros-'),
|
||||
)
|
||||
@@ -898,6 +898,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
const command =
|
||||
getCreateRuntimeOptions(calls).agentRegistry.resolve('claude')
|
||||
expect(command).toContain('env AGENT_HOME=')
|
||||
expect(command).not.toContain('CLAUDE_CONFIG_DIR=')
|
||||
expect(command).not.toContain('CODEX_HOME=')
|
||||
})
|
||||
|
||||
@@ -1023,8 +1024,9 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
expect(command).toContain(
|
||||
'nerdctl exec -i -e OPENCLAW_HIDE_BANNER=1 -e OPENCLAW_SUPPRESS_NOTES=1 browseros-openclaw-openclaw-gateway-1',
|
||||
)
|
||||
expect(command).toContain('openclaw acp --url ws://127.0.0.1:18789')
|
||||
expect(command).not.toContain('--token')
|
||||
expect(command).toContain(
|
||||
'openclaw acp --url ws://127.0.0.1:18789 --token test-token-abc',
|
||||
)
|
||||
// sessionKey routing: the bridge needs --session <key> to map newSession
|
||||
// requests to the matching gateway agent (acpx does not forward
|
||||
// sessionKey via ACP newSession params).
|
||||
@@ -1260,7 +1262,15 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
max_segments: 0,
|
||||
},
|
||||
closed: false,
|
||||
messages: [],
|
||||
messages: [
|
||||
{
|
||||
User: {
|
||||
id: 'prior-user',
|
||||
content: [{ Text: 'literal & <tag>' } as never],
|
||||
},
|
||||
},
|
||||
{ Agent: { content: [{ Text: 'Prior answer.' }], tool_results: {} } },
|
||||
],
|
||||
updated_at: seedTimestamp,
|
||||
cumulative_token_usage: {},
|
||||
request_token_usage: {},
|
||||
@@ -1285,13 +1295,15 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
})
|
||||
},
|
||||
} as never
|
||||
const calls: Array<{ method: string; input: unknown }> = []
|
||||
const runtime = new AcpxRuntime({
|
||||
cwd,
|
||||
stateDir,
|
||||
openclawGatewayChat,
|
||||
// Provide a runtime factory that would fail loudly if reached —
|
||||
// image turns must NOT fall through to the ACP path.
|
||||
runtimeFactory: () => {
|
||||
runtimeFactory: (options) => {
|
||||
calls.push({ method: 'createRuntime', input: options })
|
||||
throw new Error('ACP path should not be reached for image turns')
|
||||
},
|
||||
})
|
||||
@@ -1322,6 +1334,9 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
{ type: 'done', stopReason: 'end_turn' },
|
||||
])
|
||||
expect(gatewayCalls).toHaveLength(1)
|
||||
expect(
|
||||
calls.filter((call) => call.method === 'createRuntime'),
|
||||
).toHaveLength(0)
|
||||
const gatewayInput = gatewayCalls[0]?.input as {
|
||||
agentId: string
|
||||
sessionKey: string
|
||||
@@ -1331,6 +1346,10 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
}>
|
||||
}
|
||||
expect(gatewayInput.agentId).toBe('img-bot')
|
||||
expect(gatewayInput.messages[0]).toEqual({
|
||||
role: 'user',
|
||||
content: 'literal & <tag>',
|
||||
})
|
||||
expect(gatewayInput.messages.at(-1)?.role).toBe('user')
|
||||
const userContent = gatewayInput.messages.at(-1)?.content
|
||||
expect(Array.isArray(userContent)).toBe(true)
|
||||
@@ -1345,7 +1364,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
|
||||
agent,
|
||||
sessionId: 'main',
|
||||
})
|
||||
expect(history.items.map((item) => item.role)).toEqual([
|
||||
expect(history.items.slice(-2).map((item) => item.role)).toEqual([
|
||||
'user',
|
||||
'assistant',
|
||||
])
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
export const OPENCLAW_AGENT_NAME = 'openclaw'
|
||||
export const OPENCLAW_IMAGE =
|
||||
'ghcr.io/browseros-ai/openclaw:2026.5.2-browseros.1'
|
||||
export const OPENCLAW_IMAGE = 'ghcr.io/openclaw/openclaw:2026.4.12'
|
||||
export const OPENCLAW_GATEWAY_CONTAINER_PORT = 18789
|
||||
export const OPENCLAW_CONTAINER_HOME = '/home/node/.openclaw'
|
||||
export const OPENCLAW_COMPOSE_PROJECT_NAME = 'browseros-openclaw'
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
BINARY := browseros-patch
|
||||
PREFIX ?= /usr/local/bin
|
||||
GOBIN := $(shell go env GOBIN)
|
||||
ifeq ($(GOBIN),)
|
||||
GOBIN := $(shell go env GOPATH)/bin
|
||||
endif
|
||||
PREFIX ?= $(GOBIN)
|
||||
VERSION ?= dev
|
||||
|
||||
.PHONY: build install clean test fmt
|
||||
.PHONY: build install uninstall clean test fmt
|
||||
|
||||
build:
|
||||
go build -ldflags "-X github.com/browseros-ai/BrowserOS/packages/browseros/tools/patch/cmd.Version=$(VERSION)" -o $(BINARY) .
|
||||
@@ -17,6 +21,10 @@ else
|
||||
endif
|
||||
@echo "Installed $(BINARY) to $(PREFIX)/$(BINARY)"
|
||||
|
||||
uninstall:
|
||||
rm -f $(PREFIX)/$(BINARY)
|
||||
@echo "Removed $(PREFIX)/$(BINARY)"
|
||||
|
||||
test:
|
||||
go test ./...
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package workspace
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -15,21 +16,21 @@ func Detect(reg *Registry, cwd string) (Entry, error) {
|
||||
return Entry{}, err
|
||||
}
|
||||
clean := filepath.Clean(abs)
|
||||
realClean := canonicalPath(clean)
|
||||
var best Entry
|
||||
bestLen := -1
|
||||
for _, ws := range reg.Workspaces {
|
||||
base := filepath.Clean(ws.Path)
|
||||
if clean == base || strings.HasPrefix(clean, base+string(filepath.Separator)) {
|
||||
if len(base) > bestLen {
|
||||
realBase := canonicalPath(base)
|
||||
if containsPath(clean, base) || containsPath(realClean, realBase) {
|
||||
if len(realBase) > bestLen {
|
||||
best = ws
|
||||
bestLen = len(base)
|
||||
bestLen = len(realBase)
|
||||
}
|
||||
}
|
||||
}
|
||||
if bestLen == -1 {
|
||||
return Entry{}, fmt.Errorf(
|
||||
`not inside a registered workspace; run "browseros-patch list" to inspect workspaces or pass one by name`,
|
||||
)
|
||||
return Entry{}, detectError(clean, realClean, reg.Workspaces)
|
||||
}
|
||||
return best, nil
|
||||
}
|
||||
@@ -47,3 +48,43 @@ func Resolve(reg *Registry, name string, cwd string, src string) (Entry, error)
|
||||
}
|
||||
return Detect(reg, cwd)
|
||||
}
|
||||
|
||||
func canonicalPath(path string) string {
|
||||
realPath, err := filepath.EvalSymlinks(path)
|
||||
if err != nil {
|
||||
return filepath.Clean(path)
|
||||
}
|
||||
return filepath.Clean(realPath)
|
||||
}
|
||||
|
||||
func containsPath(path string, base string) bool {
|
||||
return path == base || strings.HasPrefix(path, base+string(filepath.Separator))
|
||||
}
|
||||
|
||||
func detectError(cwd string, resolvedCWD string, workspaces []Entry) error {
|
||||
var builder strings.Builder
|
||||
builder.WriteString(`not inside a registered workspace; run "browseros-patch list" to inspect workspaces or pass one by name`)
|
||||
builder.WriteString("\n")
|
||||
builder.WriteString("cwd: ")
|
||||
builder.WriteString(cwd)
|
||||
if resolvedCWD != cwd {
|
||||
builder.WriteString("\nresolved cwd: ")
|
||||
builder.WriteString(resolvedCWD)
|
||||
}
|
||||
if len(workspaces) > 0 {
|
||||
builder.WriteString("\nregistered workspaces:")
|
||||
sorted := append([]Entry(nil), workspaces...)
|
||||
slices.SortFunc(sorted, func(a, b Entry) int {
|
||||
return strings.Compare(a.Name, b.Name)
|
||||
})
|
||||
for _, ws := range sorted {
|
||||
builder.WriteString("\n ")
|
||||
builder.WriteString(ws.Name)
|
||||
builder.WriteString(" ")
|
||||
builder.WriteString(ws.Path)
|
||||
}
|
||||
builder.WriteString("\nexample: browseros-patch diff ")
|
||||
builder.WriteString(sorted[0].Name)
|
||||
}
|
||||
return fmt.Errorf("%s", builder.String())
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ func NormalizeWorkspacePath(raw string) (string, error) {
|
||||
if _, err := os.Stat(filepath.Join(clean, ".git")); err != nil {
|
||||
return "", fmt.Errorf("workspace is not a git checkout: %s", clean)
|
||||
}
|
||||
return clean, nil
|
||||
return canonicalPath(clean), nil
|
||||
}
|
||||
|
||||
func (r *Registry) Get(name string) (Entry, error) {
|
||||
|
||||
@@ -3,6 +3,7 @@ package workspace
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -36,6 +37,10 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
}
|
||||
detectedPath := filepath.Join(child, "chrome", "browser")
|
||||
if err := os.MkdirAll(detectedPath, 0o755); err != nil {
|
||||
t.Fatalf("mkdir detected path: %v", err)
|
||||
}
|
||||
|
||||
reg := &Registry{Version: 1}
|
||||
if _, err := reg.Add("parent", parent); err != nil {
|
||||
@@ -45,7 +50,7 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
|
||||
t.Fatalf("add child: %v", err)
|
||||
}
|
||||
|
||||
ws, err := Detect(reg, filepath.Join(child, "chrome", "browser"))
|
||||
ws, err := Detect(reg, detectedPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Detect: %v", err)
|
||||
}
|
||||
@@ -53,3 +58,86 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
|
||||
t.Fatalf("expected child workspace, got %q", ws.Name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectMatchesSymlinkedWorkingDirectory(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
workspacePath := filepath.Join(root, "chromium-1", "src")
|
||||
if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir workspace: %v", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(workspacePath, "chrome", "browser"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir workspace child: %v", err)
|
||||
}
|
||||
linkPath := filepath.Join(root, "ch-1")
|
||||
if err := os.Symlink(workspacePath, linkPath); err != nil {
|
||||
t.Fatalf("symlink workspace: %v", err)
|
||||
}
|
||||
|
||||
reg := &Registry{Version: 1}
|
||||
if _, err := reg.Add("ch1", workspacePath); err != nil {
|
||||
t.Fatalf("add workspace: %v", err)
|
||||
}
|
||||
|
||||
ws, err := Detect(reg, filepath.Join(linkPath, "chrome", "browser"))
|
||||
if err != nil {
|
||||
t.Fatalf("Detect: %v", err)
|
||||
}
|
||||
if ws.Name != "ch1" {
|
||||
t.Fatalf("expected ch1 workspace, got %q", ws.Name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegistryAddStoresCanonicalWorkspacePath(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
workspacePath := filepath.Join(root, "chromium-1", "src")
|
||||
if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir workspace: %v", err)
|
||||
}
|
||||
linkPath := filepath.Join(root, "ch-1")
|
||||
if err := os.Symlink(workspacePath, linkPath); err != nil {
|
||||
t.Fatalf("symlink workspace: %v", err)
|
||||
}
|
||||
|
||||
reg := &Registry{Version: 1}
|
||||
entry, err := reg.Add("ch1", linkPath)
|
||||
if err != nil {
|
||||
t.Fatalf("add workspace: %v", err)
|
||||
}
|
||||
expectedPath := canonicalPath(workspacePath)
|
||||
if entry.Path != expectedPath {
|
||||
t.Fatalf("expected canonical path %q, got %q", expectedPath, entry.Path)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectErrorIncludesPathContextAndWorkspaceHint(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
workspacePath := filepath.Join(root, "chromium-1", "src")
|
||||
if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
|
||||
t.Fatalf("mkdir workspace: %v", err)
|
||||
}
|
||||
outsidePath := filepath.Join(root, "outside")
|
||||
if err := os.MkdirAll(outsidePath, 0o755); err != nil {
|
||||
t.Fatalf("mkdir outside: %v", err)
|
||||
}
|
||||
|
||||
reg := &Registry{Version: 1}
|
||||
if _, err := reg.Add("ch1", workspacePath); err != nil {
|
||||
t.Fatalf("add workspace: %v", err)
|
||||
}
|
||||
|
||||
_, err := Detect(reg, outsidePath)
|
||||
if err == nil {
|
||||
t.Fatalf("expected Detect to fail")
|
||||
}
|
||||
message := err.Error()
|
||||
for _, want := range []string{
|
||||
"cwd: " + outsidePath,
|
||||
"registered workspaces:",
|
||||
"ch1 " + canonicalPath(workspacePath),
|
||||
"example: browseros-patch diff ch1",
|
||||
} {
|
||||
if !strings.Contains(message, want) {
|
||||
t.Fatalf("expected error to contain %q, got:\n%s", want, message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user