mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-21 21:05:09 +00:00
- Crosshair: 2.5x larger (20px ring, 40px lines, 3px stroke), glow filter, semi-transparent fill circle, bigger label
- Page ID: add resolvePageId() helper that falls back to first available page when the agent's page reference is stale
- Add prod-tasks.jsonl with 20 real-world queries covering finance, legal, healthcare, tech, real estate, HR, and more

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
289 lines
8.7 KiB
TypeScript
289 lines
8.7 KiB
TypeScript
import { randomUUID } from 'node:crypto'
|
|
import { mkdir, writeFile } from 'node:fs/promises'
|
|
import { join } from 'node:path'
|
|
import { AiSdkAgent } from '@browseros/server/agent/tool-loop'
|
|
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
|
import { Browser } from '@browseros/server/browser'
|
|
import { CdpBackend } from '@browseros/server/browser/backends/cdp'
|
|
import { registry } from '@browseros/server/tools/registry'
|
|
import type { Task } from '../../src/types'
|
|
import { injectCrosshair, removeCrosshair } from './crosshair'
|
|
import { buildTaskManifest, saveTaskManifest } from './manifest'
|
|
import type { ShowcaseStep, ShowcaseTaskManifest } from './types'
|
|
|
|
/**
 * Names of tools that address their target through an `element` id.
 *
 * When a call to one of these tools carries an `element` input, the
 * before-capture hook in `executeShowcaseTask` resolves that element's centre
 * point so the step can be annotated.
 */
const ELEMENT_TOOLS = new Set([
  'click',
  'fill',
  'hover',
  'clear',
  'select_option',
  'drag',
  'focus',
  'check',
  'uncheck',
])
|
|
|
|
/**
 * Names of tools that address their target through explicit `x` / `y`
 * inputs; those inputs are used directly as the annotation point.
 */
const COORDINATE_TOOLS = new Set<string>([
  'click_at',
  'hover_at',
  'type_at',
  'drag_at',
])
|
|
|
|
/**
 * Inert controller passed as the second `Browser` constructor argument.
 *
 * Every method is a no-op: `start`/`stop` resolve immediately, `send`
 * resolves to an empty object, and `isConnected` always reports `false`.
 */
const CONTROLLER_STUB = {
  start: async () => {},
  stop: async () => {},
  isConnected: () => false,
  send: async () => ({}),
  // biome-ignore lint/suspicious/noExplicitAny: ControllerBackend type not exported
} as any
|
|
|
|
/**
 * Resolves a page id that the browser currently knows about.
 *
 * Guards against a stale page reference: if `requestedId` is no longer among
 * the open pages, the first listed page is used instead.
 *
 * @param browser - Anything exposing `listPages()`; only that method is used,
 *   so a full `Browser` instance is accepted.
 * @param requestedId - The page id the caller would like to use.
 * @returns `requestedId` when that page is open; otherwise the id of the first
 *   listed page; or `requestedId` unchanged when no pages are listed at all.
 */
async function resolvePageId(
  browser: { listPages(): Promise<ReadonlyArray<{ pageId: number }>> },
  requestedId: number,
): Promise<number> {
  const pages = await browser.listPages()
  if (pages.some((page) => page.pageId === requestedId)) return requestedId

  // `pages[0]` is undefined for an empty list; fall through to the requested
  // id in that case rather than dereferencing it.
  const firstPage = pages[0]
  return firstPage?.pageId ?? requestedId
}
|
|
|
|
/** Outcome of a single `executeShowcaseTask` run. */
export interface ExecuteTaskResult {
  /** Manifest built for the run (also persisted via `saveTaskManifest`). */
  manifest: ShowcaseTaskManifest
  /**
   * `'completed'` when the agent run returned normally, `'timeout'` when it
   * threw after the timeout abort had fired, `'failed'` for any other error
   * thrown by the agent run.
   */
  status: 'completed' | 'timeout' | 'failed'
}
|
|
|
|
/** Renders an unknown thrown value as a log-friendly message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err)
}

/** Takes a viewport-only PNG screenshot of `pageId` and returns its bytes. */
async function captureViewport(
  browser: Browser,
  pageId: number,
): Promise<Buffer> {
  const shot = await browser.screenshot(pageId, {
    format: 'png',
    fullPage: false,
  })
  return Buffer.from(shot.data, 'base64')
}

/**
 * Runs one showcase task with the agent against a browser reached over CDP,
 * recording screenshots and metadata for every tool call, then builds and
 * saves a manifest describing the run.
 *
 * For each tool call a `<n>_before.png` and `<n>_after.png` viewport
 * screenshot is written under `<outputDir>/<executionId>/screenshots`; when a
 * target point can be determined, a `<n>_annotated.png` with a crosshair
 * overlay is written as well.
 *
 * @param task - Task to run; `query` is the agent prompt, `start_url` (when
 *   set and not `about:blank`) is opened first, and `query_id` / `dataset`
 *   are copied into the manifest.
 * @param cdpPort - Port used to construct the `CdpBackend`.
 * @param outputDir - Root directory for this run's artifacts and manifest.
 * @param agentConfig - Model/provider settings forwarded to the agent.
 * @param timeoutMs - Milliseconds after which the agent run is aborted.
 * @returns The saved manifest together with the run status.
 * @throws Errors from directory creation, CDP connect, page listing, the
 *   initial navigation, agent creation, or manifest saving propagate to the
 *   caller. Errors thrown by the agent run itself are caught and reported
 *   through `status` instead.
 */
export async function executeShowcaseTask(
  task: Task,
  cdpPort: number,
  outputDir: string,
  agentConfig: {
    model: string
    provider: string
    apiKey?: string
    baseUrl?: string
  },
  timeoutMs: number,
): Promise<ExecuteTaskResult> {
  const executionId = randomUUID()
  const taskDir = join(outputDir, executionId)
  const screenshotDir = join(taskDir, 'screenshots')
  await mkdir(screenshotDir, { recursive: true })

  const cdp = new CdpBackend({ port: cdpPort })
  await cdp.connect()

  let agent: AiSdkAgent | null = null

  // Everything after a successful connect runs inside this try so that the
  // CDP connection is released even when page listing or the initial
  // navigation throws.
  try {
    const browser = new Browser(cdp, CONTROLLER_STUB)

    const pages = await browser.listPages()
    const activePage = pages[0]
    let activePageId = activePage?.pageId ?? 1

    // Navigate to start URL
    if (task.start_url && task.start_url !== 'about:blank') {
      await browser.goto(activePageId, task.start_url)
    }

    const conversationId = randomUUID()
    const resolvedConfig: ResolvedAgentConfig = {
      conversationId,
      // biome-ignore lint/suspicious/noExplicitAny: LLMProvider type validated at runtime
      provider: agentConfig.provider as any,
      model: agentConfig.model,
      apiKey: agentConfig.apiKey,
      baseUrl: agentConfig.baseUrl,
      workingDir: `/tmp/browseros-showcase-${conversationId}`,
      evalMode: true,
      supportsImages: true,
    }

    const browserContext = activePage
      ? {
          activeTab: {
            id: activePage.tabId,
            pageId: activePage.pageId,
            url: activePage.url,
            title: activePage.title,
          },
        }
      : undefined

    const steps: ShowcaseStep[] = []
    let stepNum = 0
    // Step opened by the tool-call-start hook and closed by the finish hook.
    let pendingStep: Partial<ShowcaseStep> | null = null
    let finalText: string | null = null
    let status: ExecuteTaskResult['status'] = 'completed'
    const startTime = Date.now()

    agent = await AiSdkAgent.create({
      resolvedConfig,
      browser,
      registry,
      browserContext,
    })

    const abortController = new AbortController()
    const timeoutHandle = setTimeout(() => abortController.abort(), timeoutMs)

    try {
      const result = await agent.toolLoopAgent.generate({
        prompt: task.query,
        abortSignal: abortController.signal,

        experimental_onToolCallStart: async ({ toolCall }) => {
          try {
            const input = (toolCall.input ?? {}) as Record<string, unknown>
            // Follow the page the tool call targets, then make sure it
            // still exists.
            if (typeof input.page === 'number') {
              activePageId = input.page
            }
            const pageId = await resolvePageId(browser, activePageId)
            activePageId = pageId

            const beforePng = await captureViewport(browser, pageId)
            const beforePath = join(screenshotDir, `${stepNum}_before.png`)
            await writeFile(beforePath, beforePng)

            let axTree = ''
            try {
              axTree = await browser.snapshot(pageId)
            } catch {
              // snapshot can fail on some pages
            }

            let coords: { x: number; y: number } | undefined
            const elementId = input.element as number | undefined
            if (
              elementId !== undefined &&
              ELEMENT_TOOLS.has(toolCall.toolName)
            ) {
              try {
                coords = await browser.getElementCenter(pageId, elementId)
              } catch {
                // element may have been removed
              }
            } else if (
              COORDINATE_TOOLS.has(toolCall.toolName) &&
              typeof input.x === 'number' &&
              typeof input.y === 'number'
            ) {
              coords = { x: input.x, y: input.y }
            }

            const step: Partial<ShowcaseStep> = {
              stepIndex: stepNum,
              toolName: toolCall.toolName,
              toolInput: input,
              beforeScreenshot: beforePath,
              accessibilitySnapshot: axTree,
              elementCoordinates: coords,
              timestamp: new Date().toISOString(),
            }
            pendingStep = step

            if (coords) {
              try {
                await injectCrosshair(
                  browser,
                  pageId,
                  coords,
                  toolCall.toolName,
                )
                try {
                  const annotatedPng = await captureViewport(browser, pageId)
                  const annotatedPath = join(
                    screenshotDir,
                    `${stepNum}_annotated.png`,
                  )
                  await writeFile(annotatedPath, annotatedPng)
                  step.annotatedScreenshot = annotatedPath
                } finally {
                  // Always take the overlay down once it has been injected,
                  // even if the annotated capture failed; otherwise it
                  // would still be on the page while the tool acts and in
                  // the "after" screenshot.
                  await removeCrosshair(browser, pageId)
                }
              } catch {
                // annotation is best-effort
              }
            }
          } catch (err) {
            console.warn(
              ` Step ${stepNum} before-capture failed: ${describeError(err)}`,
            )
          }
        },

        experimental_onToolCallFinish: async ({ toolResult }) => {
          try {
            const pageId = await resolvePageId(browser, activePageId)
            activePageId = pageId
            const afterPng = await captureViewport(browser, pageId)
            const afterPath = join(screenshotDir, `${stepNum}_after.png`)
            await writeFile(afterPath, afterPng)

            // Only a step opened by the start hook is recorded; the step
            // counter advances only when a step is actually stored.
            if (pendingStep) {
              pendingStep.afterScreenshot = afterPath
              pendingStep.toolOutput = toolResult
              steps.push(pendingStep as ShowcaseStep)
              stepNum++
            }
          } catch (err) {
            console.warn(
              ` Step ${stepNum} after-capture failed: ${describeError(err)}`,
            )
          }
          pendingStep = null
        },

        onStepFinish: async ({ text }) => {
          // Attach the assistant's text to the most recently recorded step.
          if (text && steps.length > 0) {
            const lastStep = steps[steps.length - 1]
            lastStep.assistantText = text
          }
        },
      })

      finalText = result.text || null
    } catch (err) {
      // An error after our own abort fired is reported as a timeout;
      // anything else is a failure.
      if (abortController.signal.aborted) {
        status = 'timeout'
        console.log(` ${task.query_id}: timed out after ${timeoutMs / 1000}s`)
      } else {
        status = 'failed'
        console.error(` ${task.query_id}: failed — ${describeError(err)}`)
      }
    } finally {
      clearTimeout(timeoutHandle)
    }

    const totalDurationMs = Date.now() - startTime

    const manifest = buildTaskManifest({
      executionId,
      taskId: task.query_id,
      query: task.query,
      startUrl: task.start_url ?? 'about:blank',
      dataset: task.dataset,
      steps,
      finalAnswer: finalText,
      model: agentConfig.model,
      provider: agentConfig.provider,
      totalDurationMs,
    })

    await saveTaskManifest(outputDir, executionId, manifest)

    return { manifest, status }
  } finally {
    // Cleanup is best-effort: failures here must not mask the run's outcome.
    if (agent) await agent.dispose().catch(() => {})
    await cdp.disconnect().catch(() => {})
  }
}
|