mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-14 16:14:28 +00:00
Compare commits
2 Commits
fix/patch-
...
fix/evals-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd73a4f5e1 | ||
|
|
f2f8f5cbd2 |
@@ -1,22 +1,27 @@
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import { MAX_ACTIONS_PER_DELEGATION } from '../../constants'
|
||||
import { McpClient, type McpToolResult } from '../../utils/mcp-client'
|
||||
import { sleep } from '../../utils/sleep'
|
||||
import { MAX_ACTIONS_PER_DELEGATION } from '../../../../constants'
|
||||
import { McpClient, type McpToolResult } from '../../../../utils/mcp-client'
|
||||
import { sleep } from '../../../../utils/sleep'
|
||||
import type {
|
||||
ExecutorConfig,
|
||||
ExecutorResult,
|
||||
} from '../../../orchestrator-executor/types'
|
||||
import type { ExecutorCallbacks } from '../../executor-backend'
|
||||
import {
|
||||
extractCladoThinking,
|
||||
formatCladoHistory,
|
||||
getCladoActionSignature,
|
||||
parseCladoActions,
|
||||
summarizeCladoPrediction,
|
||||
} from '../orchestrated/backends/clado/clado-actions'
|
||||
} from './clado-actions'
|
||||
import {
|
||||
normalizeCladoDirection,
|
||||
normalizeCladoPressKey,
|
||||
normalizeCladoScrollAmount,
|
||||
prepareCladoToolArgs,
|
||||
resolveCladoPoint,
|
||||
} from '../orchestrated/backends/clado/clado-browser-driver'
|
||||
import { CladoActionClient } from '../orchestrated/backends/clado/clado-client'
|
||||
} from './clado-browser-driver'
|
||||
import { CladoActionClient } from './clado-client'
|
||||
import {
|
||||
CLADO_ACTION_PROVIDER,
|
||||
type CladoAction,
|
||||
@@ -24,9 +29,7 @@ import {
|
||||
type CladoActionResponse,
|
||||
type CladoViewport,
|
||||
isCladoActionProvider,
|
||||
} from '../orchestrated/backends/clado/types'
|
||||
import type { ExecutorCallbacks } from './executor'
|
||||
import type { ExecutorConfig, ExecutorResult } from './types'
|
||||
} from './types'
|
||||
|
||||
const MAX_CONSECUTIVE_PARSE_FAILURES = 3
|
||||
|
||||
@@ -45,10 +48,8 @@ export class CladoActionExecutor {
|
||||
private currentUrl = ''
|
||||
|
||||
constructor(
|
||||
private readonly config: ExecutorConfig,
|
||||
config: ExecutorConfig,
|
||||
serverUrl: string,
|
||||
readonly _windowId?: number,
|
||||
readonly _tabId?: number,
|
||||
initialPageId?: number,
|
||||
) {
|
||||
if (!isCladoActionProvider(config.provider)) {
|
||||
@@ -0,0 +1,56 @@
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
import type {
|
||||
DelegationResult,
|
||||
ExecutorBackend,
|
||||
ExecutorCallbacks,
|
||||
} from '../../executor-backend'
|
||||
import { CladoActionExecutor } from './clado-action-executor'
|
||||
|
||||
export interface CladoExecutorBackendOptions {
|
||||
configTemplate: ResolvedAgentConfig
|
||||
serverUrl: string
|
||||
initialPageId?: number
|
||||
callbacks?: ExecutorCallbacks
|
||||
}
|
||||
|
||||
/** Executes delegated goals through the Clado visual action model. */
|
||||
export class CladoExecutorBackend implements ExecutorBackend {
|
||||
readonly kind = 'clado'
|
||||
private executor: CladoActionExecutor | null = null
|
||||
|
||||
constructor(private readonly options: CladoExecutorBackendOptions) {}
|
||||
|
||||
async execute(
|
||||
instruction: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<DelegationResult> {
|
||||
const executor = this.getExecutor()
|
||||
const result = await executor.execute(instruction, signal)
|
||||
return result
|
||||
}
|
||||
|
||||
async close(): Promise<void> {
|
||||
await this.executor?.close()
|
||||
}
|
||||
|
||||
getTotalSteps(): number {
|
||||
return this.executor?.getTotalSteps() ?? 0
|
||||
}
|
||||
|
||||
private getExecutor(): CladoActionExecutor {
|
||||
if (this.executor) return this.executor
|
||||
|
||||
this.executor = new CladoActionExecutor(
|
||||
{
|
||||
provider: this.options.configTemplate.provider,
|
||||
model: this.options.configTemplate.model,
|
||||
apiKey: this.options.configTemplate.apiKey ?? '',
|
||||
baseUrl: this.options.configTemplate.baseUrl,
|
||||
},
|
||||
this.options.serverUrl,
|
||||
this.options.initialPageId,
|
||||
)
|
||||
this.executor.setCallbacks(this.options.callbacks ?? {})
|
||||
return this.executor
|
||||
}
|
||||
}
|
||||
@@ -1,8 +1,13 @@
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
import type { Browser } from '@browseros/server/browser'
|
||||
import type { ExecutorCallbacks } from '../../orchestrator-executor/executor'
|
||||
import type { ExecutorBackend, ExecutorBackendKind } from '../executor-backend'
|
||||
import { ExecutorAdapterBackend } from './tool-loop-backend'
|
||||
import type {
|
||||
ExecutorBackend,
|
||||
ExecutorBackendKind,
|
||||
ExecutorCallbacks,
|
||||
} from '../executor-backend'
|
||||
import { CladoExecutorBackend } from './clado/clado-executor-backend'
|
||||
import { isCladoActionProvider } from './clado/types'
|
||||
import { ToolLoopExecutorBackend } from './tool-loop/tool-loop-executor-backend'
|
||||
|
||||
export interface CreateExecutorBackendOptions {
|
||||
backendKind?: ExecutorBackendKind
|
||||
@@ -18,28 +23,38 @@ export interface CreateExecutorBackendOptions {
|
||||
}
|
||||
|
||||
export function backendKindForProvider(provider: string): ExecutorBackendKind {
|
||||
return provider === 'clado-action' ? 'clado' : 'tool-loop'
|
||||
return isCladoActionProvider(provider) ? 'clado' : 'tool-loop'
|
||||
}
|
||||
|
||||
/** Creates the backend used for one orchestrator delegation. */
|
||||
export function createExecutorBackend(
|
||||
options: CreateExecutorBackendOptions,
|
||||
): ExecutorBackend {
|
||||
if (options.executor) return options.executor
|
||||
|
||||
const kind =
|
||||
options.backendKind ??
|
||||
backendKindForProvider(
|
||||
options.provider ?? options.configTemplate?.provider ?? '',
|
||||
)
|
||||
|
||||
return new ExecutorAdapterBackend({
|
||||
kind,
|
||||
configTemplate: options.configTemplate,
|
||||
browser: options.browser,
|
||||
serverUrl: options.serverUrl,
|
||||
windowId: options.windowId,
|
||||
tabId: options.tabId,
|
||||
initialPageId: options.initialPageId,
|
||||
if (kind === 'clado') {
|
||||
return new CladoExecutorBackend({
|
||||
configTemplate: required(options.configTemplate, 'configTemplate'),
|
||||
serverUrl: required(options.serverUrl, 'serverUrl'),
|
||||
initialPageId: options.initialPageId,
|
||||
callbacks: options.callbacks,
|
||||
})
|
||||
}
|
||||
|
||||
return new ToolLoopExecutorBackend({
|
||||
configTemplate: required(options.configTemplate, 'configTemplate'),
|
||||
browser: options.browser ?? null,
|
||||
callbacks: options.callbacks,
|
||||
executor: options.executor,
|
||||
})
|
||||
}
|
||||
|
||||
function required<T>(value: T | undefined, name: string): T {
|
||||
if (value === undefined) throw new Error(`${name} is required`)
|
||||
return value
|
||||
}
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
import type { Browser } from '@browseros/server/browser'
|
||||
import {
|
||||
Executor,
|
||||
type ExecutorCallbacks,
|
||||
} from '../../orchestrator-executor/executor'
|
||||
import type {
|
||||
DelegationResult,
|
||||
ExecutorBackend,
|
||||
ExecutorBackendKind,
|
||||
} from '../executor-backend'
|
||||
|
||||
interface ExecutorRunner {
|
||||
execute(instruction: string, signal?: AbortSignal): Promise<DelegationResult>
|
||||
close(): Promise<void>
|
||||
getTotalSteps(): number
|
||||
}
|
||||
|
||||
export interface ExecutorAdapterBackendOptions {
|
||||
kind: ExecutorBackendKind
|
||||
configTemplate?: ResolvedAgentConfig
|
||||
browser?: Browser | null
|
||||
serverUrl?: string
|
||||
windowId?: number
|
||||
tabId?: number
|
||||
initialPageId?: number
|
||||
callbacks?: ExecutorCallbacks
|
||||
executor?: ExecutorRunner
|
||||
}
|
||||
|
||||
export class ExecutorAdapterBackend implements ExecutorBackend {
|
||||
readonly kind: ExecutorBackendKind
|
||||
private readonly executor: ExecutorRunner
|
||||
|
||||
constructor(options: ExecutorAdapterBackendOptions) {
|
||||
this.kind = options.kind
|
||||
this.executor =
|
||||
options.executor ??
|
||||
new Executor(
|
||||
required(options.configTemplate, 'configTemplate'),
|
||||
options.browser ?? null,
|
||||
required(options.serverUrl, 'serverUrl'),
|
||||
{
|
||||
isCladoAction: options.kind === 'clado',
|
||||
windowId: options.windowId,
|
||||
tabId: options.tabId,
|
||||
initialPageId: options.initialPageId,
|
||||
callbacks: options.callbacks,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
execute(
|
||||
instruction: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<DelegationResult> {
|
||||
return this.executor.execute(instruction, signal)
|
||||
}
|
||||
|
||||
close(): Promise<void> {
|
||||
return this.executor.close()
|
||||
}
|
||||
|
||||
getTotalSteps(): number {
|
||||
return this.executor.getTotalSteps()
|
||||
}
|
||||
}
|
||||
|
||||
function required<T>(value: T | undefined, name: string): T {
|
||||
if (value === undefined) throw new Error(`${name} is required`)
|
||||
return value
|
||||
}
|
||||
@@ -0,0 +1,144 @@
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import { AiSdkAgent } from '@browseros/server/agent/tool-loop'
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
import type { Browser } from '@browseros/server/browser'
|
||||
import { registry } from '@browseros/server/tools/registry'
|
||||
import type { BrowserContext } from '@browseros/shared/schemas/browser-context'
|
||||
import type {
|
||||
DelegationResult,
|
||||
ExecutorBackend,
|
||||
ExecutorCallbacks,
|
||||
} from '../../executor-backend'
|
||||
import { TOOL_LOOP_EXECUTOR_SYSTEM_PROMPT } from './tool-loop-executor-prompt'
|
||||
|
||||
export interface ToolLoopExecutorBackendOptions {
|
||||
configTemplate: ResolvedAgentConfig
|
||||
browser: Browser | null
|
||||
callbacks?: ExecutorCallbacks
|
||||
}
|
||||
|
||||
/** Executes delegated goals through the BrowserOS ToolLoopAgent. */
|
||||
export class ToolLoopExecutorBackend implements ExecutorBackend {
|
||||
readonly kind = 'tool-loop'
|
||||
private stepsUsed = 0
|
||||
private currentUrl = ''
|
||||
|
||||
constructor(private readonly options: ToolLoopExecutorBackendOptions) {}
|
||||
|
||||
async execute(
|
||||
instruction: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<DelegationResult> {
|
||||
const browser = this.options.browser
|
||||
if (!browser) {
|
||||
throw new Error('Browser instance is required for tool-loop executor')
|
||||
}
|
||||
|
||||
const stepsAtStart = this.stepsUsed
|
||||
const toolsUsed: string[] = []
|
||||
let status: DelegationResult['status'] = 'done'
|
||||
let resultText = ''
|
||||
|
||||
const conversationId = randomUUID()
|
||||
const agentConfig: ResolvedAgentConfig = {
|
||||
...this.options.configTemplate,
|
||||
conversationId,
|
||||
userSystemPrompt: TOOL_LOOP_EXECUTOR_SYSTEM_PROMPT,
|
||||
evalMode: true,
|
||||
workingDir: `/tmp/browseros-eval-executor-${conversationId}`,
|
||||
}
|
||||
|
||||
const browserContext = await this.browserContext(browser)
|
||||
let agent: AiSdkAgent | null = null
|
||||
|
||||
try {
|
||||
agent = await AiSdkAgent.create({
|
||||
resolvedConfig: agentConfig,
|
||||
browser,
|
||||
registry,
|
||||
browserContext,
|
||||
})
|
||||
|
||||
await agent.toolLoopAgent.generate({
|
||||
prompt: instruction,
|
||||
abortSignal: signal,
|
||||
|
||||
experimental_onToolCallStart: ({ toolCall }) => {
|
||||
const input = toolCall.input as Record<string, unknown> | undefined
|
||||
if (input && typeof input.url === 'string' && input.url.length > 0) {
|
||||
this.currentUrl = input.url
|
||||
}
|
||||
this.options.callbacks?.onToolCallStart?.({
|
||||
toolCallId: toolCall.toolCallId,
|
||||
toolName: toolCall.toolName,
|
||||
input: toolCall.input,
|
||||
})
|
||||
},
|
||||
|
||||
experimental_onToolCallFinish: async () => {
|
||||
this.stepsUsed++
|
||||
await this.options.callbacks?.onToolCallFinish?.()
|
||||
},
|
||||
|
||||
onStepFinish: async ({ toolCalls, toolResults, text }) => {
|
||||
if (toolCalls) {
|
||||
for (const toolCall of toolCalls) {
|
||||
if (!toolsUsed.includes(toolCall.toolName)) {
|
||||
toolsUsed.push(toolCall.toolName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (text) resultText = text
|
||||
|
||||
await this.options.callbacks?.onStepFinish?.({
|
||||
toolCalls,
|
||||
toolResults,
|
||||
text,
|
||||
})
|
||||
},
|
||||
})
|
||||
} catch {
|
||||
status = signal?.aborted ? 'timeout' : 'blocked'
|
||||
} finally {
|
||||
if (agent) await agent.dispose().catch(() => {})
|
||||
}
|
||||
|
||||
if (status === 'done' && signal?.aborted) {
|
||||
status = 'timeout'
|
||||
}
|
||||
|
||||
return {
|
||||
observation: resultText || 'Execution completed with no actions taken.',
|
||||
status,
|
||||
url: this.currentUrl,
|
||||
actionsPerformed: this.stepsUsed - stepsAtStart,
|
||||
toolsUsed,
|
||||
}
|
||||
}
|
||||
|
||||
async close(): Promise<void> {
|
||||
// No persistent resources; AiSdkAgent is disposed at the end of each execute() call.
|
||||
}
|
||||
|
||||
getTotalSteps(): number {
|
||||
return this.stepsUsed
|
||||
}
|
||||
|
||||
private async browserContext(
|
||||
browser: Browser,
|
||||
): Promise<BrowserContext | undefined> {
|
||||
const pages = await browser.listPages()
|
||||
const activePage = pages[0]
|
||||
if (!activePage) return undefined
|
||||
|
||||
return {
|
||||
activeTab: {
|
||||
id: activePage.tabId,
|
||||
pageId: activePage.pageId,
|
||||
url: activePage.url,
|
||||
title: activePage.title,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
export const TOOL_LOOP_EXECUTOR_SYSTEM_PROMPT = `You are a browser executor. You receive a single goal-level instruction and execute it using browser tools.
|
||||
|
||||
## Your Job
|
||||
1. Execute browser actions to achieve the given goal
|
||||
2. Stop as soon as the goal is accomplished -- do NOT perform extra actions
|
||||
3. Write a final observation describing the result
|
||||
|
||||
## Final Response Format
|
||||
When done, your response MUST include:
|
||||
- What you accomplished (or what went wrong)
|
||||
- What the page currently shows: key headings, links, data, or content visible
|
||||
- The current URL from the address bar
|
||||
- If you got stuck, what is blocking progress
|
||||
|
||||
## Rules
|
||||
- Only do what was asked. Do not navigate away, open extra tabs, or reorganize the browser.
|
||||
- If the goal is to navigate somewhere, confirm you arrived by describing what you see.
|
||||
- If the goal is to click something, confirm the result of the click.
|
||||
- If you cannot find what was asked for, say so clearly -- do not guess or improvise.
|
||||
- Prefer browser_navigate over browser_open_tab for going to URLs.
|
||||
- Do NOT call browser_group_tabs or other organizational tools.`
|
||||
@@ -3,6 +3,28 @@ import type { ExecutorResult } from '../orchestrator-executor/types'
|
||||
export type ExecutorBackendKind = 'tool-loop' | 'clado'
|
||||
export type DelegationResult = ExecutorResult
|
||||
|
||||
export interface ToolCallInfo {
|
||||
toolCallId: string
|
||||
toolName: string
|
||||
input: unknown
|
||||
}
|
||||
|
||||
export interface ToolResultInfo {
|
||||
toolCallId: string
|
||||
toolName: string
|
||||
output: unknown
|
||||
}
|
||||
|
||||
export interface ExecutorCallbacks {
|
||||
onToolCallStart?: (toolCall: ToolCallInfo) => void
|
||||
onToolCallFinish?: () => Promise<void>
|
||||
onStepFinish?: (step: {
|
||||
toolCalls?: ReadonlyArray<ToolCallInfo>
|
||||
toolResults?: ReadonlyArray<ToolResultInfo>
|
||||
text?: string
|
||||
}) => Promise<void>
|
||||
}
|
||||
|
||||
export interface ExecutorBackend {
|
||||
readonly kind: ExecutorBackendKind
|
||||
execute(instruction: string, signal?: AbortSignal): Promise<DelegationResult>
|
||||
|
||||
@@ -1,243 +0,0 @@
|
||||
/**
|
||||
* Executor - Wraps AiSdkAgent for page-level browser actions (direct CDP)
|
||||
*
|
||||
* The executor:
|
||||
* - Receives goal-level instructions from orchestrator
|
||||
* - Executes browser actions until the goal is accomplished
|
||||
* - Returns observation to orchestrator (not full history)
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import { AiSdkAgent } from '@browseros/server/agent/tool-loop'
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
import type { Browser } from '@browseros/server/browser'
|
||||
import { registry } from '@browseros/server/tools/registry'
|
||||
import type { BrowserContext } from '@browseros/shared/schemas/browser-context'
|
||||
import { CladoActionExecutor } from './clado-action-executor'
|
||||
import type { ExecutorResult } from './types'
|
||||
|
||||
const EXECUTOR_SYSTEM_PROMPT = `You are a browser executor. You receive a single goal-level instruction and execute it using browser tools.
|
||||
|
||||
## Your Job
|
||||
1. Execute browser actions to achieve the given goal
|
||||
2. Stop as soon as the goal is accomplished — do NOT perform extra actions
|
||||
3. Write a final observation describing the result
|
||||
|
||||
## Final Response Format
|
||||
When done, your response MUST include:
|
||||
- What you accomplished (or what went wrong)
|
||||
- What the page currently shows: key headings, links, data, or content visible
|
||||
- The current URL from the address bar
|
||||
- If you got stuck, what is blocking progress
|
||||
|
||||
## Rules
|
||||
- Only do what was asked. Do not navigate away, open extra tabs, or reorganize the browser.
|
||||
- If the goal is to navigate somewhere, confirm you arrived by describing what you see.
|
||||
- If the goal is to click something, confirm the result of the click.
|
||||
- If you cannot find what was asked for, say so clearly — do not guess or improvise.
|
||||
- Prefer browser_navigate over browser_open_tab for going to URLs.
|
||||
- Do NOT call browser_group_tabs or other organizational tools.`
|
||||
|
||||
export interface ToolCallInfo {
|
||||
toolCallId: string
|
||||
toolName: string
|
||||
input: unknown
|
||||
}
|
||||
|
||||
export interface ToolResultInfo {
|
||||
toolCallId: string
|
||||
toolName: string
|
||||
output: unknown
|
||||
}
|
||||
|
||||
export interface ExecutorCallbacks {
|
||||
onToolCallStart?: (toolCall: ToolCallInfo) => void
|
||||
onToolCallFinish?: () => Promise<void>
|
||||
onStepFinish?: (step: {
|
||||
toolCalls?: ReadonlyArray<ToolCallInfo>
|
||||
toolResults?: ReadonlyArray<ToolResultInfo>
|
||||
text?: string
|
||||
}) => Promise<void>
|
||||
}
|
||||
|
||||
export class Executor {
|
||||
private cladoExecutor: CladoActionExecutor | null = null
|
||||
private stepsUsed = 0
|
||||
private currentUrl = ''
|
||||
private configTemplate: ResolvedAgentConfig
|
||||
private isCladoAction: boolean
|
||||
private browser: Browser | null
|
||||
private serverUrl: string
|
||||
private windowId?: number
|
||||
private tabId?: number
|
||||
private initialPageId?: number
|
||||
private callbacks: ExecutorCallbacks
|
||||
|
||||
constructor(
|
||||
configTemplate: ResolvedAgentConfig,
|
||||
browser: Browser | null,
|
||||
serverUrl: string,
|
||||
options?: {
|
||||
isCladoAction?: boolean
|
||||
windowId?: number
|
||||
tabId?: number
|
||||
initialPageId?: number
|
||||
callbacks?: ExecutorCallbacks
|
||||
},
|
||||
) {
|
||||
this.configTemplate = configTemplate
|
||||
this.isCladoAction = options?.isCladoAction ?? false
|
||||
this.browser = browser
|
||||
this.serverUrl = serverUrl
|
||||
this.windowId = options?.windowId
|
||||
this.tabId = options?.tabId
|
||||
this.initialPageId = options?.initialPageId
|
||||
this.callbacks = options?.callbacks ?? {}
|
||||
}
|
||||
|
||||
async execute(
|
||||
instruction: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<ExecutorResult> {
|
||||
if (this.isCladoAction) {
|
||||
if (!this.cladoExecutor) {
|
||||
this.cladoExecutor = new CladoActionExecutor(
|
||||
{
|
||||
provider: this.configTemplate.provider,
|
||||
model: this.configTemplate.model,
|
||||
apiKey: this.configTemplate.apiKey ?? '',
|
||||
baseUrl: this.configTemplate.baseUrl,
|
||||
},
|
||||
this.serverUrl,
|
||||
this.windowId,
|
||||
this.tabId,
|
||||
this.initialPageId,
|
||||
)
|
||||
this.cladoExecutor.setCallbacks(this.callbacks)
|
||||
}
|
||||
|
||||
const result = await this.cladoExecutor.execute(instruction, signal)
|
||||
this.stepsUsed = this.cladoExecutor.getTotalSteps()
|
||||
this.currentUrl = result.url || this.currentUrl
|
||||
return result
|
||||
}
|
||||
|
||||
if (!this.browser) {
|
||||
throw new Error('Browser instance is required for standard executor path')
|
||||
}
|
||||
|
||||
const stepsAtStart = this.stepsUsed
|
||||
const toolsUsed: string[] = []
|
||||
let status: 'done' | 'blocked' | 'timeout' = 'done'
|
||||
let resultText = ''
|
||||
|
||||
const conversationId = randomUUID()
|
||||
const agentConfig: ResolvedAgentConfig = {
|
||||
...this.configTemplate,
|
||||
conversationId,
|
||||
userSystemPrompt: EXECUTOR_SYSTEM_PROMPT,
|
||||
evalMode: true,
|
||||
workingDir: `/tmp/browseros-eval-executor-${conversationId}`,
|
||||
}
|
||||
|
||||
// Build browser context so executor agent knows the correct page ID
|
||||
let browserContext: BrowserContext | undefined
|
||||
if (this.browser) {
|
||||
const pages = await this.browser.listPages()
|
||||
const activePage = pages[0]
|
||||
if (activePage) {
|
||||
browserContext = {
|
||||
activeTab: {
|
||||
id: activePage.tabId,
|
||||
pageId: activePage.pageId,
|
||||
url: activePage.url,
|
||||
title: activePage.title,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let agent: AiSdkAgent | null = null
|
||||
|
||||
try {
|
||||
agent = await AiSdkAgent.create({
|
||||
resolvedConfig: agentConfig,
|
||||
browser: this.browser,
|
||||
registry,
|
||||
browserContext,
|
||||
})
|
||||
|
||||
await agent.toolLoopAgent.generate({
|
||||
prompt: instruction,
|
||||
abortSignal: signal,
|
||||
|
||||
experimental_onToolCallStart: ({ toolCall }) => {
|
||||
const input = toolCall.input as Record<string, unknown> | undefined
|
||||
if (input && typeof input.url === 'string' && input.url.length > 0) {
|
||||
this.currentUrl = input.url
|
||||
}
|
||||
this.callbacks.onToolCallStart?.({
|
||||
toolCallId: toolCall.toolCallId,
|
||||
toolName: toolCall.toolName,
|
||||
input: toolCall.input,
|
||||
})
|
||||
},
|
||||
|
||||
experimental_onToolCallFinish: async () => {
|
||||
this.stepsUsed++
|
||||
await this.callbacks.onToolCallFinish?.()
|
||||
},
|
||||
|
||||
onStepFinish: async ({ toolCalls, toolResults, text }) => {
|
||||
if (toolCalls) {
|
||||
for (const tc of toolCalls) {
|
||||
if (!toolsUsed.includes(tc.toolName)) {
|
||||
toolsUsed.push(tc.toolName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (text) {
|
||||
resultText = text
|
||||
}
|
||||
|
||||
await this.callbacks.onStepFinish?.({ toolCalls, toolResults, text })
|
||||
},
|
||||
})
|
||||
} catch {
|
||||
if (signal?.aborted) {
|
||||
status = 'timeout'
|
||||
} else {
|
||||
status = 'blocked'
|
||||
}
|
||||
} finally {
|
||||
if (agent) await agent.dispose().catch(() => {})
|
||||
}
|
||||
|
||||
if (status === 'done' && signal?.aborted) {
|
||||
status = 'timeout'
|
||||
}
|
||||
|
||||
const observation =
|
||||
resultText || 'Execution completed with no actions taken.'
|
||||
|
||||
return {
|
||||
observation,
|
||||
status,
|
||||
url: this.currentUrl,
|
||||
actionsPerformed: this.stepsUsed - stepsAtStart,
|
||||
toolsUsed,
|
||||
}
|
||||
}
|
||||
|
||||
async close(): Promise<void> {
|
||||
await this.cladoExecutor?.close()
|
||||
}
|
||||
|
||||
getTotalSteps(): number {
|
||||
if (this.isCladoAction) {
|
||||
return this.cladoExecutor?.getTotalSteps() ?? 0
|
||||
}
|
||||
return this.stepsUsed
|
||||
}
|
||||
}
|
||||
@@ -24,16 +24,16 @@ import {
|
||||
resolveProviderConfig,
|
||||
} from '../../utils/resolve-provider-config'
|
||||
import { withEvalTimeout } from '../../utils/with-eval-timeout'
|
||||
import { isCladoActionProvider } from '../orchestrated/backends/clado/types'
|
||||
import { createExecutorBackend } from '../orchestrated/backends/create-executor-backend'
|
||||
import type { ExecutorCallbacks } from '../orchestrated/executor-backend'
|
||||
import type { AgentContext, AgentEvaluator, AgentResult } from '../types'
|
||||
import type { ExecutorCallbacks } from './executor'
|
||||
import { OrchestratorAgent } from './orchestrator-agent'
|
||||
import type { ExecutorFactory, ExecutorResult } from './types'
|
||||
|
||||
interface ResolvedConfigs {
|
||||
orchestratorConfig: ResolvedAgentConfig & { maxTurns?: number }
|
||||
executorConfig: ResolvedAgentConfig
|
||||
isCladoAction: boolean
|
||||
}
|
||||
|
||||
function toResolvedAgentConfig(
|
||||
@@ -68,7 +68,10 @@ async function resolveAgentConfig(
|
||||
if (!executorModel) {
|
||||
throw new Error('executor.model is required in config')
|
||||
}
|
||||
if (config.executor.provider === 'clado-action' && !config.executor.baseUrl) {
|
||||
if (
|
||||
isCladoActionProvider(config.executor.provider) &&
|
||||
!config.executor.baseUrl
|
||||
) {
|
||||
throw new Error(
|
||||
'executor.baseUrl is required in config for clado-action provider',
|
||||
)
|
||||
@@ -76,10 +79,8 @@ async function resolveAgentConfig(
|
||||
|
||||
const resolvedOrchestrator = await resolveProviderConfig(config.orchestrator)
|
||||
|
||||
const isCladoAction = config.executor.provider === 'clado-action'
|
||||
|
||||
let executorConfig: ResolvedAgentConfig
|
||||
if (isCladoAction) {
|
||||
if (isCladoActionProvider(config.executor.provider)) {
|
||||
executorConfig = {
|
||||
conversationId: crypto.randomUUID(),
|
||||
provider: config.executor.provider as ResolvedAgentConfig['provider'],
|
||||
@@ -108,7 +109,7 @@ async function resolveAgentConfig(
|
||||
maxTurns: config.orchestrator.maxTurns,
|
||||
}
|
||||
|
||||
return { orchestratorConfig, executorConfig, isCladoAction }
|
||||
return { orchestratorConfig, executorConfig }
|
||||
}
|
||||
|
||||
export class OrchestratorExecutorEvaluator implements AgentEvaluator {
|
||||
@@ -128,7 +129,7 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
|
||||
}
|
||||
|
||||
const agentConfig = config.agent as OrchestratorExecutorConfig
|
||||
const { orchestratorConfig, executorConfig, isCladoAction } =
|
||||
const { orchestratorConfig, executorConfig } =
|
||||
await resolveAgentConfig(agentConfig)
|
||||
|
||||
// Connect to Chrome via CDP — same per-worker offset used by app-manager.
|
||||
@@ -237,7 +238,6 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
|
||||
capture.emitEvent(task.query_id, delegateInputEvent)
|
||||
|
||||
const executor = createExecutorBackend({
|
||||
backendKind: isCladoAction ? 'clado' : 'tool-loop',
|
||||
configTemplate: executorConfig,
|
||||
browser,
|
||||
serverUrl: config.browseros.server_url,
|
||||
@@ -331,6 +331,5 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
|
||||
}
|
||||
}
|
||||
|
||||
export { Executor } from './executor'
|
||||
export { OrchestratorAgent } from './orchestrator-agent'
|
||||
export * from './types'
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { CladoExecutorBackend } from '../../src/agents/orchestrated/backends/clado/clado-executor-backend'
|
||||
import {
|
||||
backendKindForProvider,
|
||||
createExecutorBackend,
|
||||
} from '../../src/agents/orchestrated/backends/create-executor-backend'
|
||||
import { ToolLoopExecutorBackend } from '../../src/agents/orchestrated/backends/tool-loop/tool-loop-executor-backend'
|
||||
import type { ExecutorBackend } from '../../src/agents/orchestrated/executor-backend'
|
||||
|
||||
describe('executor backend boundary', () => {
|
||||
@@ -11,6 +13,32 @@ describe('executor backend boundary', () => {
|
||||
expect(backendKindForProvider('openai-compatible')).toBe('tool-loop')
|
||||
})
|
||||
|
||||
it('creates concrete backend classes for each executor path', () => {
|
||||
expect(
|
||||
createExecutorBackend({
|
||||
backendKind: 'tool-loop',
|
||||
configTemplate: {
|
||||
provider: 'openai-compatible',
|
||||
model: 'tool-loop-model',
|
||||
},
|
||||
browser: null,
|
||||
serverUrl: 'http://127.0.0.1:9110',
|
||||
}),
|
||||
).toBeInstanceOf(ToolLoopExecutorBackend)
|
||||
|
||||
expect(
|
||||
createExecutorBackend({
|
||||
backendKind: 'clado',
|
||||
configTemplate: {
|
||||
provider: 'clado-action',
|
||||
model: 'clado-model',
|
||||
baseUrl: 'https://clado.example.test',
|
||||
},
|
||||
serverUrl: 'http://127.0.0.1:9110',
|
||||
}),
|
||||
).toBeInstanceOf(CladoExecutorBackend)
|
||||
})
|
||||
|
||||
it('forwards execution and step state through the backend interface', async () => {
|
||||
const signal = new AbortController().signal
|
||||
const fakeBackend: ExecutorBackend = {
|
||||
@@ -33,7 +61,6 @@ describe('executor backend boundary', () => {
|
||||
}
|
||||
|
||||
const backend = createExecutorBackend({
|
||||
backendKind: 'tool-loop',
|
||||
executor: fakeBackend,
|
||||
})
|
||||
const result = await backend.execute('Click checkout', signal)
|
||||
|
||||
Reference in New Issue
Block a user