mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-18 11:06:19 +00:00
feat: new APIs for eval mode support (#250)
* feat: eval mode * feat: eval mode
This commit is contained in:
33
apps/controller-ext/src/actions/browser/CloseWindowAction.ts
Normal file
33
apps/controller-ext/src/actions/browser/CloseWindowAction.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
import { z } from 'zod'
|
||||
import { ActionHandler } from '../ActionHandler'
|
||||
|
||||
/** Input accepted by the close-window action: a positive integer window ID. */
const CloseWindowInputSchema = z.object({
  windowId: z
    .number()
    .int()
    .positive()
    .describe('ID of the window to close'),
})

/** Result reported by the close-window action. */
const CloseWindowOutputSchema = z.object({
  success: z
    .boolean()
    .describe('Whether the window was successfully closed'),
})

// Static types are inferred from the schemas so the two cannot drift apart.
type CloseWindowInput = z.infer<typeof CloseWindowInputSchema>
type CloseWindowOutput = z.infer<typeof CloseWindowOutputSchema>
|
||||
|
||||
/**
 * Action handler that closes a browser window by ID.
 *
 * The accepted payload is described by `CloseWindowInputSchema`.
 */
export class CloseWindowAction extends ActionHandler<
  CloseWindowInput,
  CloseWindowOutput
> {
  readonly inputSchema = CloseWindowInputSchema

  /**
   * Removes the window identified by `input.windowId` using
   * `chrome.windows.remove`.
   *
   * @param input - Payload carrying the ID of the window to close.
   * @returns `{ success: true }` once the removal call has resolved. A
   *   rejection from `chrome.windows.remove` is not caught here and
   *   propagates to the caller.
   */
  async execute(input: CloseWindowInput): Promise<CloseWindowOutput> {
    const { windowId } = input
    await chrome.windows.remove(windowId)
    return { success: true }
  }
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
import { z } from 'zod'
|
||||
import { ActionHandler } from '../ActionHandler'
|
||||
|
||||
// Field schemas for the create-window payload. Every field is optional for
// callers and falls back to the default given here when omitted.
const urlField = z
  .string()
  .optional()
  .default('about:blank')
  .describe('URL to open in the new window')

const incognitoField = z
  .boolean()
  .optional()
  .default(false)
  .describe('Create an incognito window')

const focusedField = z
  .boolean()
  .optional()
  .default(true)
  .describe('Whether to focus the new window')

/** Input accepted by the create-window action. */
const CreateWindowInputSchema = z.object({
  url: urlField,
  incognito: incognitoField,
  focused: focusedField,
})

/** Result reported by the create-window action. */
const CreateWindowOutputSchema = z.object({
  windowId: z.number().describe('ID of the newly created window'),
  tabId: z.number().describe('ID of the first tab in the new window'),
})

// Static types are inferred from the schemas so the two cannot drift apart.
type CreateWindowInput = z.infer<typeof CreateWindowInputSchema>
type CreateWindowOutput = z.infer<typeof CreateWindowOutputSchema>
|
||||
|
||||
/**
 * Action handler that opens a new browser window and reports the IDs of the
 * window and of its first tab.
 *
 * The accepted payload is described by `CreateWindowInputSchema`.
 */
export class CreateWindowAction extends ActionHandler<
  CreateWindowInput,
  CreateWindowOutput
> {
  readonly inputSchema = CreateWindowInputSchema

  /**
   * Creates a window through `chrome.windows.create` and extracts its IDs.
   *
   * @param input - URL, focus and incognito settings for the new window.
   * @returns The ID of the created window and the ID of its first tab.
   * @throws Error if no window object is returned, if the window has no ID,
   *   or if the window has no first tab with an ID.
   */
  async execute(input: CreateWindowInput): Promise<CreateWindowOutput> {
    const { url, focused, incognito } = input
    const options: chrome.windows.CreateData = { url, focused, incognito }

    const win = await chrome.windows.create(options)
    if (!win) {
      throw new Error('Failed to create window')
    }

    const windowId = win.id
    if (windowId === undefined) {
      throw new Error('Created window has no ID')
    }

    // Only the first tab of the new window is reported back to the caller.
    const firstTabId = win.tabs?.[0]?.id
    if (firstTabId === undefined) {
      throw new Error('Created window has no tab')
    }

    return { windowId, tabId: firstTabId }
  }
}
|
||||
@@ -12,6 +12,8 @@ import { CaptureScreenshotPointerAction } from '@/actions/browser/CaptureScreens
|
||||
import { ClearAction } from '@/actions/browser/ClearAction'
|
||||
import { ClickAction } from '@/actions/browser/ClickAction'
|
||||
import { ClickCoordinatesAction } from '@/actions/browser/ClickCoordinatesAction'
|
||||
import { CloseWindowAction } from '@/actions/browser/CloseWindowAction'
|
||||
import { CreateWindowAction } from '@/actions/browser/CreateWindowAction'
|
||||
import { ExecuteJavaScriptAction } from '@/actions/browser/ExecuteJavaScriptAction'
|
||||
import { GetAccessibilityTreeAction } from '@/actions/browser/GetAccessibilityTreeAction'
|
||||
import { GetInteractiveSnapshotAction } from '@/actions/browser/GetInteractiveSnapshotAction'
|
||||
@@ -192,6 +194,9 @@ export class BrowserOSController {
|
||||
this.actionRegistry.register('updateTabGroup', new UpdateTabGroupAction())
|
||||
this.actionRegistry.register('ungroupTabs', new UngroupTabsAction())
|
||||
|
||||
this.actionRegistry.register('createWindow', new CreateWindowAction())
|
||||
this.actionRegistry.register('closeWindow', new CloseWindowAction())
|
||||
|
||||
this.actionRegistry.register('getBookmarks', new GetBookmarksAction())
|
||||
this.actionRegistry.register('createBookmark', new CreateBookmarkAction())
|
||||
this.actionRegistry.register('removeBookmark', new RemoveBookmarkAction())
|
||||
|
||||
@@ -23,6 +23,18 @@
|
||||
".": {
|
||||
"types": "./src/rpc.ts",
|
||||
"import": "./src/rpc.ts"
|
||||
},
|
||||
"./agent": {
|
||||
"types": "./src/agent/gemini-agent.ts",
|
||||
"default": "./src/agent/gemini-agent.ts"
|
||||
},
|
||||
"./agent/types": {
|
||||
"types": "./src/agent/types.ts",
|
||||
"default": "./src/agent/types.ts"
|
||||
},
|
||||
"./lib/clients/gateway": {
|
||||
"types": "./src/lib/clients/gateway.ts",
|
||||
"default": "./src/lib/clients/gateway.ts"
|
||||
}
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
@@ -26,13 +26,29 @@ import type { HonoSSEStream } from './provider-adapter/types'
|
||||
import { UIMessageStreamWriter } from './provider-adapter/ui-message-stream'
|
||||
import type { ResolvedAgentConfig } from './types'
|
||||
|
||||
interface ToolExecutionResult {
|
||||
/**
 * Outcome of executing a single tool call. Exported so that
 * `ToolExecutionHooks` implementations can type the result they receive.
 */
export interface ToolExecutionResult {
  /** Response parts produced by the tool call. */
  parts: Part[]
  /** NOTE(review): presumably true when the tool call failed — confirm against handleToolExecution. */
  isError: boolean
  /** NOTE(review): presumably only populated when `isError` is true — confirm. */
  errorMessage?: string
}
|
||||
|
||||
/**
 * Optional callbacks invoked around each tool call made by `GeminiAgent`.
 *
 * Both callbacks are awaited by the agent's tool loop, so a slow hook delays
 * the loop. NOTE(review): no error handling is visible around the hook calls,
 * so a rejected hook promise presumably aborts the loop — confirm.
 */
export interface ToolExecutionHooks {
  /**
   * Awaited immediately before a requested tool call is executed.
   *
   * @param toolName - Name of the tool about to run.
   * @param args - Arguments of the tool call request, passed through as-is.
   * @param browserContext - Browser context of the current run, if any.
   */
  onBeforeToolCall?: (
    toolName: string,
    args: unknown,
    browserContext?: BrowserContext,
  ) => Promise<void>

  /**
   * Awaited right after a tool call has finished, before its response parts
   * are collected.
   *
   * @param toolName - Name of the tool that ran.
   * @param result - Result returned by the tool execution.
   * @param browserContext - Browser context of the current run, if any.
   */
  onAfterToolCall?: (
    toolName: string,
    result: ToolExecutionResult,
    browserContext?: BrowserContext,
  ) => Promise<void>
}
|
||||
|
||||
export class GeminiAgent {
|
||||
private toolHooks?: ToolExecutionHooks
|
||||
|
||||
private constructor(
|
||||
private client: GeminiClient,
|
||||
private geminiConfig: GeminiConfig,
|
||||
@@ -40,6 +56,10 @@ export class GeminiAgent {
|
||||
private conversationId: string,
|
||||
) {}
|
||||
|
||||
/**
 * Installs the callbacks to run around each tool call.
 * Any hooks set by an earlier call are replaced.
 *
 * @param hooks - Callbacks to invoke before and/or after tool execution.
 */
setToolHooks(hooks: ToolExecutionHooks): void {
  this.toolHooks = hooks
}
|
||||
|
||||
/**
|
||||
* Creates a GeminiAgent with pre-resolved config and MCP servers.
|
||||
* Config resolution and MCP building happens in ChatService (visible there).
|
||||
@@ -81,6 +101,7 @@ export class GeminiAgent {
|
||||
|
||||
// Build excluded tools list - always exclude save_memory and google_web_search
|
||||
// Conditionally exclude screenshot tools if model doesn't support images
|
||||
// Exclude window management tools unless in eval mode
|
||||
const excludedTools = ['save_memory', 'google_web_search']
|
||||
if (config.supportsImages === false) {
|
||||
excludedTools.push(
|
||||
@@ -89,6 +110,9 @@ export class GeminiAgent {
|
||||
)
|
||||
logger.info('Model does not support images, excluding screenshot tools')
|
||||
}
|
||||
if (config.evalMode !== true) {
|
||||
excludedTools.push('browser_create_window', 'browser_close_window')
|
||||
}
|
||||
|
||||
const geminiConfig = new GeminiConfig({
|
||||
sessionId: config.conversationId,
|
||||
@@ -303,11 +327,24 @@ export class GeminiAgent {
|
||||
for (const requestInfo of toolCallRequests) {
|
||||
if (abortSignal.aborted) break
|
||||
|
||||
await this.toolHooks?.onBeforeToolCall?.(
|
||||
requestInfo.name,
|
||||
requestInfo.args,
|
||||
browserContext,
|
||||
)
|
||||
|
||||
const result = await this.handleToolExecution(
|
||||
requestInfo,
|
||||
abortSignal,
|
||||
browserContext,
|
||||
)
|
||||
|
||||
await this.toolHooks?.onAfterToolCall?.(
|
||||
requestInfo.name,
|
||||
result,
|
||||
browserContext,
|
||||
)
|
||||
|
||||
toolResponseParts.push(...result.parts)
|
||||
|
||||
if (uiStream) {
|
||||
|
||||
@@ -35,4 +35,6 @@ export interface ResolvedAgentConfig {
|
||||
sessionExecutionDir: string
|
||||
/** Whether the model supports image inputs (vision). Defaults to true. */
|
||||
supportsImages?: boolean
|
||||
/** Eval mode - enables window management tools. Defaults to false. */
|
||||
evalMode?: boolean
|
||||
}
|
||||
|
||||
@@ -47,6 +47,8 @@ export {
|
||||
ungroupTabs,
|
||||
updateTabGroup,
|
||||
} from './tools/tab-management'
|
||||
// Window Management
|
||||
export { closeWindow, createWindow } from './tools/window-management'
|
||||
// Types
|
||||
export type { Context } from './types/context'
|
||||
export type { ImageContentData, Response } from './types/response'
|
||||
@@ -86,8 +88,9 @@ import {
|
||||
ungroupTabs,
|
||||
updateTabGroup,
|
||||
} from './tools/tab-management'
|
||||
import { closeWindow, createWindow } from './tools/window-management'
|
||||
|
||||
// Array export for convenience (32 tools)
|
||||
// Array export for convenience (33 tools total)
|
||||
export const allControllerTools = [
|
||||
getActiveTab,
|
||||
listTabs,
|
||||
@@ -121,4 +124,6 @@ export const allControllerTools = [
|
||||
removeBookmark,
|
||||
searchHistory,
|
||||
getRecentHistory,
|
||||
createWindow,
|
||||
closeWindow,
|
||||
]
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
/**
|
||||
* @license
|
||||
* Copyright 2025 BrowserOS
|
||||
*/
|
||||
import { z } from 'zod'
|
||||
|
||||
import { ToolCategories } from '../../types/tool-categories'
|
||||
import { defineTool } from '../../types/tool-definition'
|
||||
import type { Context } from '../types/context'
|
||||
import type { Response } from '../types/response'
|
||||
|
||||
/**
 * Tool `browser_create_window`: opens a new browser window through the
 * `createWindow` controller action and reports the resulting window and tab
 * IDs both as response text and as structured content.
 */
export const createWindow = defineTool<z.ZodRawShape, Context, Response>({
  name: 'browser_create_window',
  description:
    'Create a new browser window. Returns the windowId and tabId of the created window.',
  annotations: {
    category: ToolCategories.TAB_MANAGEMENT,
    readOnlyHint: false,
  },
  schema: {
    url: z
      .string()
      .optional()
      .describe('URL to open in the new window. Defaults to about:blank'),
    incognito: z.boolean().optional().describe('Create an incognito window'),
    focused: z
      .boolean()
      .optional()
      .describe('Whether to focus the new window. Defaults to true'),
  },
  handler: async (request, response, context) => {
    const { url, incognito, focused } = request.params as {
      url?: string
      incognito?: boolean
      focused?: boolean
    }

    // Apply defaults before dispatching. `||` is deliberate for `url`: an
    // empty string also falls back to about:blank.
    const actionInput = {
      url: url || 'about:blank',
      incognito: incognito || false,
      focused: focused ?? true,
    }

    const created = (await context.executeAction(
      'createWindow',
      actionInput,
    )) as { windowId: number; tabId: number }

    response.appendResponseLine(`Created window ${created.windowId}`)
    response.appendResponseLine(`Tab ID: ${created.tabId}`)

    response.addStructuredContent('windowId', created.windowId)
    response.addStructuredContent('tabId', created.tabId)
  },
})
|
||||
|
||||
/**
 * Tool `browser_close_window`: closes a browser window through the
 * `closeWindow` controller action and confirms it in the response text.
 */
export const closeWindow = defineTool<z.ZodRawShape, Context, Response>({
  name: 'browser_close_window',
  description: 'Close a browser window by its windowId.',
  annotations: {
    category: ToolCategories.TAB_MANAGEMENT,
    readOnlyHint: false,
  },
  schema: {
    // Coerced so that numeric strings are accepted as window IDs.
    windowId: z.coerce.number().describe('The ID of the window to close'),
  },
  handler: async (request, response, context) => {
    const params = request.params as { windowId: number }
    const targetWindowId = params.windowId

    await context.executeAction('closeWindow', { windowId: targetWindowId })

    response.appendResponseLine(`Closed window ${targetWindowId}`)
  },
})
|
||||
4
bun.lock
4
bun.lock
@@ -124,7 +124,7 @@
|
||||
},
|
||||
"apps/server": {
|
||||
"name": "@browseros/server",
|
||||
"version": "0.0.42",
|
||||
"version": "0.0.44",
|
||||
"bin": {
|
||||
"browseros-server": "./src/index.ts",
|
||||
},
|
||||
@@ -175,7 +175,7 @@
|
||||
},
|
||||
"packages/agent-sdk": {
|
||||
"name": "@browseros-ai/agent-sdk",
|
||||
"version": "0.0.4",
|
||||
"version": "0.0.5",
|
||||
"dependencies": {
|
||||
"eventsource-parser": "^3.0.6",
|
||||
"zod-to-json-schema": "^3.24.1",
|
||||
|
||||
Reference in New Issue
Block a user