feat: scoped controller context (#301)

* feat: remove wrapped controller tools and enrich context with windowid

* refactor: remove windowid from all the tools

* feat: pass window id to mcp server via request headers

* feat: enrich possible toolcalls to reduce roundtrip
This commit is contained in:
Dani Akash
2026-02-05 23:48:58 +05:30
committed by GitHub
parent d7be7520b8
commit d242adde26
18 changed files with 219 additions and 272 deletions

View File

@@ -228,22 +228,6 @@ export class GeminiAgent {
return `${contextLines.join('\n')}\n\n---\n\n`
}
/**
 * Adds the current window id to the arguments of a controller tool call.
 *
 * Nothing happens unless the browser context carries a truthy `windowId`
 * and the tool name starts with `browser_`. When both hold, the injection is
 * logged at debug level and `requestInfo.args` is replaced by a shallow copy
 * with `windowId` set (overriding any `windowId` already present).
 *
 * @param requestInfo - Tool call request whose `args` may be replaced.
 * @param browserContext - Optional browser context providing `windowId`.
 */
private injectWindowIdIntoToolArgs(
  requestInfo: ToolCallRequestInfo,
  browserContext?: BrowserContext,
): void {
  const windowId = browserContext?.windowId
  if (!windowId) {
    return
  }
  if (!requestInfo.name.startsWith('browser_')) {
    return
  }

  logger.debug('Injecting windowId into tool args', {
    tool: requestInfo.name,
    windowId,
  })
  requestInfo.args = { ...requestInfo.args, windowId }
}
private async executeToolWithTimeout(
requestInfo: ToolCallRequestInfo,
abortSignal: AbortSignal,
@@ -271,10 +255,7 @@ export class GeminiAgent {
private async handleToolExecution(
requestInfo: ToolCallRequestInfo,
abortSignal: AbortSignal,
browserContext?: BrowserContext,
): Promise<ToolExecutionResult> {
this.injectWindowIdIntoToolArgs(requestInfo, browserContext)
try {
const completedToolCall = await this.executeToolWithTimeout(
requestInfo,
@@ -368,11 +349,7 @@ export class GeminiAgent {
browserContext,
)
const result = await this.handleToolExecution(
requestInfo,
abortSignal,
browserContext,
)
const result = await this.handleToolExecution(requestInfo, abortSignal)
await this.toolHooks?.onAfterToolCall?.(
requestInfo.name,

View File

@@ -4,18 +4,27 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { AsyncLocalStorage } from 'node:async_hooks'
import { StreamableHTTPTransport } from '@hono/mcp'
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'
import type {
CallToolResult,
ImageContent,
TextContent,
} from '@modelcontextprotocol/sdk/types.js'
import { SetLevelRequestSchema } from '@modelcontextprotocol/sdk/types.js'
import { Hono } from 'hono'
import type { z } from 'zod'
import type { McpContext } from '../../browser/cdp/context'
import type { ControllerContext } from '../../browser/extension/context'
import {
type ControllerContext,
ScopedControllerContext,
} from '../../browser/extension/context'
import { logger } from '../../lib/logger'
import { metrics } from '../../lib/metrics'
import type { MutexPool } from '../../lib/mutex'
import { Sentry } from '../../lib/sentry'
import { ControllerResponse } from '../../tools/controller-based/response/controller-response'
import { McpResponse } from '../../tools/response/mcp-response'
import type { ToolDefinition } from '../../tools/types/tool-definition'
import type { Env } from '../types'
@@ -31,6 +40,9 @@ interface McpRouteDeps {
}
const MCP_SOURCE_HEADER = 'X-BrowserOS-Source'
const MCP_WINDOW_ID_HEADER = 'X-BrowserOS-Window-Id'
const windowIdStore = new AsyncLocalStorage<number | undefined>()
type McpRequestSource = 'gemini-agent' | 'sdk-internal' | 'third-party'
@@ -77,31 +89,44 @@ function createMcpServerWithTools(deps: McpRouteDeps): McpServer {
(async (params: Record<string, unknown>): Promise<CallToolResult> => {
const startTime = performance.now()
// Serialize tool execution per-window (allows parallel execution across windows)
const windowId = params.windowId as number | undefined
// Resolve windowId: explicit param takes priority over request header
const windowId =
(params.windowId as number | undefined) ?? windowIdStore.getStore()
const guard = await mutexPool.getMutex(windowId).acquire()
try {
const isControllerTool = tool.name.startsWith('browser_')
logger.info(
`${tool.name} request: ${JSON.stringify(params, null, ' ')}`,
)
// Detect if this is a controller tool (browser_* tools)
const isControllerTool = tool.name.startsWith('browser_')
const contextForResponse =
isControllerTool && controllerContext
? controllerContext
: cdpContext
// Create response handler and execute tool
const response = new McpResponse()
await tool.handler({ params }, response, cdpContext)
// Process and return response
try {
const content = await response.handle(
tool.name,
contextForResponse as McpContext,
)
let content: Array<TextContent | ImageContent>
let structuredContent: Record<string, unknown> | undefined
if (isControllerTool) {
const { windowId: _, ...cleanParams } = params
const scopedContext = new ScopedControllerContext(
controllerContext.bridge,
windowId,
)
const response = new ControllerResponse()
await tool.handler(
{ params: cleanParams },
response,
scopedContext,
)
content = await response.handle(scopedContext)
structuredContent = response.structuredContent
} else {
const response = new McpResponse()
await tool.handler({ params }, response, cdpContext)
content = await response.handle(
tool.name,
cdpContext as McpContext,
)
structuredContent = response.structuredContent
}
// Log successful tool execution (non-blocking)
metrics.log('tool_executed', {
@@ -110,7 +135,6 @@ function createMcpServerWithTools(deps: McpRouteDeps): McpServer {
success: true,
})
const structuredContent = response.structuredContent
return {
content,
...(structuredContent && { structuredContent }),
@@ -158,36 +182,41 @@ export function createMcpRoutes(deps: McpRouteDeps) {
}
const source = getMcpRequestSource(c.req.header(MCP_SOURCE_HEADER))
const headerWindowId = c.req.header(MCP_WINDOW_ID_HEADER)
const requestWindowId = headerWindowId ? Number(headerWindowId) : undefined
metrics.log('mcp.request', { source })
try {
// Create a new transport for EACH request to prevent request ID collisions.
// Different clients may use the same JSON-RPC request IDs, which would cause
// responses to be routed to the wrong HTTP connections if transport state is shared.
const transport = new StreamableHTTPTransport({
sessionIdGenerator: undefined, // Stateless mode - no session management
enableJsonResponse: true, // Return JSON responses (not SSE streams)
})
return windowIdStore.run(requestWindowId, async () => {
try {
// Create a new transport for EACH request to prevent request ID collisions.
// Different clients may use the same JSON-RPC request IDs, which would cause
// responses to be routed to the wrong HTTP connections if transport state is shared.
const transport = new StreamableHTTPTransport({
sessionIdGenerator: undefined, // Stateless mode - no session management
enableJsonResponse: true, // Return JSON responses (not SSE streams)
})
// Connect the server to this transport
await mcpServer.connect(transport)
// Connect the server to this transport
await mcpServer.connect(transport)
// Handle the request and return response
return transport.handleRequest(c)
} catch (error) {
Sentry.captureException(error)
logger.error('Error handling MCP request', {
error: error instanceof Error ? error.message : String(error),
})
// Handle the request and return response
return transport.handleRequest(c)
} catch (error) {
Sentry.captureException(error)
logger.error('Error handling MCP request', {
error: error instanceof Error ? error.message : String(error),
})
return c.json(
{
jsonrpc: '2.0',
error: { code: -32603, message: 'Internal server error' },
id: null,
},
500,
)
}
return c.json(
{
jsonrpc: '2.0',
error: { code: -32603, message: 'Internal server error' },
id: null,
},
500,
)
}
})
})
}

View File

@@ -168,6 +168,9 @@ export class ChatService {
headers: {
Accept: 'application/json, text/event-stream',
'X-BrowserOS-Source': 'gemini-agent',
...(browserContext?.windowId != null && {
'X-BrowserOS-Window-Id': String(browserContext.windowId),
}),
},
trust: true,
})

View File

@@ -22,4 +22,31 @@ export class ControllerContext implements Context {
/** Delegates to the controller bridge's `isConnected()` and returns its result. */
isConnected(): boolean {
return this.controllerBridge.isConnected()
}
/**
 * Read-only access to the controller bridge held by this context, so that
 * other objects (for example a ScopedControllerContext) can be built on the
 * same bridge.
 */
get bridge(): ControllerBridge {
return this.controllerBridge
}
}
/**
 * Controller context bound to an optional browser window.
 *
 * Every action sent through this context is forwarded to the controller
 * bridge; when a window id was supplied at construction time it is merged
 * into the action payload so individual tools do not have to pass it.
 */
export class ScopedControllerContext implements Context {
  constructor(
    private controllerBridge: ControllerBridge,
    private windowId?: number,
  ) {}

  /**
   * Sends `action` to the controller bridge using the default controller
   * timeout.
   *
   * @param action - Name of the controller action to run.
   * @param payload - Action payload. When this context has a window id, the
   *   payload is spread into a new object and `windowId` is set on it
   *   (replacing any `windowId` the payload already had); otherwise the
   *   payload is forwarded unchanged.
   * @returns Whatever the bridge's `sendRequest` resolves to.
   */
  async executeAction(action: string, payload: unknown): Promise<unknown> {
    let outgoing: unknown = payload
    if (this.windowId != null) {
      // `!= null` keeps a window id of 0 while skipping undefined/null.
      outgoing = {
        ...(payload as Record<string, unknown>),
        windowId: this.windowId,
      }
    }

    return this.controllerBridge.sendRequest(
      action,
      outgoing,
      TIMEOUTS.CONTROLLER_DEFAULT,
    )
  }

  /** Delegates to the controller bridge's `isConnected()`. */
  isConnected(): boolean {
    return this.controllerBridge.isConnected()
  }
}

View File

@@ -68,7 +68,7 @@ export class Application {
logger.info(
`Loaded ${(await import('./tools/controller-based/registry')).allControllerTools.length} controller (extension) tools`,
)
const tools = createToolRegistry(cdpContext, controllerContext)
const tools = createToolRegistry(cdpContext)
const mutexPool = new MutexPool()
try {

View File

@@ -7,6 +7,7 @@ import type {
TextContent,
} from '@modelcontextprotocol/sdk/types.js'
import type { Context } from '../types/context'
import type { ImageContentData, Response } from '../types/response'
/**
@@ -50,6 +51,46 @@ export class ControllerResponse implements Response {
: undefined
}
// Opt-in flags read by handle(): when set, handle() issues an extra
// controller action and appends page content / a screenshot to the result.
// Both default to off.
#includeSnapshot = false
#includeScreenshot = false
/**
 * Sets whether handle() should fetch page content and append it as a text
 * item to the response.
 */
setIncludeSnapshot(value: boolean): void {
this.#includeSnapshot = value
}
/**
 * Sets whether handle() should capture a screenshot and append it as an
 * image item to the response.
 */
setIncludeScreenshot(value: boolean): void {
this.#includeScreenshot = value
}
/**
 * Builds the MCP content for this response, optionally enriched with
 * follow-up browser state.
 *
 * Starts from `toContent()` and, depending on the flags set through
 * `setIncludeSnapshot` / `setIncludeScreenshot`, runs extra controller
 * actions through `context` and appends their results.
 *
 * Enrichment is best-effort. The MCP route calls this only after the tool
 * handler has finished, so a failing follow-up call must not turn an already
 * performed action into a failed tool call (the caller might then repeat it).
 * A failure is reported as an extra text item instead of being thrown.
 *
 * @param context - Context used to run the follow-up controller actions.
 * @returns The collected content, plus page text and/or a screenshot when
 *   requested and available.
 */
async handle(context: Context): Promise<Array<TextContent | ImageContent>> {
  const content = this.toContent()

  if (this.#includeSnapshot) {
    try {
      // NOTE(review): sent with an empty payload — presumably the controller
      // resolves the target tab itself; confirm against the extension.
      const result = await context.executeAction('getPageContent', {})
      const text = (result as { content?: string } | null | undefined)?.content
      if (text) {
        content.push({
          type: 'text',
          text: `\n## Page Content After Action\n${text}`,
        })
      }
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      content.push({
        type: 'text',
        text: `\n(Page content after action unavailable: ${reason})`,
      })
    }
  }

  if (this.#includeScreenshot) {
    try {
      const result = await context.executeAction('captureScreenshot', {})
      const shot = result as
        | { data?: string; mimeType?: string }
        | null
        | undefined
      if (shot?.data) {
        content.push({
          type: 'image',
          data: shot.data,
          // Fall back to PNG when the controller does not report a MIME type.
          mimeType: shot.mimeType ?? 'image/png',
        })
      }
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      content.push({
        type: 'text',
        text: `\n(Screenshot after action unavailable: ${reason})`,
      })
    }
  }

  return content
}
/**
* Convert collected data to MCP content format
*/

View File

@@ -20,19 +20,16 @@ export const executeJavaScript = defineTool<z.ZodRawShape, Context, Response>({
schema: {
tabId: z.coerce.number().describe('Tab ID to execute code in'),
code: z.string().describe('JavaScript code to execute'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, code, windowId } = request.params as {
const { tabId, code } = request.params as {
tabId: number
code: string
windowId?: number
}
const result = await context.executeAction('executeJavaScript', {
tabId,
code,
windowId,
})
// biome-ignore lint/suspicious/noExplicitAny: JS execution returns arbitrary values
const data = result as { result: any }
@@ -70,23 +67,21 @@ export const sendKeys = defineTool<z.ZodRawShape, Context, Response>({
'PageDown',
])
.describe('Keyboard key to send'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, key, windowId } = request.params as {
const { tabId, key } = request.params as {
tabId: number
key: string
windowId?: number
}
const result = await context.executeAction('sendKeys', {
tabId,
key,
windowId,
})
const data = result as { success: boolean; message: string }
response.appendResponseLine(data.message)
response.setIncludeSnapshot?.(true)
},
})
@@ -97,12 +92,9 @@ export const checkAvailability = defineTool<z.ZodRawShape, Context, Response>({
category: ToolCategories.ADVANCED,
readOnlyHint: true,
},
schema: {
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { windowId } = request.params as { windowId?: number }
const result = await context.executeAction('checkBrowserOS', { windowId })
schema: {},
handler: async (_request, response, context) => {
const result = await context.executeAction('checkBrowserOS', {})
const data = result as {
available: boolean
apis?: string[]

View File

@@ -21,17 +21,12 @@ export const getBookmarks = defineTool<z.ZodRawShape, Context, Response>({
.string()
.optional()
.describe('Optional folder ID to get bookmarks from (omit for all)'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { folderId, windowId } = request.params as {
folderId?: string
windowId?: number
}
const { folderId } = request.params as { folderId?: string }
const result = await context.executeAction('getBookmarks', {
folderId,
windowId,
})
const data = result as {
bookmarks: Array<{
@@ -75,21 +70,18 @@ export const createBookmark = defineTool<z.ZodRawShape, Context, Response>({
.describe(
'Folder ID to create bookmark in (from browser_get_bookmarks or browser_create_bookmark_folder)',
),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { title, url, parentId, windowId } = request.params as {
const { title, url, parentId } = request.params as {
title: string
url: string
parentId?: string
windowId?: number
}
const result = await context.executeAction('createBookmark', {
title,
url,
parentId,
windowId,
})
const data = result as { id: string; title: string; url: string }
@@ -108,15 +100,11 @@ export const removeBookmark = defineTool<z.ZodRawShape, Context, Response>({
},
schema: {
bookmarkId: z.string().describe('Bookmark ID to remove'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { bookmarkId, windowId } = request.params as {
bookmarkId: string
windowId?: number
}
const { bookmarkId } = request.params as { bookmarkId: string }
await context.executeAction('removeBookmark', { id: bookmarkId, windowId })
await context.executeAction('removeBookmark', { id: bookmarkId })
response.appendResponseLine(`Removed bookmark ${bookmarkId}`)
},
@@ -133,21 +121,18 @@ export const updateBookmark = defineTool<z.ZodRawShape, Context, Response>({
bookmarkId: z.string().describe('Bookmark ID to update'),
title: z.string().optional().describe('New title for the bookmark'),
url: z.string().url().optional().describe('New URL for the bookmark'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { bookmarkId, title, url, windowId } = request.params as {
const { bookmarkId, title, url } = request.params as {
bookmarkId: string
title?: string
url?: string
windowId?: number
}
const result = await context.executeAction('updateBookmark', {
id: bookmarkId,
title,
url,
windowId,
})
const data = result as { id: string; title: string; url?: string }
@@ -177,19 +162,16 @@ export const createBookmarkFolder = defineTool<
.string()
.optional()
.describe('Parent folder ID (defaults to Bookmarks Bar)'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { title, parentId, windowId } = request.params as {
const { title, parentId } = request.params as {
title: string
parentId?: string
windowId?: number
}
const result = await context.executeAction('createBookmarkFolder', {
title,
parentId,
windowId,
})
const data = result as {
id: string
@@ -215,17 +197,12 @@ export const getBookmarkChildren = defineTool<z.ZodRawShape, Context, Response>(
},
schema: {
folderId: z.string().describe('Folder ID to get children from'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { folderId, windowId } = request.params as {
folderId: string
windowId?: number
}
const { folderId } = request.params as { folderId: string }
const result = await context.executeAction('getBookmarkChildren', {
folderId,
windowId,
})
const data = result as {
children: Array<{
@@ -278,21 +255,18 @@ export const moveBookmark = defineTool<z.ZodRawShape, Context, Response>({
.min(0)
.optional()
.describe('Position within parent (0-based)'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { bookmarkId, parentId, index, windowId } = request.params as {
const { bookmarkId, parentId, index } = request.params as {
bookmarkId: string
parentId?: string
index?: number
windowId?: number
}
const result = await context.executeAction('moveBookmark', {
id: bookmarkId,
parentId,
index,
windowId,
})
const data = result as {
id: string
@@ -322,19 +296,16 @@ export const removeBookmarkTree = defineTool<z.ZodRawShape, Context, Response>({
schema: {
folderId: z.string().describe('Folder ID to remove'),
confirm: z.boolean().describe('Must be true to confirm recursive deletion'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { folderId, confirm, windowId } = request.params as {
const { folderId, confirm } = request.params as {
folderId: string
confirm: boolean
windowId?: number
}
const result = await context.executeAction('removeBookmarkTree', {
id: folderId,
confirm,
windowId,
})
const data = result as {
success: boolean

View File

@@ -29,7 +29,6 @@ export const getPageContent = defineTool<z.ZodRawShape, Context, Response>({
},
schema: {
tabId: z.coerce.number().describe('Tab ID to extract content from'),
windowId: z.number().optional().describe('Window ID for routing'),
type: z
.enum(['text', 'text-with-links'])
.describe('Type of content to extract: text or text-with-links'),
@@ -81,7 +80,6 @@ export const getPageContent = defineTool<z.ZodRawShape, Context, Response>({
page?: string
contextWindow?: string
options?: { context?: 'visible' | 'full'; includeSections?: string[] }
windowId?: number
}
try {
@@ -101,7 +99,6 @@ export const getPageContent = defineTool<z.ZodRawShape, Context, Response>({
const snapshotResult = await context.executeAction('getSnapshot', {
tabId: params.tabId,
type: includeLinks ? 'links' : 'text',
windowId: params.windowId,
})
const snapshot = snapshotResult as Snapshot

View File

@@ -20,17 +20,15 @@ export const clickCoordinates = defineTool<z.ZodRawShape, Context, Response>({
tabId: z.coerce.number().describe('Tab ID to click in'),
x: z.coerce.number().describe('X coordinate'),
y: z.coerce.number().describe('Y coordinate'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, x, y, windowId } = request.params as {
const { tabId, x, y } = request.params as {
tabId: number
x: number
y: number
windowId?: number
}
await context.executeAction('clickCoordinates', { tabId, x, y, windowId })
await context.executeAction('clickCoordinates', { tabId, x, y })
response.appendResponseLine(
`Clicked at coordinates (${x}, ${y}) in tab ${tabId}`,
@@ -50,15 +48,13 @@ export const typeAtCoordinates = defineTool<z.ZodRawShape, Context, Response>({
x: z.coerce.number().describe('X coordinate'),
y: z.coerce.number().describe('Y coordinate'),
text: z.string().describe('Text to type'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, x, y, text, windowId } = request.params as {
const { tabId, x, y, text } = request.params as {
tabId: number
x: number
y: number
text: string
windowId?: number
}
await context.executeAction('typeAtCoordinates', {
@@ -66,7 +62,6 @@ export const typeAtCoordinates = defineTool<z.ZodRawShape, Context, Response>({
x,
y,
text,
windowId,
})
response.appendResponseLine(

View File

@@ -22,19 +22,16 @@ export const searchHistory = defineTool<z.ZodRawShape, Context, Response>({
.number()
.optional()
.describe('Maximum number of results to return (default: 100)'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { query, maxResults, windowId } = request.params as {
const { query, maxResults } = request.params as {
query: string
maxResults?: number
windowId?: number
}
const result = await context.executeAction('searchHistory', {
query,
maxResults,
windowId,
})
const data = result as {
items: Array<{
@@ -80,17 +77,12 @@ export const getRecentHistory = defineTool<z.ZodRawShape, Context, Response>({
.number()
.optional()
.describe('Number of recent items to retrieve (default: 20)'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { count, windowId } = request.params as {
count?: number
windowId?: number
}
const { count } = request.params as { count?: number }
const result = await context.executeAction('getRecentHistory', {
count,
windowId,
})
const data = result as {
items: Array<{

View File

@@ -35,22 +35,15 @@ export const getInteractiveElements = defineTool<
.boolean()
.optional()
.describe('Use simplified format (default: false)'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const {
tabId,
simplified = false,
windowId,
} = request.params as {
const { tabId, simplified = false } = request.params as {
tabId: number
simplified?: boolean
windowId?: number
}
const result = await context.executeAction('getInteractiveSnapshot', {
tabId,
windowId,
})
const snapshot = result as {
snapshotId: number
@@ -148,24 +141,20 @@ export const grepInteractiveElements = defineTool<
.describe(
'Number of elements to show before and after each match (default: 2). Set to 0 to show only matches.',
),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, ctx) => {
const {
tabId,
pattern,
context: contextLines = 2,
windowId,
} = request.params as {
tabId: number
pattern: string
context?: number
windowId?: number
}
const result = await ctx.executeAction('getInteractiveSnapshot', {
tabId,
windowId,
})
const snapshot = result as {
snapshotId: number
@@ -262,18 +251,17 @@ export const clickElement = defineTool<z.ZodRawShape, Context, Response>({
nodeId: z.coerce
.number()
.describe('Node ID from browser_get_interactive_elements'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, nodeId, windowId } = request.params as {
const { tabId, nodeId } = request.params as {
tabId: number
nodeId: number
windowId?: number
}
await context.executeAction('click', { tabId, nodeId, windowId })
await context.executeAction('click', { tabId, nodeId })
response.appendResponseLine(`Clicked element ${nodeId} in tab ${tabId}`)
response.setIncludeSnapshot?.(true)
},
})
@@ -288,22 +276,21 @@ export const typeText = defineTool<z.ZodRawShape, Context, Response>({
tabId: z.coerce.number().describe('Tab ID containing the element'),
nodeId: z.coerce.number().describe('Node ID of the input element'),
text: z.string().describe('Text to type into the element'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, nodeId, text, windowId } = request.params as {
const { tabId, nodeId, text } = request.params as {
tabId: number
nodeId: number
text: string
windowId?: number
}
await context.executeAction('click', { tabId, nodeId, windowId })
await context.executeAction('inputText', { tabId, nodeId, text, windowId })
await context.executeAction('click', { tabId, nodeId })
await context.executeAction('inputText', { tabId, nodeId, text })
response.appendResponseLine(
`Typed text into element ${nodeId} in tab ${tabId}`,
)
response.setIncludeSnapshot?.(true)
},
})
@@ -317,17 +304,15 @@ export const clearInput = defineTool<z.ZodRawShape, Context, Response>({
schema: {
tabId: z.coerce.number().describe('Tab ID containing the element'),
nodeId: z.coerce.number().describe('Node ID of the input element'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, nodeId, windowId } = request.params as {
const { tabId, nodeId } = request.params as {
tabId: number
nodeId: number
windowId?: number
}
await context.executeAction('click', { tabId, nodeId, windowId })
await context.executeAction('clear', { tabId, nodeId, windowId })
await context.executeAction('click', { tabId, nodeId })
await context.executeAction('clear', { tabId, nodeId })
response.appendResponseLine(`Cleared element ${nodeId} in tab ${tabId}`)
},
@@ -343,16 +328,14 @@ export const scrollToElement = defineTool<z.ZodRawShape, Context, Response>({
schema: {
tabId: z.coerce.number().describe('Tab ID containing the element'),
nodeId: z.coerce.number().describe('Node ID of the element to scroll to'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, nodeId, windowId } = request.params as {
const { tabId, nodeId } = request.params as {
tabId: number
nodeId: number
windowId?: number
}
await context.executeAction('scrollToNode', { tabId, nodeId, windowId })
await context.executeAction('scrollToNode', { tabId, nodeId })
response.appendResponseLine(`Scrolled to element ${nodeId} in tab ${tabId}`)
},

View File

@@ -22,16 +22,11 @@ export const navigate = defineTool<z.ZodRawShape, Context, Response>({
.number()
.optional()
.describe('Tab ID to navigate (optional, defaults to active tab)'),
windowId: z
.number()
.optional()
.describe('Window ID (used when tabId not provided)'),
},
handler: async (request, response, context) => {
const params = request.params as {
url: string
tabId?: number
windowId?: number
}
const result = await context.executeAction('navigate', params)
@@ -49,5 +44,6 @@ export const navigate = defineTool<z.ZodRawShape, Context, Response>({
response.addStructuredContent('tabId', data.tabId)
response.addStructuredContent('windowId', data.windowId)
response.addStructuredContent('url', data.url)
response.setIncludeSnapshot?.(true)
},
})

View File

@@ -39,7 +39,6 @@ export const getScreenshotPointer = defineTool<
.string()
.optional()
.describe('Optional label to show with pointer (e.g., "Click", "Type")'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const params = request.params as {
@@ -47,7 +46,6 @@ export const getScreenshotPointer = defineTool<
nodeId: number
size?: string
pointerLabel?: string
windowId?: number
}
const result = await context.executeAction(
@@ -104,7 +102,6 @@ export const getScreenshot = defineTool<z.ZodRawShape, Context, Response>({
.number()
.optional()
.describe('Exact height in pixels (overrides size)'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const params = request.params as {
@@ -113,7 +110,6 @@ export const getScreenshot = defineTool<z.ZodRawShape, Context, Response>({
showHighlights?: boolean
width?: number
height?: number
windowId?: number
}
const result = await context.executeAction('captureScreenshot', params)

View File

@@ -18,15 +18,11 @@ export const scrollDown = defineTool<z.ZodRawShape, Context, Response>({
},
schema: {
tabId: z.coerce.number().describe('Tab ID to scroll'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, windowId } = request.params as {
tabId: number
windowId?: number
}
const { tabId } = request.params as { tabId: number }
await context.executeAction('scrollDown', { tabId, windowId })
await context.executeAction('scrollDown', { tabId })
response.appendResponseLine(`Scrolled down in tab ${tabId}`)
},
@@ -41,15 +37,11 @@ export const scrollUp = defineTool<z.ZodRawShape, Context, Response>({
},
schema: {
tabId: z.coerce.number().describe('Tab ID to scroll'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, windowId } = request.params as {
tabId: number
windowId?: number
}
const { tabId } = request.params as { tabId: number }
await context.executeAction('scrollUp', { tabId, windowId })
await context.executeAction('scrollUp', { tabId })
response.appendResponseLine(`Scrolled up in tab ${tabId}`)
},

View File

@@ -16,12 +16,9 @@ export const getActiveTab = defineTool<z.ZodRawShape, Context, Response>({
category: ToolCategories.TAB_MANAGEMENT,
readOnlyHint: true,
},
schema: {
windowId: z.number().optional().describe('Window ID (injected by agent)'),
},
handler: async (request, response, context) => {
const params = request.params as { windowId?: number }
const result = await context.executeAction('getActiveTab', params)
schema: {},
handler: async (_request, response, context) => {
const result = await context.executeAction('getActiveTab', {})
const data = result as {
tabId: number
url: string
@@ -48,12 +45,9 @@ export const listTabs = defineTool<z.ZodRawShape, Context, Response>({
category: ToolCategories.TAB_MANAGEMENT,
readOnlyHint: true,
},
schema: {
windowId: z.number().optional().describe('Window ID (injected by agent)'),
},
handler: async (request, response, context) => {
const params = request.params as { windowId?: number }
const result = await context.executeAction('getTabs', params)
schema: {},
handler: async (_request, response, context) => {
const result = await context.executeAction('getTabs', {})
const data = result as {
tabs: Array<{
id: number
@@ -99,13 +93,11 @@ export const openTab = defineTool<z.ZodRawShape, Context, Response>({
.boolean()
.optional()
.describe('Whether to make the new tab active (default: true)'),
windowId: z.number().optional().describe('Window ID (injected by agent)'),
},
handler: async (request, response, context) => {
const params = request.params as {
url?: string
active?: boolean
windowId?: number
}
const result = await context.executeAction('openTab', params)
@@ -126,15 +118,11 @@ export const closeTab = defineTool<z.ZodRawShape, Context, Response>({
},
schema: {
tabId: z.coerce.number().describe('ID of the tab to close'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, windowId } = request.params as {
tabId: number
windowId?: number
}
const { tabId } = request.params as { tabId: number }
await context.executeAction('closeTab', { tabId, windowId })
await context.executeAction('closeTab', { tabId })
response.appendResponseLine(`Closed tab ${tabId}`)
},
@@ -149,15 +137,11 @@ export const switchTab = defineTool<z.ZodRawShape, Context, Response>({
},
schema: {
tabId: z.coerce.number().describe('ID of the tab to switch to'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, windowId } = request.params as {
tabId: number
windowId?: number
}
const { tabId } = request.params as { tabId: number }
const result = await context.executeAction('switchTab', { tabId, windowId })
const result = await context.executeAction('switchTab', { tabId })
const data = result as { tabId: number; url: string; title: string }
response.appendResponseLine(`Switched to tab: ${data.title}`)
@@ -174,17 +158,12 @@ export const getLoadStatus = defineTool<z.ZodRawShape, Context, Response>({
},
schema: {
tabId: z.coerce.number().describe('Tab ID to check'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabId, windowId } = request.params as {
tabId: number
windowId?: number
}
const { tabId } = request.params as { tabId: number }
const result = await context.executeAction('getPageLoadStatus', {
tabId,
windowId,
})
const data = result as {
tabId: number
@@ -218,12 +197,9 @@ export const listTabGroups = defineTool<z.ZodRawShape, Context, Response>({
category: ToolCategories.TAB_MANAGEMENT,
readOnlyHint: true,
},
schema: {
windowId: z.number().optional().describe('Window ID (injected by agent)'),
},
handler: async (request, response, context) => {
const params = request.params as { windowId?: number }
const result = await context.executeAction('listTabGroups', params)
schema: {},
handler: async (_request, response, context) => {
const result = await context.executeAction('listTabGroups', {})
const data = result as {
groups: Array<{
id: number
@@ -291,15 +267,13 @@ export const groupTabs = defineTool<z.ZodRawShape, Context, Response>({
.number()
.optional()
.describe('Existing group ID to add tabs to'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabIds, title, color, groupId, windowId } = request.params as {
const { tabIds, title, color, groupId } = request.params as {
tabIds: number[]
title?: string
color?: string
groupId?: number
windowId?: number
}
const result = await context.executeAction('groupTabs', {
@@ -307,7 +281,6 @@ export const groupTabs = defineTool<z.ZodRawShape, Context, Response>({
title,
color,
groupId,
windowId,
})
const data = result as {
groupId: number
@@ -356,15 +329,13 @@ export const updateTabGroup = defineTool<z.ZodRawShape, Context, Response>({
.boolean()
.optional()
.describe('Whether to collapse (hide) the group tabs'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { groupId, title, color, collapsed, windowId } = request.params as {
const { groupId, title, color, collapsed } = request.params as {
groupId: number
title?: string
color?: string
collapsed?: boolean
windowId?: number
}
const result = await context.executeAction('updateTabGroup', {
@@ -372,7 +343,6 @@ export const updateTabGroup = defineTool<z.ZodRawShape, Context, Response>({
title,
color,
collapsed,
windowId,
})
const data = result as {
groupId: number
@@ -403,17 +373,12 @@ export const ungroupTabs = defineTool<z.ZodRawShape, Context, Response>({
tabIds: z
.array(z.coerce.number())
.describe('Array of tab IDs to remove from their groups'),
windowId: z.number().optional().describe('Window ID for routing'),
},
handler: async (request, response, context) => {
const { tabIds, windowId } = request.params as {
tabIds: number[]
windowId?: number
}
const { tabIds } = request.params as { tabIds: number[] }
const result = await context.executeAction('ungroupTabs', {
tabIds,
windowId,
})
const data = result as { ungroupedCount: number }

View File

@@ -40,4 +40,16 @@ export interface Response {
* Add a key-value pair to structured content (flat, no nesting)
*/
addStructuredContent(key: string, value: unknown): void
/**
* Request that a page content snapshot be appended after tool execution.
* Optional: only ControllerResponse supports it, so call it with `?.`.
*/
setIncludeSnapshot?(value: boolean): void
/**
* Request that a screenshot be appended after tool execution.
* Optional: only ControllerResponse supports it, so call it with `?.`.
*/
setIncludeScreenshot?(value: boolean): void
}

View File

@@ -7,7 +7,6 @@
*/
import type { McpContext } from '../browser/cdp/context'
import type { ControllerContext } from '../browser/extension/context'
import { logger } from '../lib/logger'
import { allCdpTools } from './cdp-based/registry'
@@ -16,34 +15,14 @@ import type { ToolDefinition } from './types/tool-definition'
export function createToolRegistry(
cdpContext: McpContext | null,
controllerContext: ControllerContext,
// biome-ignore lint/suspicious/noExplicitAny: heterogeneous tool registry requires any
): Array<ToolDefinition<any, any, any>> {
const cdpTools = cdpContext ? allCdpTools : []
const wrappedControllerTools = wrapControllerTools(
allControllerTools,
controllerContext,
)
logger.info(
`Total tools available: ${cdpTools.length + wrappedControllerTools.length} ` +
`(${cdpTools.length} CDP + ${wrappedControllerTools.length} extension)`,
`Total tools available: ${cdpTools.length + allControllerTools.length} ` +
`(${cdpTools.length} CDP + ${allControllerTools.length} extension)`,
)
return [...cdpTools, ...wrappedControllerTools]
}
function wrapControllerTools(
tools: typeof allControllerTools,
controllerContext: ControllerContext,
// biome-ignore lint/suspicious/noExplicitAny: wrapper function for heterogeneous tools
): Array<ToolDefinition<any, any, any>> {
// biome-ignore lint/suspicious/noExplicitAny: tool has heterogeneous schema
return tools.map((tool: any) => ({
...tool,
// biome-ignore lint/suspicious/noExplicitAny: handler params are dynamically typed
handler: async (request: any, response: any, _context: any) => {
return tool.handler(request, response, controllerContext)
},
}))
return [...cdpTools, ...allControllerTools]
}