mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-18 11:06:19 +00:00
fix: refactor SDK BrowserService to use Browser class directly (#406)
* fix: refactor SDK BrowserService to use Browser class directly

  The tools system was completely rewritten with new tool names and response formats. BrowserService was calling non-existent MCP tools (browser_get_active_tab, browser_navigate, etc.) that returned structuredContent, which no longer exists.

  Replaced MCP HTTP client calls with direct Browser class method calls:
  - getActiveTab → browser.getActivePage() / browser.listPages()
  - getPageContent → browser.contentAsMarkdown()
  - getScreenshot → browser.screenshot()
  - navigate → browser.goto() with tabId/windowId resolution
  - getPageLoadStatus → browser.listPages() with isLoading check
  - getInteractiveElements → browser.snapshot() / browser.enhancedSnapshot()

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: address PR review — consistent tabId guard and remove dead PageContent type

  - Change `if (tabId)` to `if (tabId !== undefined)` in navigate() to match the guard style used for windowId and elsewhere in the file
  - Remove orphaned PageContent interface no longer imported after refactor

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -49,11 +49,9 @@ async function waitForPageLoad(
|
||||
}
|
||||
|
||||
export function createSdkRoutes(deps: SdkDeps) {
|
||||
const { port, browserosId } = deps
|
||||
const { port, browser, browserosId } = deps
|
||||
|
||||
const mcpServerUrl = `http://127.0.0.1:${port}/mcp`
|
||||
|
||||
const browserService = new BrowserService(mcpServerUrl)
|
||||
const browserService = new BrowserService(browser)
|
||||
const chatService = new ChatService(port)
|
||||
const extractService = new ExtractService()
|
||||
const verifyService = new VerifyService()
|
||||
|
||||
@@ -110,6 +110,7 @@ export async function createHttpServer(config: HttpServerConfig) {
|
||||
'/sdk',
|
||||
createSdkRoutes({
|
||||
port,
|
||||
browser,
|
||||
browserosId,
|
||||
}),
|
||||
)
|
||||
|
||||
@@ -3,77 +3,75 @@
|
||||
* Copyright 2025 BrowserOS
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*
|
||||
* Browser Service - MCP-based browser operations for SDK
|
||||
* Browser Service - Direct browser operations for SDK
|
||||
*/
|
||||
|
||||
import {
|
||||
callMcpTool,
|
||||
getImageContent,
|
||||
getTextContent,
|
||||
} from '../../utils/mcp-client'
|
||||
import type { Browser } from '../../../browser/browser'
|
||||
import type {
|
||||
ActiveTab,
|
||||
InteractiveElements,
|
||||
NavigateResult,
|
||||
PageContent,
|
||||
PageLoadStatus,
|
||||
Screenshot,
|
||||
} from './types'
|
||||
import { SdkError } from './types'
|
||||
|
||||
export class BrowserService {
|
||||
constructor(private mcpServerUrl: string) {}
|
||||
constructor(private browser: Browser) {}
|
||||
|
||||
private async getPageIdForTab(tabId: number): Promise<number> {
|
||||
const resolved = await this.browser.resolveTabIds([tabId])
|
||||
const pageId = resolved.get(tabId)
|
||||
if (pageId === undefined) {
|
||||
throw new SdkError(`Tab ${tabId} not found`, 404)
|
||||
}
|
||||
return pageId
|
||||
}
|
||||
|
||||
async getActiveTab(windowId?: number): Promise<ActiveTab> {
|
||||
const result = await callMcpTool<ActiveTab>(
|
||||
this.mcpServerUrl,
|
||||
'browser_get_active_tab',
|
||||
windowId ? { windowId } : {},
|
||||
)
|
||||
|
||||
if (result.isError || !result.structuredContent?.tabId) {
|
||||
throw new SdkError('Failed to get active tab')
|
||||
if (windowId !== undefined) {
|
||||
// Find the active tab in the specified window
|
||||
const pages = await this.browser.listPages()
|
||||
const page = pages.find((p) => p.windowId === windowId && p.isActive)
|
||||
if (!page) {
|
||||
throw new SdkError('No active tab found in specified window')
|
||||
}
|
||||
return {
|
||||
tabId: page.tabId,
|
||||
url: page.url,
|
||||
title: page.title,
|
||||
windowId: page.windowId ?? 0,
|
||||
}
|
||||
}
|
||||
|
||||
return result.structuredContent
|
||||
const page = await this.browser.getActivePage()
|
||||
if (!page) {
|
||||
throw new SdkError('No active tab found')
|
||||
}
|
||||
|
||||
return {
|
||||
tabId: page.tabId,
|
||||
url: page.url,
|
||||
title: page.title,
|
||||
windowId: page.windowId ?? 0,
|
||||
}
|
||||
}
|
||||
|
||||
async getPageContent(tabId: number): Promise<string> {
|
||||
const result = await callMcpTool<PageContent>(
|
||||
this.mcpServerUrl,
|
||||
'browser_get_page_content',
|
||||
{ tabId, type: 'text' },
|
||||
)
|
||||
|
||||
if (result.isError) {
|
||||
throw new SdkError('Failed to get page content')
|
||||
}
|
||||
|
||||
const content = result.structuredContent?.content || getTextContent(result)
|
||||
const pageId = await this.getPageIdForTab(tabId)
|
||||
const content = await this.browser.contentAsMarkdown(pageId, {})
|
||||
if (!content) {
|
||||
throw new SdkError('No content found on page', 400)
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
async getScreenshot(tabId: number): Promise<Screenshot> {
|
||||
const result = await callMcpTool(
|
||||
this.mcpServerUrl,
|
||||
'browser_get_screenshot',
|
||||
{ tabId, size: 'medium' },
|
||||
)
|
||||
|
||||
if (result.isError) {
|
||||
throw new SdkError('Failed to capture screenshot')
|
||||
}
|
||||
|
||||
const image = getImageContent(result)
|
||||
if (!image) {
|
||||
throw new SdkError('Screenshot not available')
|
||||
}
|
||||
|
||||
return image
|
||||
const pageId = await this.getPageIdForTab(tabId)
|
||||
return await this.browser.screenshot(pageId, {
|
||||
format: 'png',
|
||||
fullPage: false,
|
||||
})
|
||||
}
|
||||
|
||||
async navigate(
|
||||
@@ -81,62 +79,60 @@ export class BrowserService {
|
||||
tabId?: number,
|
||||
windowId?: number,
|
||||
): Promise<NavigateResult> {
|
||||
const result = await callMcpTool<NavigateResult>(
|
||||
this.mcpServerUrl,
|
||||
'browser_navigate',
|
||||
{
|
||||
url,
|
||||
...(tabId && { tabId }),
|
||||
...(windowId && { windowId }),
|
||||
},
|
||||
)
|
||||
|
||||
if (result.isError || !result.structuredContent?.tabId) {
|
||||
throw new SdkError(getTextContent(result) || 'Navigation failed')
|
||||
if (tabId !== undefined) {
|
||||
const pages = await this.browser.listPages()
|
||||
const page = pages.find((p) => p.tabId === tabId)
|
||||
if (!page) {
|
||||
throw new SdkError(`Tab ${tabId} not found`, 404)
|
||||
}
|
||||
await this.browser.goto(page.pageId, url)
|
||||
return { tabId, windowId: page.windowId ?? 0 }
|
||||
}
|
||||
|
||||
return result.structuredContent
|
||||
if (windowId !== undefined) {
|
||||
const pages = await this.browser.listPages()
|
||||
const page = pages.find((p) => p.windowId === windowId && p.isActive)
|
||||
if (!page) {
|
||||
throw new SdkError('No active tab in specified window')
|
||||
}
|
||||
await this.browser.goto(page.pageId, url)
|
||||
return { tabId: page.tabId, windowId }
|
||||
}
|
||||
|
||||
const activePage = await this.browser.getActivePage()
|
||||
if (!activePage) {
|
||||
throw new SdkError('No active tab to navigate')
|
||||
}
|
||||
await this.browser.goto(activePage.pageId, url)
|
||||
return {
|
||||
tabId: activePage.tabId,
|
||||
windowId: activePage.windowId ?? 0,
|
||||
}
|
||||
}
|
||||
|
||||
async getPageLoadStatus(tabId: number): Promise<PageLoadStatus> {
|
||||
const result = await callMcpTool<PageLoadStatus>(
|
||||
this.mcpServerUrl,
|
||||
'browser_get_load_status',
|
||||
{ tabId },
|
||||
)
|
||||
|
||||
if (result.isError || result.structuredContent?.tabId === undefined) {
|
||||
throw new SdkError(
|
||||
getTextContent(result) || 'Failed to get page load status',
|
||||
)
|
||||
const pages = await this.browser.listPages()
|
||||
const page = pages.find((p) => p.tabId === tabId)
|
||||
if (!page) {
|
||||
throw new SdkError('Tab not found', 404)
|
||||
}
|
||||
return {
|
||||
tabId: page.tabId,
|
||||
isDOMContentLoaded: !page.isLoading,
|
||||
isResourcesLoading: page.isLoading,
|
||||
isPageComplete: !page.isLoading,
|
||||
}
|
||||
|
||||
return result.structuredContent
|
||||
}
|
||||
|
||||
async getInteractiveElements(
|
||||
tabId: number,
|
||||
simplified = false,
|
||||
windowId?: number,
|
||||
_windowId?: number,
|
||||
): Promise<InteractiveElements> {
|
||||
const result = await callMcpTool<InteractiveElements>(
|
||||
this.mcpServerUrl,
|
||||
'browser_get_interactive_elements',
|
||||
{
|
||||
tabId,
|
||||
simplified,
|
||||
...(windowId && { windowId }),
|
||||
},
|
||||
)
|
||||
|
||||
if (result.isError) {
|
||||
throw new SdkError(
|
||||
getTextContent(result) || 'Failed to get interactive elements',
|
||||
)
|
||||
}
|
||||
|
||||
const content = result.structuredContent?.content || getTextContent(result)
|
||||
|
||||
const pageId = await this.getPageIdForTab(tabId)
|
||||
const content = simplified
|
||||
? await this.browser.snapshot(pageId)
|
||||
: await this.browser.enhancedSnapshot(pageId)
|
||||
return { content }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
import { BrowserContextSchema } from '@browseros/shared/schemas/browser-context'
|
||||
import { LLMConfigSchema } from '@browseros/shared/schemas/llm'
|
||||
import { z } from 'zod'
|
||||
import type { Browser } from '../../../browser/browser'
|
||||
|
||||
// Request validation schemas
|
||||
|
||||
@@ -53,6 +54,7 @@ export type VerifyRequest = z.infer<typeof VerifyRequestSchema>
|
||||
|
||||
export interface SdkDeps {
|
||||
port: number
|
||||
browser: Browser
|
||||
browserosId?: string
|
||||
}
|
||||
|
||||
@@ -63,10 +65,6 @@ export interface ActiveTab {
|
||||
windowId: number
|
||||
}
|
||||
|
||||
export interface PageContent {
|
||||
content: string
|
||||
}
|
||||
|
||||
export interface Screenshot {
|
||||
data: string
|
||||
mimeType: string
|
||||
|
||||
Reference in New Issue
Block a user