diff --git a/packages/browseros-agent/apps/server/src/api/services/sdk/browser.ts b/packages/browseros-agent/apps/server/src/api/services/sdk/browser.ts index 2828bf287..cca5835eb 100644 --- a/packages/browseros-agent/apps/server/src/api/services/sdk/browser.ts +++ b/packages/browseros-agent/apps/server/src/api/services/sdk/browser.ts @@ -6,7 +6,7 @@ * Browser Service - Direct browser operations for SDK */ -import type { Browser } from '../../../browser/browser' +import type { Browser, PageInfo } from '../../../browser/browser' import type { ActiveTab, InteractiveElements, @@ -19,6 +19,65 @@ import { SdkError } from './types' export class BrowserService { constructor(private browser: Browser) {} + private selectPage(pages: PageInfo[], windowId?: number): PageInfo | null { + const scopedPages = + windowId === undefined + ? pages + : pages.filter((page) => page.windowId === windowId) + if (scopedPages.length === 0) { + return null + } + return ( + scopedPages.find((page) => page.isActive) ?? + scopedPages.find((page) => !page.isHidden) ?? + scopedPages[0] + ) + } + + private async findExistingPage(windowId?: number): Promise { + if (windowId === undefined) { + const activePage = await this.browser.getActivePage() + if (activePage) { + return activePage + } + } + + return this.selectPage(await this.browser.listPages(), windowId) + } + + private async resolveExistingPage(windowId?: number): Promise { + const page = await this.findExistingPage(windowId) + if (!page) { + throw new SdkError( + windowId === undefined + ? 'No active tab found' + : 'No tab found in specified window', + ) + } + return page + } + + private async resolveNavigationPage(windowId?: number): Promise { + const existingPage = await this.findExistingPage(windowId) + if (existingPage) { + return existingPage + } + if (windowId !== undefined) { + throw new SdkError('No tab found in specified window') + } + + const pageId = await this.browser.newPage('about:blank', { + background: false, + }) + const createdPage = (await this.browser.listPages()).find( + (page) => page.pageId === pageId, + ) + if (!createdPage) { + throw new SdkError('Failed to create a tab for navigation') + } + return createdPage + } + private async getPageIdForTab(tabId: number): Promise { const resolved = await this.browser.resolveTabIds([tabId]) const pageId = resolved.get(tabId) @@ -29,26 +88,7 @@ export class BrowserService { } async getActiveTab(windowId?: number): Promise { - if (windowId !== undefined) { - // Find the active tab in the specified window - const pages = await this.browser.listPages() - const page = pages.find((p) => p.windowId === windowId && p.isActive) - if (!page) { - throw new SdkError('No active tab found in specified window') - } - return { - tabId: page.tabId, - url: page.url, - title: page.title, - windowId: page.windowId ?? 0, - } - } - - const page = await this.browser.getActivePage() - if (!page) { - throw new SdkError('No active tab found') - } - + const page = await this.resolveExistingPage(windowId) return { tabId: page.tabId, url: page.url, @@ -89,20 +129,7 @@ export class BrowserService { return { tabId, windowId: page.windowId ?? 0 } } - if (windowId !== undefined) { - const pages = await this.browser.listPages() - const page = pages.find((p) => p.windowId === windowId && p.isActive) - if (!page) { - throw new SdkError('No active tab in specified window') - } - await this.browser.goto(page.pageId, url) - return { tabId: page.tabId, windowId } - } - - const activePage = await this.browser.getActivePage() - if (!activePage) { - throw new SdkError('No active tab to navigate') - } + const activePage = await this.resolveNavigationPage(windowId) await this.browser.goto(activePage.pageId, url) return { tabId: activePage.tabId, diff --git a/packages/browseros-agent/apps/server/tests/sdk/agent-sdk.test.ts b/packages/browseros-agent/apps/server/tests/sdk/agent-sdk.test.ts index f6cf35390..ea3d38ac9 100644 --- a/packages/browseros-agent/apps/server/tests/sdk/agent-sdk.test.ts +++ b/packages/browseros-agent/apps/server/tests/sdk/agent-sdk.test.ts @@ -6,21 +6,57 @@ * Tests the SDK against a real BrowserOS server. */ -import { beforeAll, describe, it } from 'bun:test' +import { afterAll, beforeAll, describe, it } from 'bun:test' import assert from 'node:assert' import { Agent } from '@browseros-ai/agent-sdk' +import { CdpBackend } from '../../src/browser/backends/cdp' +import type { ControllerBackend } from '../../src/browser/backends/types' +import { Browser } from '../../src/browser/browser' import { ensureBrowserOS, type TestEnvironmentConfig, } from '../__helpers__/setup' let config: TestEnvironmentConfig +let cdp: CdpBackend | null = null +let runtimeWindowId: number + +const stubController: ControllerBackend = { + start: async () => {}, + stop: async () => {}, + isConnected: () => false, + send: async () => { + throw new Error('Controller not available in SDK tests') + }, +} + +async function getRuntimeWindow( + testConfig: TestEnvironmentConfig, +): Promise { + const runtimeCdp = new CdpBackend({ port: testConfig.cdpPort }) + await runtimeCdp.connect() + cdp = runtimeCdp + + const browser = new Browser(runtimeCdp, stubController) + const pages = await browser.listPages() + const page = + pages.find((entry) => !entry.isHidden && entry.windowId !== undefined) ?? + pages.find((entry) => entry.windowId !== undefined) + + assert.ok(page?.windowId !== undefined, 'Expected a runtime window ID') + return page.windowId +} beforeAll(async () => { config = await ensureBrowserOS() + runtimeWindowId = await getRuntimeWindow(config) }, 60000) +afterAll(async () => { + await cdp?.disconnect() +}) + function createAgent(browserContext?: { windowId?: number activeTab?: { id: number; url: string } @@ -177,20 +213,17 @@ describe('Agent SDK Integration', () => { describe('browserContext', () => { it('passes windowId through nav()', async () => { - const testWindowId = 12345 + const testWindowId = runtimeWindowId const agent = createAgent({ windowId: testWindowId }) const events: unknown[] = [] agent.onProgress((event) => events.push(event)) - // This will use the windowId from browserContext - // Server logs should show the windowId being passed const result = await agent.nav('data:text/html,

Window Test

') console.log('\n=== nav() with windowId ===') console.log('windowId:', testWindowId) console.log('result:', JSON.stringify(result, null, 2)) - // Navigation may fail if window doesn't exist, but we're testing the flow assert.ok( typeof result.success === 'boolean', 'Should return a result with success boolean', @@ -198,14 +231,12 @@ describe('Agent SDK Integration', () => { }, 30000) it('passes windowId through act()', async () => { - const testWindowId = 12345 + const testWindowId = runtimeWindowId const agent = createAgent({ windowId: testWindowId }) - // First navigate without windowId constraint to set up the page const plainAgent = createAgent() await plainAgent.nav('data:text/html,') - // Now act with windowId - server logs should show windowId being passed const result = await agent.act('describe what you see') console.log('\n=== act() with windowId ===') @@ -220,14 +251,12 @@ describe('Agent SDK Integration', () => { it('passes windowId through extract()', async () => { const { z } = await import('zod') - const testWindowId = 12345 + const testWindowId = runtimeWindowId const agent = createAgent({ windowId: testWindowId }) - // Set up a page first const plainAgent = createAgent() await plainAgent.nav('data:text/html,

Extract Test

') - // Extract with windowId - server logs should show windowId const result = await agent.extract('get the page heading', { schema: z.object({ heading: z.string() }), }) @@ -240,14 +269,12 @@ describe('Agent SDK Integration', () => { }, 60000) it('passes windowId through verify()', async () => { - const testWindowId = 12345 + const testWindowId = runtimeWindowId const agent = createAgent({ windowId: testWindowId }) - // Set up a page first const plainAgent = createAgent() await plainAgent.nav('data:text/html,

Verify Test

') - // Verify with windowId - server logs should show windowId const result = await agent.verify('the page has some content') console.log('\n=== verify() with windowId ===')