mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-13 15:46:22 +00:00
fix: make SDK navigation tolerate unfocused startup tabs (#607)
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
* Browser Service - Direct browser operations for SDK
|
||||
*/
|
||||
|
||||
import type { Browser } from '../../../browser/browser'
|
||||
import type { Browser, PageInfo } from '../../../browser/browser'
|
||||
import type {
|
||||
ActiveTab,
|
||||
InteractiveElements,
|
||||
@@ -19,6 +19,65 @@ import { SdkError } from './types'
|
||||
export class BrowserService {
|
||||
constructor(private browser: Browser) {}
|
||||
|
||||
/**
 * Picks a single page out of `pages`, optionally limited to one window.
 *
 * Preference order among the candidates: the first page flagged `isActive`,
 * then the first page not flagged `isHidden`, then simply the first candidate.
 *
 * @param pages - Pages to choose from.
 * @param windowId - When given, only pages whose `windowId` equals it are considered.
 * @returns The chosen page, or `null` when there is no candidate.
 */
private selectPage(pages: PageInfo[], windowId?: number): PageInfo | null {
  const candidates =
    windowId !== undefined
      ? pages.filter((candidate) => candidate.windowId === windowId)
      : pages

  const activeCandidate = candidates.find((candidate) => candidate.isActive)
  if (activeCandidate) {
    return activeCandidate
  }

  const visibleCandidate = candidates.find((candidate) => !candidate.isHidden)
  if (visibleCandidate) {
    return visibleCandidate
  }

  // Last resort: whatever comes first; an empty candidate list yields null.
  return candidates[0] ?? null
}
|
||||
|
||||
/**
 * Looks for an already-open page to operate on; never creates one.
 *
 * Without a `windowId`, the result of `browser.getActivePage()` is returned
 * when it is truthy. In every other case the full page list is handed to
 * `selectPage` together with `windowId`.
 *
 * @param windowId - Optional window to restrict the search to.
 * @returns The page found, or `null` when `selectPage` finds none.
 */
private async findExistingPage(windowId?: number): Promise<PageInfo | null> {
  // The browser-wide active page is only consulted when no window is requested.
  const focusedPage =
    windowId === undefined ? await this.browser.getActivePage() : null
  if (focusedPage) {
    return focusedPage
  }

  const allPages = await this.browser.listPages()
  return this.selectPage(allPages, windowId)
}
|
||||
|
||||
/**
 * Like `findExistingPage`, but a missing page is an error.
 *
 * @param windowId - Optional window to restrict the search to.
 * @returns The page found by `findExistingPage`.
 * @throws SdkError when no page is found; the message depends on whether a
 *   `windowId` was supplied.
 */
private async resolveExistingPage(windowId?: number): Promise<PageInfo> {
  const found = await this.findExistingPage(windowId)
  if (found) {
    return found
  }

  const message =
    windowId === undefined
      ? 'No active tab found'
      : 'No tab found in specified window'
  throw new SdkError(message)
}
|
||||
|
||||
/**
 * Resolves the page a navigation should run in.
 *
 * An existing page (per `findExistingPage`) is reused when available. If none
 * exists and a `windowId` was requested, the call fails. If none exists and no
 * window was requested, a new `about:blank` page is opened with
 * `background: false` and then looked up in the page list by its page id.
 *
 * @param windowId - Optional window to restrict the search to.
 * @returns The page to navigate.
 * @throws SdkError when the requested window has no page, or when the newly
 *   created page does not appear in `browser.listPages()`.
 */
private async resolveNavigationPage(windowId?: number): Promise<PageInfo> {
  const reusablePage = await this.findExistingPage(windowId)
  if (reusablePage) {
    return reusablePage
  }

  // A page is only created when the caller did not ask for a specific window.
  if (windowId !== undefined) {
    throw new SdkError('No tab found in specified window')
  }

  const newPageId = await this.browser.newPage('about:blank', {
    background: false,
  })
  const pagesAfterCreate = await this.browser.listPages()
  const freshPage = pagesAfterCreate.find((page) => page.pageId === newPageId)
  if (!freshPage) {
    throw new SdkError('Failed to create a tab for navigation')
  }

  return freshPage
}
|
||||
|
||||
private async getPageIdForTab(tabId: number): Promise<number> {
|
||||
const resolved = await this.browser.resolveTabIds([tabId])
|
||||
const pageId = resolved.get(tabId)
|
||||
@@ -29,26 +88,7 @@ export class BrowserService {
|
||||
}
|
||||
|
||||
async getActiveTab(windowId?: number): Promise<ActiveTab> {
|
||||
if (windowId !== undefined) {
|
||||
// Find the active tab in the specified window
|
||||
const pages = await this.browser.listPages()
|
||||
const page = pages.find((p) => p.windowId === windowId && p.isActive)
|
||||
if (!page) {
|
||||
throw new SdkError('No active tab found in specified window')
|
||||
}
|
||||
return {
|
||||
tabId: page.tabId,
|
||||
url: page.url,
|
||||
title: page.title,
|
||||
windowId: page.windowId ?? 0,
|
||||
}
|
||||
}
|
||||
|
||||
const page = await this.browser.getActivePage()
|
||||
if (!page) {
|
||||
throw new SdkError('No active tab found')
|
||||
}
|
||||
|
||||
const page = await this.resolveExistingPage(windowId)
|
||||
return {
|
||||
tabId: page.tabId,
|
||||
url: page.url,
|
||||
@@ -89,20 +129,7 @@ export class BrowserService {
|
||||
return { tabId, windowId: page.windowId ?? 0 }
|
||||
}
|
||||
|
||||
if (windowId !== undefined) {
|
||||
const pages = await this.browser.listPages()
|
||||
const page = pages.find((p) => p.windowId === windowId && p.isActive)
|
||||
if (!page) {
|
||||
throw new SdkError('No active tab in specified window')
|
||||
}
|
||||
await this.browser.goto(page.pageId, url)
|
||||
return { tabId: page.tabId, windowId }
|
||||
}
|
||||
|
||||
const activePage = await this.browser.getActivePage()
|
||||
if (!activePage) {
|
||||
throw new SdkError('No active tab to navigate')
|
||||
}
|
||||
const activePage = await this.resolveNavigationPage(windowId)
|
||||
await this.browser.goto(activePage.pageId, url)
|
||||
return {
|
||||
tabId: activePage.tabId,
|
||||
|
||||
@@ -6,21 +6,57 @@
|
||||
* Tests the SDK against a real BrowserOS server.
|
||||
*/
|
||||
|
||||
import { beforeAll, describe, it } from 'bun:test'
|
||||
import { afterAll, beforeAll, describe, it } from 'bun:test'
|
||||
import assert from 'node:assert'
|
||||
import { Agent } from '@browseros-ai/agent-sdk'
|
||||
|
||||
import { CdpBackend } from '../../src/browser/backends/cdp'
|
||||
import type { ControllerBackend } from '../../src/browser/backends/types'
|
||||
import { Browser } from '../../src/browser/browser'
|
||||
import {
|
||||
ensureBrowserOS,
|
||||
type TestEnvironmentConfig,
|
||||
} from '../__helpers__/setup'
|
||||
|
||||
let config: TestEnvironmentConfig
|
||||
let cdp: CdpBackend | null = null
|
||||
let runtimeWindowId: number
|
||||
|
||||
/**
 * Placeholder controller backend for these tests: `start`/`stop` do nothing,
 * it always reports itself as disconnected, and any `send` call rejects.
 */
const stubController: ControllerBackend = {
  async start() {},
  async stop() {},
  isConnected() {
    return false
  },
  async send() {
    throw new Error('Controller not available in SDK tests')
  },
}
|
||||
|
||||
/**
 * Connects to the browser over CDP and returns the id of a real window.
 *
 * Among the pages that report a `windowId`, the first one not flagged
 * `isHidden` is preferred; otherwise the first one with a `windowId` is used.
 * The CDP connection is stored in the module-level `cdp` variable.
 *
 * @param testConfig - Test environment; its `cdpPort` is used for the connection.
 * @returns The `windowId` of the chosen page.
 * @throws AssertionError when no page reports a `windowId`.
 */
async function getRuntimeWindow(
  testConfig: TestEnvironmentConfig,
): Promise<number> {
  const backend = new CdpBackend({ port: testConfig.cdpPort })
  await backend.connect()
  // Keep the connection reachable at module scope so it can be closed later.
  cdp = backend

  const pages = await new Browser(backend, stubController).listPages()
  const pagesWithWindow = pages.filter((entry) => entry.windowId !== undefined)
  const chosen =
    pagesWithWindow.find((entry) => !entry.isHidden) ?? pagesWithWindow[0]

  assert.ok(chosen?.windowId !== undefined, 'Expected a runtime window ID')
  return chosen.windowId
}
|
||||
|
||||
// One-time setup (60s timeout): obtain the test environment config, then
// resolve a window id from the running browser for the windowId tests.
beforeAll(async () => {
  const environment = await ensureBrowserOS()
  config = environment
  runtimeWindowId = await getRuntimeWindow(environment)
}, 60000)
|
||||
|
||||
// Teardown: close the CDP connection if one was stored in `cdp`.
afterAll(async () => {
  if (cdp) {
    await cdp.disconnect()
  }
})
|
||||
|
||||
function createAgent(browserContext?: {
|
||||
windowId?: number
|
||||
activeTab?: { id: number; url: string }
|
||||
@@ -177,20 +213,17 @@ describe('Agent SDK Integration', () => {
|
||||
|
||||
describe('browserContext', () => {
|
||||
it('passes windowId through nav()', async () => {
|
||||
const testWindowId = 12345
|
||||
const testWindowId = runtimeWindowId
|
||||
const agent = createAgent({ windowId: testWindowId })
|
||||
const events: unknown[] = []
|
||||
agent.onProgress((event) => events.push(event))
|
||||
|
||||
// This will use the windowId from browserContext
|
||||
// Server logs should show the windowId being passed
|
||||
const result = await agent.nav('data:text/html,<h1>Window Test</h1>')
|
||||
|
||||
console.log('\n=== nav() with windowId ===')
|
||||
console.log('windowId:', testWindowId)
|
||||
console.log('result:', JSON.stringify(result, null, 2))
|
||||
|
||||
// Navigation may fail if window doesn't exist, but we're testing the flow
|
||||
assert.ok(
|
||||
typeof result.success === 'boolean',
|
||||
'Should return a result with success boolean',
|
||||
@@ -198,14 +231,12 @@ describe('Agent SDK Integration', () => {
|
||||
}, 30000)
|
||||
|
||||
it('passes windowId through act()', async () => {
|
||||
const testWindowId = 12345
|
||||
const testWindowId = runtimeWindowId
|
||||
const agent = createAgent({ windowId: testWindowId })
|
||||
|
||||
// First navigate without windowId constraint to set up the page
|
||||
const plainAgent = createAgent()
|
||||
await plainAgent.nav('data:text/html,<button id="btn">Click</button>')
|
||||
|
||||
// Now act with windowId - server logs should show windowId being passed
|
||||
const result = await agent.act('describe what you see')
|
||||
|
||||
console.log('\n=== act() with windowId ===')
|
||||
@@ -220,14 +251,12 @@ describe('Agent SDK Integration', () => {
|
||||
|
||||
it('passes windowId through extract()', async () => {
|
||||
const { z } = await import('zod')
|
||||
const testWindowId = 12345
|
||||
const testWindowId = runtimeWindowId
|
||||
const agent = createAgent({ windowId: testWindowId })
|
||||
|
||||
// Set up a page first
|
||||
const plainAgent = createAgent()
|
||||
await plainAgent.nav('data:text/html,<h1>Extract Test</h1>')
|
||||
|
||||
// Extract with windowId - server logs should show windowId
|
||||
const result = await agent.extract('get the page heading', {
|
||||
schema: z.object({ heading: z.string() }),
|
||||
})
|
||||
@@ -240,14 +269,12 @@ describe('Agent SDK Integration', () => {
|
||||
}, 60000)
|
||||
|
||||
it('passes windowId through verify()', async () => {
|
||||
const testWindowId = 12345
|
||||
const testWindowId = runtimeWindowId
|
||||
const agent = createAgent({ windowId: testWindowId })
|
||||
|
||||
// Set up a page first
|
||||
const plainAgent = createAgent()
|
||||
await plainAgent.nav('data:text/html,<h1>Verify Test</h1>')
|
||||
|
||||
// Verify with windowId - server logs should show windowId
|
||||
const result = await agent.verify('the page has some content')
|
||||
|
||||
console.log('\n=== verify() with windowId ===')
|
||||
|
||||
Reference in New Issue
Block a user