mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-13 15:46:22 +00:00
fix: make SDK navigation tolerate unfocused startup tabs (#607)
This commit is contained in:
@@ -6,7 +6,7 @@
|
|||||||
* Browser Service - Direct browser operations for SDK
|
* Browser Service - Direct browser operations for SDK
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import type { Browser } from '../../../browser/browser'
|
import type { Browser, PageInfo } from '../../../browser/browser'
|
||||||
import type {
|
import type {
|
||||||
ActiveTab,
|
ActiveTab,
|
||||||
InteractiveElements,
|
InteractiveElements,
|
||||||
@@ -19,6 +19,65 @@ import { SdkError } from './types'
|
|||||||
export class BrowserService {
|
export class BrowserService {
|
||||||
constructor(private browser: Browser) {}
|
constructor(private browser: Browser) {}
|
||||||
|
|
||||||
|
/**
 * Pick one page out of `pages`, optionally restricted to a single window.
 *
 * When `windowId` is provided, only pages whose `windowId` equals it are
 * considered. Among the candidates the preference order is:
 *   1. the first page flagged `isActive`,
 *   2. the first page not flagged `isHidden`,
 *   3. the first candidate.
 *
 * @param pages - Pages to choose from.
 * @param windowId - Optional window to restrict the choice to.
 * @returns The chosen page, or `null` when no page is in scope.
 */
private selectPage(pages: PageInfo[], windowId?: number): PageInfo | null {
  const candidates =
    windowId !== undefined
      ? pages.filter((candidate) => candidate.windowId === windowId)
      : pages

  if (candidates.length === 0) {
    return null
  }

  const active = candidates.find((candidate) => candidate.isActive)
  if (active) {
    return active
  }

  const visible = candidates.find((candidate) => !candidate.isHidden)
  if (visible) {
    return visible
  }

  // Nothing is active or visible: fall back to the first page in scope.
  return candidates[0]
}
|
||||||
|
|
||||||
|
/**
 * Find an already-open page to operate on.
 *
 * With no `windowId`, the result of `browser.getActivePage()` is returned
 * when it is truthy. In every other case (a window was specified, or there
 * is no active page) the choice is delegated to `selectPage` over the
 * result of `browser.listPages()`.
 *
 * @param windowId - Optional window to restrict the lookup to.
 * @returns The page found, or `null` when `selectPage` finds none.
 */
private async findExistingPage(windowId?: number): Promise<PageInfo | null> {
  // The active-page shortcut only applies when the caller did not scope the
  // lookup to a window.
  const focused =
    windowId === undefined ? await this.browser.getActivePage() : null
  if (focused) {
    return focused
  }

  const pages = await this.browser.listPages()
  return this.selectPage(pages, windowId)
}
|
||||||
|
|
||||||
|
/**
 * Like `findExistingPage`, but fails instead of returning `null`.
 *
 * @param windowId - Optional window to restrict the lookup to.
 * @returns The page found by `findExistingPage`.
 * @throws SdkError 'No active tab found' when no window was specified, or
 *   'No tab found in specified window' when one was, and no page exists.
 */
private async resolveExistingPage(windowId?: number): Promise<PageInfo> {
  const found = await this.findExistingPage(windowId)
  if (found) {
    return found
  }

  const message =
    windowId === undefined
      ? 'No active tab found'
      : 'No tab found in specified window'
  throw new SdkError(message)
}
|
||||||
|
|
||||||
|
/**
 * Resolve the page a navigation should target, creating one if necessary.
 *
 * An existing page (per `findExistingPage`) is reused when available. If
 * none exists and a `windowId` was specified, the call fails. If none exists
 * and no window was specified, a new `about:blank` page is requested with
 * `background: false` and then looked up by its `pageId` in `listPages()`.
 *
 * @param windowId - Optional window to restrict the lookup to.
 * @returns The page to navigate.
 * @throws SdkError 'No tab found in specified window' when a window was
 *   specified but has no page; 'Failed to create a tab for navigation' when
 *   the newly created page is not present in `listPages()`.
 */
private async resolveNavigationPage(windowId?: number): Promise<PageInfo> {
  const reusable = await this.findExistingPage(windowId)
  if (reusable) {
    return reusable
  }

  // A page is only created when the caller did not pin a window.
  if (windowId !== undefined) {
    throw new SdkError('No tab found in specified window')
  }

  const newPageId = await this.browser.newPage('about:blank', {
    background: false,
  })

  // newPage returns only an id, so the full PageInfo is fetched from the list.
  const allPages = await this.browser.listPages()
  const opened = allPages.find((entry) => entry.pageId === newPageId)
  if (opened) {
    return opened
  }

  throw new SdkError('Failed to create a tab for navigation')
}
|
||||||
|
|
||||||
private async getPageIdForTab(tabId: number): Promise<number> {
|
private async getPageIdForTab(tabId: number): Promise<number> {
|
||||||
const resolved = await this.browser.resolveTabIds([tabId])
|
const resolved = await this.browser.resolveTabIds([tabId])
|
||||||
const pageId = resolved.get(tabId)
|
const pageId = resolved.get(tabId)
|
||||||
@@ -29,26 +88,7 @@ export class BrowserService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async getActiveTab(windowId?: number): Promise<ActiveTab> {
|
async getActiveTab(windowId?: number): Promise<ActiveTab> {
|
||||||
if (windowId !== undefined) {
|
const page = await this.resolveExistingPage(windowId)
|
||||||
// Find the active tab in the specified window
|
|
||||||
const pages = await this.browser.listPages()
|
|
||||||
const page = pages.find((p) => p.windowId === windowId && p.isActive)
|
|
||||||
if (!page) {
|
|
||||||
throw new SdkError('No active tab found in specified window')
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
tabId: page.tabId,
|
|
||||||
url: page.url,
|
|
||||||
title: page.title,
|
|
||||||
windowId: page.windowId ?? 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const page = await this.browser.getActivePage()
|
|
||||||
if (!page) {
|
|
||||||
throw new SdkError('No active tab found')
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
tabId: page.tabId,
|
tabId: page.tabId,
|
||||||
url: page.url,
|
url: page.url,
|
||||||
@@ -89,20 +129,7 @@ export class BrowserService {
|
|||||||
return { tabId, windowId: page.windowId ?? 0 }
|
return { tabId, windowId: page.windowId ?? 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
if (windowId !== undefined) {
|
const activePage = await this.resolveNavigationPage(windowId)
|
||||||
const pages = await this.browser.listPages()
|
|
||||||
const page = pages.find((p) => p.windowId === windowId && p.isActive)
|
|
||||||
if (!page) {
|
|
||||||
throw new SdkError('No active tab in specified window')
|
|
||||||
}
|
|
||||||
await this.browser.goto(page.pageId, url)
|
|
||||||
return { tabId: page.tabId, windowId }
|
|
||||||
}
|
|
||||||
|
|
||||||
const activePage = await this.browser.getActivePage()
|
|
||||||
if (!activePage) {
|
|
||||||
throw new SdkError('No active tab to navigate')
|
|
||||||
}
|
|
||||||
await this.browser.goto(activePage.pageId, url)
|
await this.browser.goto(activePage.pageId, url)
|
||||||
return {
|
return {
|
||||||
tabId: activePage.tabId,
|
tabId: activePage.tabId,
|
||||||
|
|||||||
@@ -6,21 +6,57 @@
|
|||||||
* Tests the SDK against a real BrowserOS server.
|
* Tests the SDK against a real BrowserOS server.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { beforeAll, describe, it } from 'bun:test'
|
import { afterAll, beforeAll, describe, it } from 'bun:test'
|
||||||
import assert from 'node:assert'
|
import assert from 'node:assert'
|
||||||
import { Agent } from '@browseros-ai/agent-sdk'
|
import { Agent } from '@browseros-ai/agent-sdk'
|
||||||
|
|
||||||
|
import { CdpBackend } from '../../src/browser/backends/cdp'
|
||||||
|
import type { ControllerBackend } from '../../src/browser/backends/types'
|
||||||
|
import { Browser } from '../../src/browser/browser'
|
||||||
import {
|
import {
|
||||||
ensureBrowserOS,
|
ensureBrowserOS,
|
||||||
type TestEnvironmentConfig,
|
type TestEnvironmentConfig,
|
||||||
} from '../__helpers__/setup'
|
} from '../__helpers__/setup'
|
||||||
|
|
||||||
let config: TestEnvironmentConfig
|
let config: TestEnvironmentConfig
|
||||||
|
let cdp: CdpBackend | null = null
|
||||||
|
let runtimeWindowId: number
|
||||||
|
|
||||||
|
/**
 * Placeholder controller backend for the SDK tests: `start` and `stop` do
 * nothing, `isConnected` always reports `false`, and `send` always rejects.
 */
const stubController: ControllerBackend = {
  async start() {},
  async stop() {},
  isConnected() {
    return false
  },
  async send() {
    throw new Error('Controller not available in SDK tests')
  },
}
|
||||||
|
|
||||||
|
/**
 * Connect to the running browser over CDP and return the ID of a real window.
 *
 * The connected backend is stored in the module-level `cdp` variable. Among
 * the pages that have a `windowId`, the first one not flagged `isHidden` is
 * preferred; otherwise the first page with a `windowId` is used.
 *
 * @param testConfig - Test environment config; its `cdpPort` is used to connect.
 * @returns The `windowId` of the chosen page.
 * @throws AssertionError when no listed page has a `windowId`.
 */
async function getRuntimeWindow(
  testConfig: TestEnvironmentConfig,
): Promise<number> {
  const backend = new CdpBackend({ port: testConfig.cdpPort })
  await backend.connect()
  // Publish the backend only after connect() has resolved.
  cdp = backend

  const pages = await new Browser(backend, stubController).listPages()
  const withWindow = pages.filter((entry) => entry.windowId !== undefined)
  const chosen = withWindow.find((entry) => !entry.isHidden) ?? withWindow[0]

  assert.ok(chosen?.windowId !== undefined, 'Expected a runtime window ID')
  return chosen.windowId
}
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
config = await ensureBrowserOS()
|
config = await ensureBrowserOS()
|
||||||
|
runtimeWindowId = await getRuntimeWindow(config)
|
||||||
}, 60000)
|
}, 60000)
|
||||||
|
|
||||||
|
// Disconnect the CDP backend held in `cdp`, if one was ever assigned.
afterAll(async () => {
  if (cdp) {
    await cdp.disconnect()
  }
})
|
||||||
|
|
||||||
function createAgent(browserContext?: {
|
function createAgent(browserContext?: {
|
||||||
windowId?: number
|
windowId?: number
|
||||||
activeTab?: { id: number; url: string }
|
activeTab?: { id: number; url: string }
|
||||||
@@ -177,20 +213,17 @@ describe('Agent SDK Integration', () => {
|
|||||||
|
|
||||||
describe('browserContext', () => {
|
describe('browserContext', () => {
|
||||||
it('passes windowId through nav()', async () => {
|
it('passes windowId through nav()', async () => {
|
||||||
const testWindowId = 12345
|
const testWindowId = runtimeWindowId
|
||||||
const agent = createAgent({ windowId: testWindowId })
|
const agent = createAgent({ windowId: testWindowId })
|
||||||
const events: unknown[] = []
|
const events: unknown[] = []
|
||||||
agent.onProgress((event) => events.push(event))
|
agent.onProgress((event) => events.push(event))
|
||||||
|
|
||||||
// This will use the windowId from browserContext
|
|
||||||
// Server logs should show the windowId being passed
|
|
||||||
const result = await agent.nav('data:text/html,<h1>Window Test</h1>')
|
const result = await agent.nav('data:text/html,<h1>Window Test</h1>')
|
||||||
|
|
||||||
console.log('\n=== nav() with windowId ===')
|
console.log('\n=== nav() with windowId ===')
|
||||||
console.log('windowId:', testWindowId)
|
console.log('windowId:', testWindowId)
|
||||||
console.log('result:', JSON.stringify(result, null, 2))
|
console.log('result:', JSON.stringify(result, null, 2))
|
||||||
|
|
||||||
// Navigation may fail if window doesn't exist, but we're testing the flow
|
|
||||||
assert.ok(
|
assert.ok(
|
||||||
typeof result.success === 'boolean',
|
typeof result.success === 'boolean',
|
||||||
'Should return a result with success boolean',
|
'Should return a result with success boolean',
|
||||||
@@ -198,14 +231,12 @@ describe('Agent SDK Integration', () => {
|
|||||||
}, 30000)
|
}, 30000)
|
||||||
|
|
||||||
it('passes windowId through act()', async () => {
|
it('passes windowId through act()', async () => {
|
||||||
const testWindowId = 12345
|
const testWindowId = runtimeWindowId
|
||||||
const agent = createAgent({ windowId: testWindowId })
|
const agent = createAgent({ windowId: testWindowId })
|
||||||
|
|
||||||
// First navigate without windowId constraint to set up the page
|
|
||||||
const plainAgent = createAgent()
|
const plainAgent = createAgent()
|
||||||
await plainAgent.nav('data:text/html,<button id="btn">Click</button>')
|
await plainAgent.nav('data:text/html,<button id="btn">Click</button>')
|
||||||
|
|
||||||
// Now act with windowId - server logs should show windowId being passed
|
|
||||||
const result = await agent.act('describe what you see')
|
const result = await agent.act('describe what you see')
|
||||||
|
|
||||||
console.log('\n=== act() with windowId ===')
|
console.log('\n=== act() with windowId ===')
|
||||||
@@ -220,14 +251,12 @@ describe('Agent SDK Integration', () => {
|
|||||||
|
|
||||||
it('passes windowId through extract()', async () => {
|
it('passes windowId through extract()', async () => {
|
||||||
const { z } = await import('zod')
|
const { z } = await import('zod')
|
||||||
const testWindowId = 12345
|
const testWindowId = runtimeWindowId
|
||||||
const agent = createAgent({ windowId: testWindowId })
|
const agent = createAgent({ windowId: testWindowId })
|
||||||
|
|
||||||
// Set up a page first
|
|
||||||
const plainAgent = createAgent()
|
const plainAgent = createAgent()
|
||||||
await plainAgent.nav('data:text/html,<h1>Extract Test</h1>')
|
await plainAgent.nav('data:text/html,<h1>Extract Test</h1>')
|
||||||
|
|
||||||
// Extract with windowId - server logs should show windowId
|
|
||||||
const result = await agent.extract('get the page heading', {
|
const result = await agent.extract('get the page heading', {
|
||||||
schema: z.object({ heading: z.string() }),
|
schema: z.object({ heading: z.string() }),
|
||||||
})
|
})
|
||||||
@@ -240,14 +269,12 @@ describe('Agent SDK Integration', () => {
|
|||||||
}, 60000)
|
}, 60000)
|
||||||
|
|
||||||
it('passes windowId through verify()', async () => {
|
it('passes windowId through verify()', async () => {
|
||||||
const testWindowId = 12345
|
const testWindowId = runtimeWindowId
|
||||||
const agent = createAgent({ windowId: testWindowId })
|
const agent = createAgent({ windowId: testWindowId })
|
||||||
|
|
||||||
// Set up a page first
|
|
||||||
const plainAgent = createAgent()
|
const plainAgent = createAgent()
|
||||||
await plainAgent.nav('data:text/html,<h1>Verify Test</h1>')
|
await plainAgent.nav('data:text/html,<h1>Verify Test</h1>')
|
||||||
|
|
||||||
// Verify with windowId - server logs should show windowId
|
|
||||||
const result = await agent.verify('the page has some content')
|
const result = await agent.verify('the page has some content')
|
||||||
|
|
||||||
console.log('\n=== verify() with windowId ===')
|
console.log('\n=== verify() with windowId ===')
|
||||||
|
|||||||
Reference in New Issue
Block a user