import { z } from 'zod';
import BrowserPage from './BrowserPage';
import { Logging } from '../utils/Logging';
import { profileAsync } from '../utils/Profiler';

// ============= Browser Context Configuration =============

// Browser context window size schema
export const BrowserContextWindowSizeSchema = z.object({
  width: z.number().int().positive(),  // Window width in pixels
  height: z.number().int().positive()  // Window height in pixels
});

export type BrowserContextWindowSize = z.infer<typeof BrowserContextWindowSizeSchema>;

// Browser context configuration schema
export const BrowserContextConfigSchema = z.object({
  maximumWaitPageLoadTime: z.number().default(5.0),  // Maximum time to wait for page load
  waitBetweenActions: z.number().default(0.1),  // Time to wait between multiple actions
  homePageUrl: z.string().default('https://www.google.com'),  // Home page url
  useVision: z.boolean().default(true)  // Use vision mode
});

export type BrowserContextConfig = z.infer<typeof BrowserContextConfigSchema>;

// Default configuration (every field takes its schema default)
export const DEFAULT_BROWSER_CONTEXT_CONFIG: BrowserContextConfig = BrowserContextConfigSchema.parse({});

// Tab info schema
export const TabInfoSchema = z.object({
  id: z.number().int().positive(),  // Tab ID
  url: z.string(),  // Tab URL
  title: z.string()  // Tab title
});

export type TabInfo = z.infer<typeof TabInfoSchema>;

// Browser state schema for V2
export const BrowserStateSchema = z.object({
  // Current tab info
  tabId: z.number(),  // Current tab ID
  url: z.string(),  // Current page URL
  title: z.string(),  // Current page title

  // All tabs info
  tabs: z.array(TabInfoSchema),  // All open tabs

  // Interactive elements as structured data
  clickableElements: z.array(z.object({
    nodeId: z.number(),  // Chrome BrowserOS node ID
    text: z.string(),  // Element text (axName or tag)
    tag: z.string()  // HTML tag name
  })),  // Clickable elements with nodeId, text, and tag

  typeableElements: z.array(z.object({
    nodeId: z.number(),  // Chrome BrowserOS node ID
    text: z.string(),  // Element text (axName or tag)
    tag: z.string()  // HTML tag name
  })),  // Typeable elements with nodeId, text, and tag

  // Pre-formatted strings for display
  clickableElementsString: z.string(),  // Formatted string of clickable elements
  typeableElementsString: z.string(),  // Formatted string of typeable elements

  // Hierarchical structure from BrowserOS API
  hierarchicalStructure: z.string().nullable().optional(),  // Hierarchical text representation with context

  // Screenshot support (base64 encoded)
  screenshot: z.string().nullable().optional()  // Base64 encoded screenshot data
});

export type BrowserState = z.infer<typeof BrowserStateSchema>;

// Error classes

/** Base error type for browser-context failures. */
export class BrowserError extends Error {
  constructor(message?: string) {
    super(message);
    this.name = 'BrowserError';
  }
}

/** Specialisation of BrowserError whose `name` is 'URLNotAllowedError'. */
export class URLNotAllowedError extends BrowserError {
  constructor(message?: string) {
    super(message);
    this.name = 'URLNotAllowedError';
  }
}

/**
 * Simplified BrowserContext that uses BrowserPageV2
 *
 * Key differences from V1:
 * - No Puppeteer dependencies
 * - No tab attachment/detachment logic (pages are always "attached")
 * - Simplified state management
 * - Direct Chrome API usage
 */
export class BrowserContext {
  private _config: BrowserContextConfig;
  private _userSelectedTabIds: number[] | null = null;
  private _executionLockedTabId: number | null = null;

  // Simple page cache keyed by tab ID - no attachment state needed
  private _pageCache: Map<number, BrowserPage> = new Map();

  /**
   * @param config - Overrides merged on top of DEFAULT_BROWSER_CONTEXT_CONFIG.
   */
  constructor(config: Partial<BrowserContextConfig> = {}) {
    this._config = { ...DEFAULT_BROWSER_CONTEXT_CONFIG, ...config };
  }

  /** Returns the current configuration object. */
  public getConfig(): BrowserContextConfig {
    return this._config;
  }

  /**
   * Shallow-merges the given fields into the current configuration.
   * @param config - Fields to override.
   */
  public updateConfig(config: Partial<BrowserContextConfig>): void {
    this._config = { ...this._config, ...config };
  }

  // ============= Core Page Operations =============

  /**
   * Get or create a Page instance for a tab.
   *
   * @param tab - Chrome tab to wrap.
   * @returns The cached page for `tab.id`, or a newly created one that is
   *          stored in the cache before being returned.
   * @throws Error when the tab has no ID.
   */
  private async _getOrCreatePage(tab: chrome.tabs.Tab): Promise<BrowserPage> {
    if (!tab.id) {
      throw new Error('Tab ID is not available');
    }

    // Check cache
    const existingPage = this._pageCache.get(tab.id);
    if (existingPage) {
      return existingPage;
    }

    // Create new page
    const page = new BrowserPage(tab.id, tab.url || 'Unknown URL', tab.title || 'Unknown Title');
    this._pageCache.set(tab.id, page);

    Logging.log('BrowserContextV2', `Created page for tab ${tab.id}`);
    return page;
  }

  /**
   * Get the current page.
   *
   * Resolves the target tab (see getTargetTab), returns its page and, when no
   * execution lock is held yet, locks execution to that tab.
   *
   * @throws Error when the target tab has no ID.
   */
  public async getCurrentPage(): Promise<BrowserPage> {
    return profileAsync('BrowserContext.getCurrentPage', async () => {
      const targetTab = await this.getTargetTab();

      if (!targetTab.id) {
        throw new Error('Target tab has no ID');
      }

      const page = await this._getOrCreatePage(targetTab);

      // Set execution lock for single-tab operations
      if (!this._executionLockedTabId) {
        this.lockExecutionToTab(targetTab.id);
      }

      return page;
    });
  }

  // ============= Tab Management =============

  /**
   * Switch to a different tab.
   *
   * Activates the tab through chrome.tabs.update and moves the execution lock
   * to it.
   *
   * @param tabId - ID of the tab to activate.
   * @returns The page for the activated tab.
   */
  public async switchTab(tabId: number): Promise<BrowserPage> {
    return profileAsync(`BrowserContext.switchTab[${tabId}]`, async () => {
      Logging.log('BrowserContextV2', `Switching to tab ${tabId}`);

      await chrome.tabs.update(tabId, { active: true });
      const tab = await chrome.tabs.get(tabId);

      const page = await this._getOrCreatePage(tab);
      this._executionLockedTabId = tabId;

      return page;
    });
  }

  /**
   * Get tab information.
   *
   * @returns One entry per tab returned by `chrome.tabs.query({})`; tabs
   *          lacking an id, url or title are skipped.
   */
  public async getTabs(): Promise<TabInfo[]> {
    const tabs = await chrome.tabs.query({});
    const tabInfos: TabInfo[] = [];

    for (const tab of tabs) {
      if (tab.id && tab.url && tab.title) {
        tabInfos.push({
          id: tab.id,
          url: tab.url,
          title: tab.title,
        });
      }
    }

    return tabInfos;
  }

  // ============= Navigation Operations =============

  /**
   * Navigate the current page to a URL.
   * @param url - Destination URL.
   */
  public async navigateTo(url: string): Promise<void> {
    const page = await this.getCurrentPage();
    await page.navigateTo(url);
  }

  /**
   * Open a new, active tab with the given URL and lock execution to it.
   *
   * @param url - URL to open.
   * @returns The page for the new tab.
   * @throws Error when Chrome returns a tab without an ID.
   */
  public async openTab(url: string): Promise<BrowserPage> {
    return profileAsync('BrowserContext.openTab', async () => {
      // Create the new tab
      const tab = await chrome.tabs.create({ url, active: true });
      if (!tab.id) {
        throw new Error('No tab ID available');
      }

      // Wait a bit for tab to initialize
      await new Promise(resolve => setTimeout(resolve, 100));

      // Get updated tab information
      const updatedTab = await chrome.tabs.get(tab.id);
      const page = await this._getOrCreatePage(updatedTab);
      this._executionLockedTabId = tab.id;

      return page;
    });
  }

  /**
   * Close a tab and drop every reference this context holds to it.
   * @param tabId - ID of the tab to close.
   */
  public async closeTab(tabId: number): Promise<void> {
    // Remove from cache
    this._pageCache.delete(tabId);

    // Close the tab
    await chrome.tabs.remove(tabId);

    // Update execution locked tab id if needed
    if (this._executionLockedTabId === tabId) {
      this._executionLockedTabId = null;
    }

    // Remove from user selected tabs if present
    if (this._userSelectedTabIds?.includes(tabId)) {
      this._userSelectedTabIds = this._userSelectedTabIds.filter(id => id !== tabId);
    }
  }

  // ============= State Operations =============

  /**
   * Get detailed browser state description for agents.
   *
   * @returns A text block built from getBrowserState(). If building it throws,
   *          a short "BROWSER STATE" summary with only the current page URL
   *          and title is returned instead.
   */
  public async getBrowserStateString(): Promise<string> {
    return profileAsync('BrowserContext.getBrowserStateString', async () => {
      try {
        // Use the structured getBrowserState API
        const browserState = await this.getBrowserState();

        // Format current tab
        const currentTab = `{id: ${browserState.tabId}, url: ${browserState.url}, title: ${browserState.title}}`;

        // Format other tabs
        const otherTabs = browserState.tabs
          .filter(tab => tab.id !== browserState.tabId)
          .map(tab => `- {id: ${tab.id}, url: ${tab.url}, title: ${tab.title}}`);

        // Current date/time as "YYYY-MM-DD HH:MM", derived from toISOString()
        const timeStr = new Date().toISOString().slice(0, 16).replace('T', ' ');

        // Combine clickable and typeable elements
        const parts: string[] = [];
        if (browserState.clickableElementsString) {
          parts.push('Clickable elements:\n' + browserState.clickableElementsString);
        }
        if (browserState.typeableElementsString) {
          parts.push('Input fields:\n' + browserState.typeableElementsString);
        }
        const elementsText = parts.join('\n\n') || 'No interactive elements found';

        // Include hierarchical structure if available (currently disabled)
        // elementsText = browserState.hierarchicalStructure? `\n\nHierarchical structure:\n${browserState.hierarchicalStructure}` : '';

        // Build state description
        // NOTE(review): the line breaks of this template were reconstructed
        // from a whitespace-collapsed copy of the file - confirm the exact
        // layout (and the tab-list indent) against version control.
        const stateDescription = `
BROWSER STATE:
Current tab: ${currentTab}
Other available tabs:
 ${otherTabs.join('\n ')}
Current date and time: ${timeStr}

Interactive elements from the current page (numbers in [brackets] are nodeIds):
${elementsText}
`;

        return stateDescription;
      } catch (error) {
        Logging.log('BrowserContextV2', `Failed to get detailed browser state: ${error}`, 'warning');
        const currentPage = await this.getCurrentPage();
        const url = await currentPage.url();
        const title = await currentPage.title();
        return `BROWSER STATE:\nCurrent page: ${url} - ${title}`;
      }
    });
  }

  // ============= Multi-Tab Operations =============

  /**
   * Get pages for specific tab IDs.
   *
   * @param tabIds - Tabs to resolve. When omitted or empty, the current page
   *                 is returned as a single-element array.
   * @returns The pages that could be resolved; tabs that fail individually
   *          are logged and skipped. Returns an empty array when none of the
   *          requested tabs resolve or when any other error occurs - this
   *          method logs instead of throwing.
   */
  public async getPages(tabIds?: number[]): Promise<BrowserPage[]> {
    try {
      // If no tab IDs provided, return current page
      if (!tabIds || tabIds.length === 0) {
        const currentPage = await this.getCurrentPage();
        return [currentPage];
      }

      // Get pages for specified tabs
      const pages: BrowserPage[] = [];

      for (const tabId of tabIds) {
        try {
          const tab = await chrome.tabs.get(tabId);
          const page = await this._getOrCreatePage(tab);
          pages.push(page);
        } catch (error) {
          Logging.log('BrowserContextV2', `Failed to get page for tab ${tabId}: ${error}`, 'warning');
        }
      }

      if (pages.length === 0) {
        // Caught by the outer handler below, which logs it and returns [].
        throw new Error(`Failed to get any of the selected tabs (${tabIds.join(', ')})`);
      }

      return pages;
    } catch (error) {
      Logging.log('BrowserContextV2', `Error getting pages: ${error}`, 'error');
      return [];
    }
  }

  /**
   * Get all tab IDs from the current window.
   *
   * @returns The set of defined tab IDs, or an empty set if the query fails.
   */
  public async getAllTabIds(): Promise<Set<number>> {
    try {
      const tabs = await chrome.tabs.query({ currentWindow: true });
      return new Set(tabs.map(tab => tab.id).filter((id): id is number => id !== undefined));
    } catch (error) {
      Logging.log('BrowserContextV2', `Failed to get tab IDs: ${error}`, 'warning');
      return new Set();
    }
  }

  // ============= Execution Lock Management =============

  /**
   * Get the target tab for operations.
   *
   * Prefers the execution-locked tab; if looking it up fails, the lock is
   * cleared and the active tab of the current window is used instead.
   *
   * @throws Error when there is no active tab with an ID.
   */
  private async getTargetTab(): Promise<chrome.tabs.Tab> {
    // Check if we're in a locked execution context
    if (this._executionLockedTabId) {
      try {
        const tab = await chrome.tabs.get(this._executionLockedTabId);
        if (tab) {
          return tab;
        }
      } catch (error) {
        Logging.log('BrowserContextV2', `Execution-locked tab ${this._executionLockedTabId} no longer exists`, 'warning');
        this._executionLockedTabId = null;
      }
    }

    // No locked tab - use the active tab
    const [activeTab] = await chrome.tabs.query({ active: true, currentWindow: true });
    if (!activeTab?.id) {
      throw new Error('No active tab available');
    }
    return activeTab;
  }

  /**
   * Lock execution to a specific tab.
   * @param tabId - Tab that subsequent operations should target.
   */
  public lockExecutionToTab(tabId: number): void {
    this._executionLockedTabId = tabId;
    Logging.log('BrowserContextV2', `Execution locked to tab ${tabId}`);
  }

  /**
   * Unlock execution, logging which tab (if any) was previously locked.
   */
  public async unlockExecution(): Promise<void> {
    const previousLockedTab = this._executionLockedTabId;
    this._executionLockedTabId = null;
    Logging.log('BrowserContextV2', `Execution unlocked${previousLockedTab ? ` (was locked to tab ${previousLockedTab})` : ''}`);
  }

  // ============= Window Management =============

  /**
   * Get the window that contains the target tab, falling back to
   * `chrome.windows.getCurrent()` when that lookup fails or the tab has no
   * window ID.
   *
   * @throws Error when no window can be found.
   */
  public async getCurrentWindow(): Promise<chrome.windows.Window> {
    try {
      const tab = await this.getTargetTab();
      if (tab && tab.windowId) {
        return await chrome.windows.get(tab.windowId);
      }
    } catch (error) {
      Logging.log('BrowserContextV2', `Failed to get window from target tab: ${error}`, 'warning');
    }

    // Fall back to current window
    const currentWindow = await chrome.windows.getCurrent();
    if (currentWindow) {
      return currentWindow;
    }

    throw new Error('No window found');
  }

  /**
   * Get structured browser state (V2 clean API)
   *
   * A screenshot is taken only when `useVision` is enabled in the config.
   * Any failure is logged and a minimal placeholder state is returned
   * instead of throwing.
   *
   * @returns BrowserState object with current page info and interactive elements
   */
  public async getBrowserState(): Promise<BrowserState> {
    return profileAsync('BrowserContext.getBrowserState', async () => {
      try {
        const currentPage = await this.getCurrentPage();
        const tabs = await this.getTabs();

        // Get current page info
        const url = await currentPage.url();
        const title = await currentPage.title();
        const tabId = currentPage.tabId;

        // Get formatted strings from the page
        const clickableElementsString = await currentPage.getClickableElementsString();
        const typeableElementsString = await currentPage.getTypeableElementsString();

        // Get structured elements from the page
        const clickableElements = await currentPage.getClickableElements();
        const typeableElements = await currentPage.getTypeableElements();

        // Get hierarchical structure
        const hierarchicalStructure = await currentPage.getHierarchicalStructure();

        // Get screenshot if vision is enabled
        let screenshot: string | null = null;
        if (this._config.useVision) {
          screenshot = await currentPage.takeScreenshot();
        }

        // Build structured state
        const state: BrowserState = {
          // Current tab info
          tabId,
          url,
          title,

          // All tabs
          tabs,

          // Interactive elements
          clickableElements,
          typeableElements,

          // Pre-formatted strings
          clickableElementsString,
          typeableElementsString,

          // Hierarchical structure
          hierarchicalStructure,

          // Screenshot (if vision enabled)
          screenshot
        };

        return state;
      } catch (error) {
        Logging.log('BrowserContextV2', `Failed to get state: ${error}`, 'warning');

        // Return minimal state on error
        const minimalState: BrowserState = {
          tabId: 0,
          url: 'about:blank',
          title: 'New Tab',
          tabs: [],
          clickableElements: [],
          typeableElements: [],
          clickableElementsString: '',
          typeableElementsString: '',
          hierarchicalStructure: null,
          screenshot: null
        };

        return minimalState;
      }
    });
  }

  // ============= Cleanup Operations =============

  /**
   * Clean up all resources: empties the page cache and resets the execution
   * lock and the user-selected tab list. Errors are logged, not rethrown.
   */
  public async cleanup(): Promise<void> {
    try {
      Logging.log('BrowserContextV2', 'Cleaning up browser context');

      // Clear all state
      this._pageCache.clear();
      this._executionLockedTabId = null;
      this._userSelectedTabIds = null;

      Logging.log('BrowserContextV2', 'Browser context cleaned up successfully');
    } catch (error) {
      Logging.log('BrowserContextV2', `Error during cleanup: ${error}`, 'error');
    }
  }
}

export default BrowserContext;