Files
BrowserOS/reference-code/old-lib/browser/BrowserContext.ts
Felarof 8245dfe0ff Rewrite Agent Loop (#7)
* clean-up bunch of files for re-write

* more clean-up and adding basic agent

* Minor fix moved types into respective files.

* Deleted bunch of old files

backup

Update gitignore

Deleted a bunch of files

Remove message manager

Deleted old docs

Update rules

rename Profiler to profiler

* Temporarily adding old code

* Adding two small things back

* backup

* Implemented LangChainProvider and updated cursor rules

backup

LangChainProvider

cursor rules

* Implement tests for LangChainProvider -- unit test and integration test

integration test passes

integration test backup

* Tool Design

Tools Design

tools design

* NavigationTool ready

NavigationTool ready

NavigationTool ready

NavigationTool ready

backup

* MessageManager

MessageManager

backup

* Fixed integration test

* Agent design new

Updated agent design and added bunch of /NTN commands

agent new design

* Delete old agent design

* MessageManagerReadOnly class

* PlannerTool ready

PlannerTool almost ready

* ToolManager and DoneTool

* Integration of BrowserAgent

* BrowserAgent implementation v0.1

* BrowserAgent small fix v0.2

* Tool calling design

tool call design

tool design claude

* Update agent tool design with // NTN

* add zod-to-json npm install

* BrowserAGent v0.3

* BrowserAgent v0.4

* BrowserAgent v0.5

* fixes

* Build error fixes in my NEWLY added code

build errors fix

* Build error fixes in old code (integration work)

backup

* Comment StreamEventProcessor for now, it is not used

* Small build error fix

* Small rename

* Added integration test to check structuredLLM and changed to 4o-mini

change default to nxtscape

integration test

* Small docstring

* Simplified BrowserAgent code and added integration test

Simplified BrowserAgent code

BrowserAgent integration test

* Update CLAUDE.md with project memory and instructions on how to write code

Update CLAUDE.md with project memory and instructions on how to write code

Project Memory

* Just a mova.. Moved ToolManager outside. Build works.

* TabOperations tool

TabOperations Tool and fixing some test

tab operations

* Update CLAUDE.md

* Added ClassificationTool

classification tool

classification prompt

* Refactored and simplified PlannerTool unit test and integration test

* Updated Planner tool

* Update CLAUDE.md

* BrowserAgent modified to do classification

BrowserAgent with classification

* minor fix to ToolManager

* Instead of ToolCall and ToolResult -- just updating message manager once

* minor fix to BrowserAgent integration test

* Changed done to "done_tool"

* Updated CLAUDE.md to reflect understanding of claude

* Uncommented stream event processor

* Renamed EventBus to StreamEventBus

* Commented StreamEventProcessor

* Event Processor

* Integrated EventProcessor with BrowserAgent

Added EventProcessor to BrowserAgent

* Renamed StreamEventBus to EventBus

* Made EventBus required parameter in ExecutionContext

* PlanGenerator rewrite

PlanGenerator rewrite

backup

* For simple task, explicitly tell it to call done tool

* Max attempts for simple task

* backup

* Revert "backup"

This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f.

* Consolidating where EventBus and EventProcessor are created and initialized

backup

* Update CLAUDE.md

Update CLAUDE.md

* Improving agent loop code

Cleaned up processTooCall

classification task

* Create test-writer subAgent

test-agent-prompt

test agent prompt

test-agent-prompt

Update test-writer.md

* BrowserAgent test

Browseragent test

BrowserAgent test

* BrowserAgent refactor

backup

backup

* Minor fixes

* Minor fix

* minor change -- NEW AGENT LOOP IS WORKING WELL

* Update cursor rules

* Small change

* Improved BrowserAgent integration test

Improved BrowserAgent integration test

* Small change

* Update CLAUDE.md

* Different tools

* FindElementTool is ready

Find element update

backup

find element backup

* Updated to test strings to say "tests..."

* ScrollTool is ready

* RefreshStateTool is updated as well

* MessageManager updated

* SearchTool is ready

backup

* Interaction Element is also ready

* Add debugMessage emitter

* ValidatorTool ready and tests are passing

Validation Tool

validator tool

backup

backup

* GroupTabs tool ready

* Registered all the tools

* Planning changed to 5 steps

* BrowserAgent integration test fix

* Minor string changes

* backup

* Removed too many confusing events in EventProcessor -- there is only event.info right now

* Abort control implemented

backup

Abort

* Formatter for toolResult

Formatter for toolResult

backup

* Always render using Markdown

* Minor fix

---------

Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
2025-07-29 08:14:45 -07:00

537 lines
16 KiB
TypeScript

import { z } from 'zod';
import BrowserPage from './BrowserPage';
import { Logging } from '../utils/Logging';
import { profileAsync } from '../utils/Profiler';
// ============= Browser Context Configuration =============
// Window size of a browser context: both sides are validated as positive integers.
const pixelDimension = z.number().int().positive()
export const BrowserContextWindowSizeSchema = z.object({
  width: pixelDimension,  // Window width in pixels
  height: pixelDimension  // Window height in pixels
})
// Static type derived from the schema above
export type BrowserContextWindowSize = z.infer<typeof BrowserContextWindowSizeSchema>
// Shape of the browser context configuration. Every field carries a default,
// so parsing an empty object yields a complete configuration.
const browserContextConfigShape = {
  // Maximum time to wait for page load (presumably seconds -- TODO confirm unit)
  maximumWaitPageLoadTime: z.number().default(5),
  // Time to wait between multiple actions (presumably seconds -- TODO confirm unit)
  waitBetweenActions: z.number().default(0.1),
  // Home page url
  homePageUrl: z.string().default('https://www.google.com'),
  // Use vision mode
  useVision: z.boolean().default(true)
}
export const BrowserContextConfigSchema = z.object(browserContextConfigShape)
export type BrowserContextConfig = z.infer<typeof BrowserContextConfigSchema>
// Default configuration: the schema defaults, obtained by parsing an empty object
export const DEFAULT_BROWSER_CONTEXT_CONFIG: BrowserContextConfig = BrowserContextConfigSchema.parse({})
// Summary of a single browser tab as reported to callers.
const tabInfoShape = {
  id: z.number().int().positive(),  // Tab ID (positive integer)
  url: z.string(),  // Tab URL
  title: z.string()  // Tab title
}
export const TabInfoSchema = z.object(tabInfoShape)
export type TabInfo = z.infer<typeof TabInfoSchema>
// Browser state schema for V2

// One interactive element entry; clickable and typeable elements share this shape.
const InteractiveElementSchema = z.object({
  nodeId: z.number(),  // Chrome BrowserOS node ID
  text: z.string(),  // Element text (axName or tag)
  tag: z.string()  // HTML tag name
})

export const BrowserStateSchema = z.object({
  // ----- Current tab -----
  // NOTE: tabId is a plain number (not int/positive like TabInfo.id); the
  // error fallback in BrowserContext.getBrowserState reports tabId 0.
  tabId: z.number(),  // Current tab ID
  url: z.string(),  // Current page URL
  title: z.string(),  // Current page title

  // ----- All tabs -----
  tabs: z.array(TabInfoSchema),  // All open tabs

  // ----- Interactive elements as structured data -----
  clickableElements: z.array(InteractiveElementSchema),  // Clickable elements with nodeId, text, and tag
  typeableElements: z.array(InteractiveElementSchema),  // Typeable elements with nodeId, text, and tag

  // ----- Pre-formatted strings for display -----
  clickableElementsString: z.string(),  // Formatted string of clickable elements
  typeableElementsString: z.string(),  // Formatted string of typeable elements

  // ----- Hierarchical structure from BrowserOS API -----
  hierarchicalStructure: z.string().nullable().optional(),  // Hierarchical text representation with context

  // ----- Screenshot support (base64 encoded) -----
  screenshot: z.string().nullable().optional()  // Base64 encoded screenshot data
})
export type BrowserState = z.infer<typeof BrowserStateSchema>
// Error classes
/**
 * Base error type for browser-related failures in this module.
 * Instances report `name === 'BrowserError'`.
 */
export class BrowserError extends Error {
  // Assigned after super() returns, exactly like a constructor assignment would be
  override name = 'BrowserError'

  constructor(message?: string) {
    super(message)
  }
}
/**
 * BrowserError subtype whose `name` is 'URLNotAllowedError'.
 */
export class URLNotAllowedError extends BrowserError {
  // Runs after the BrowserError constructor, so it replaces the inherited name
  override name = 'URLNotAllowedError'

  constructor(message?: string) {
    super(message)
  }
}
/**
 * Simplified BrowserContext that hands out BrowserPage instances for Chrome tabs.
 *
 * Key differences from V1:
 * - No Puppeteer dependencies
 * - No tab attachment/detachment logic (pages are always "attached")
 * - Simplified state management
 * - Direct Chrome API usage
 *
 * State kept by the context:
 * - a per-tab cache of BrowserPage objects,
 * - an optional "execution lock" that pins operations to one tab id,
 * - an optional list of user-selected tab ids (only pruned/reset in this class).
 */
export class BrowserContext {
  private _config: BrowserContextConfig;
  private _userSelectedTabIds: number[] | null = null;
  private _executionLockedTabId: number | null = null;
  // Simple page cache - no attachment state needed
  private readonly _pageCache: Map<number, BrowserPage> = new Map();

  /**
   * @param config Partial overrides; missing keys come from DEFAULT_BROWSER_CONTEXT_CONFIG.
   */
  constructor(config: Partial<BrowserContextConfig> = {}) {
    this._config = { ...DEFAULT_BROWSER_CONTEXT_CONFIG, ...config };
  }

  /** Returns the current configuration object. */
  public getConfig(): BrowserContextConfig {
    return this._config;
  }

  /** Shallow-merges the given overrides into the current configuration. */
  public updateConfig(config: Partial<BrowserContextConfig>): void {
    this._config = { ...this._config, ...config };
  }

  // ============= Core Page Operations =============

  /**
   * Get or create a Page instance for a tab.
   *
   * A cached page is returned as-is; its url/title are not refreshed here.
   * @throws Error when the tab has no id.
   */
  private async _getOrCreatePage(tab: chrome.tabs.Tab): Promise<BrowserPage> {
    if (!tab.id) {
      throw new Error('Tab ID is not available');
    }

    const cached = this._pageCache.get(tab.id);
    if (cached) {
      return cached;
    }

    const page = new BrowserPage(tab.id, tab.url || 'Unknown URL', tab.title || 'Unknown Title');
    this._pageCache.set(tab.id, page);
    Logging.log('BrowserContextV2', `Created page for tab ${tab.id}`);
    return page;
  }

  /**
   * Get the page for the target tab (the locked tab if any, else the active tab).
   *
   * If no execution lock is set yet, execution is locked to the resolved tab.
   * @throws Error when no target tab can be resolved or it has no id.
   */
  public async getCurrentPage(): Promise<BrowserPage> {
    return profileAsync('BrowserContext.getCurrentPage', async () => {
      const targetTab = await this.getTargetTab();
      if (!targetTab.id) {
        throw new Error('Target tab has no ID');
      }

      const page = await this._getOrCreatePage(targetTab);

      // Set execution lock for single-tab operations
      if (!this._executionLockedTabId) {
        this.lockExecutionToTab(targetTab.id);
      }
      return page;
    });
  }

  // ============= Tab Management =============

  /**
   * Activate the given tab, lock execution to it and return its page.
   */
  public async switchTab(tabId: number): Promise<BrowserPage> {
    return profileAsync(`BrowserContext.switchTab[${tabId}]`, async () => {
      Logging.log('BrowserContextV2', `Switching to tab ${tabId}`);
      await chrome.tabs.update(tabId, { active: true });
      const tab = await chrome.tabs.get(tabId);
      const page = await this._getOrCreatePage(tab);
      // Go through the single locking entry point so every lock change is logged
      this.lockExecutionToTab(tabId);
      return page;
    });
  }

  /**
   * Get id/url/title for all tabs returned by chrome.tabs.query({}).
   *
   * Tabs missing an id, or with an empty/missing url or title, are skipped.
   */
  public async getTabs(): Promise<TabInfo[]> {
    const tabs = await chrome.tabs.query({});
    const tabInfos: TabInfo[] = [];
    for (const { id, url, title } of tabs) {
      if (id && url && title) {
        tabInfos.push({ id, url, title });
      }
    }
    return tabInfos;
  }

  // ============= Navigation Operations =============

  /**
   * Navigate the current page to a URL.
   */
  public async navigateTo(url: string): Promise<void> {
    const page = await this.getCurrentPage();
    await page.navigateTo(url);
  }

  /**
   * Open a new active tab with the URL, lock execution to it and return its page.
   * @throws Error when Chrome returns a tab without an id.
   */
  public async openTab(url: string): Promise<BrowserPage> {
    return profileAsync('BrowserContext.openTab', async () => {
      const tab = await chrome.tabs.create({ url, active: true });
      if (!tab.id) {
        throw new Error('No tab ID available');
      }
      const newTabId = tab.id;

      // Wait a bit for tab to initialize
      await new Promise(resolve => setTimeout(resolve, 100));

      // Re-read the tab so the page is created from the updated tab information
      const updatedTab = await chrome.tabs.get(newTabId);
      const page = await this._getOrCreatePage(updatedTab);
      // Go through the single locking entry point so every lock change is logged
      this.lockExecutionToTab(newTabId);
      return page;
    });
  }

  /**
   * Close a tab and drop every reference this context holds to it.
   *
   * The cache entry is removed before chrome.tabs.remove is awaited; the lock
   * and user-selection bookkeeping run only after the removal resolves.
   */
  public async closeTab(tabId: number): Promise<void> {
    this._pageCache.delete(tabId);

    await chrome.tabs.remove(tabId);

    if (this._executionLockedTabId === tabId) {
      this._executionLockedTabId = null;
    }

    if (this._userSelectedTabIds && this._userSelectedTabIds.includes(tabId)) {
      this._userSelectedTabIds = this._userSelectedTabIds.filter(id => id !== tabId);
    }
  }

  // ============= State Operations =============

  /**
   * Get detailed browser state description for agents.
   *
   * Built from getBrowserState(); if formatting fails, falls back to a short
   * "url - title" description of the current page.
   */
  public async getBrowserStateString(): Promise<string> {
    return profileAsync('BrowserContext.getBrowserStateString', async () => {
      try {
        const browserState = await this.getBrowserState();

        const currentTab = `{id: ${browserState.tabId}, url: ${browserState.url}, title: ${browserState.title}}`;

        const otherTabs = browserState.tabs
          .filter(tab => tab.id !== browserState.tabId)
          .map(tab => `- {id: ${tab.id}, url: ${tab.url}, title: ${tab.title}}`);

        // "YYYY-MM-DD HH:MM" taken from the ISO (UTC) timestamp
        const timeStr = new Date().toISOString().slice(0, 16).replace('T', ' ');

        // Combine clickable and typeable elements; empty sections are omitted
        const parts: string[] = [];
        if (browserState.clickableElementsString) {
          parts.push('Clickable elements:\n' + browserState.clickableElementsString);
        }
        if (browserState.typeableElementsString) {
          parts.push('Input fields:\n' + browserState.typeableElementsString);
        }
        const elementsText = parts.join('\n\n') || 'No interactive elements found';

        // browserState.hierarchicalStructure is currently left out of the description.

        // Build state description
        const stateDescription = `
BROWSER STATE:
Current tab: ${currentTab}
Other available tabs:
${otherTabs.join('\n ')}
Current date and time: ${timeStr}
Interactive elements from the current page (numbers in [brackets] are nodeIds):
${elementsText}
`;
        return stateDescription;
      } catch (error) {
        Logging.log('BrowserContextV2', `Failed to get detailed browser state: ${error}`, 'warning');
        const currentPage = await this.getCurrentPage();
        const url = await currentPage.url();
        const title = await currentPage.title();
        return `BROWSER STATE:\nCurrent page: ${url} - ${title}`;
      }
    });
  }

  // ============= Multi-Tab Operations =============

  /**
   * Get pages for specific tab IDs.
   *
   * With no ids (or an empty list) the current page is returned. Tabs that
   * cannot be resolved are logged and skipped. Never throws: any failure,
   * including "none of the tabs resolved", is logged and yields [].
   */
  public async getPages(tabIds?: number[]): Promise<BrowserPage[]> {
    try {
      if (!tabIds || tabIds.length === 0) {
        return [await this.getCurrentPage()];
      }

      const pages: BrowserPage[] = [];
      for (const tabId of tabIds) {
        try {
          const tab = await chrome.tabs.get(tabId);
          pages.push(await this._getOrCreatePage(tab));
        } catch (error) {
          Logging.log('BrowserContextV2', `Failed to get page for tab ${tabId}: ${error}`, 'warning');
        }
      }

      if (pages.length === 0) {
        // Thrown on purpose: the outer catch logs it at 'error' level and returns []
        throw new Error(`Failed to get any of the selected tabs (${tabIds.join(', ')})`);
      }
      return pages;
    } catch (error) {
      Logging.log('BrowserContextV2', `Error getting pages: ${error}`, 'error');
      return [];
    }
  }

  /**
   * Get all tab IDs from the current window (empty set on failure).
   */
  public async getAllTabIds(): Promise<Set<number>> {
    try {
      const tabs = await chrome.tabs.query({ currentWindow: true });
      return new Set(tabs.map(tab => tab.id).filter((id): id is number => id !== undefined));
    } catch (error) {
      Logging.log('BrowserContextV2', `Failed to get tab IDs: ${error}`, 'warning');
      return new Set();
    }
  }

  // ============= Execution Lock Management =============

  /**
   * Get the target tab for operations.
   *
   * Prefers the execution-locked tab. If that tab can no longer be fetched,
   * the lock is cleared, its cached page is evicted, and the active tab of the
   * current window is used instead.
   * @throws Error when there is no active tab with an id.
   */
  private async getTargetTab(): Promise<chrome.tabs.Tab> {
    const lockedTabId = this._executionLockedTabId;
    if (lockedTabId) {
      try {
        const tab = await chrome.tabs.get(lockedTabId);
        if (tab) {
          return tab;
        }
      } catch {
        Logging.log('BrowserContextV2', `Execution-locked tab ${lockedTabId} no longer exists`, 'warning');
        // The tab is gone, so its cached page can never be used again
        this._pageCache.delete(lockedTabId);
        this._executionLockedTabId = null;
      }
    }

    // No locked tab - use the active tab
    const [activeTab] = await chrome.tabs.query({ active: true, currentWindow: true });
    if (!activeTab?.id) {
      throw new Error('No active tab available');
    }
    return activeTab;
  }

  /**
   * Lock execution to a specific tab.
   */
  public lockExecutionToTab(tabId: number): void {
    this._executionLockedTabId = tabId;
    Logging.log('BrowserContextV2', `Execution locked to tab ${tabId}`);
  }

  /**
   * Unlock execution (no-op apart from logging when nothing was locked).
   */
  public async unlockExecution(): Promise<void> {
    const previousLockedTab = this._executionLockedTabId;
    this._executionLockedTabId = null;
    Logging.log('BrowserContextV2', `Execution unlocked${previousLockedTab ? ` (was locked to tab ${previousLockedTab})` : ''}`);
  }

  // ============= Window Management =============

  /**
   * Get the window that hosts the target tab, falling back to
   * chrome.windows.getCurrent() when the target tab cannot be resolved.
   * @throws Error when no window is found.
   */
  public async getCurrentWindow(): Promise<chrome.windows.Window> {
    try {
      const tab = await this.getTargetTab();
      if (tab && tab.windowId) {
        const window = await chrome.windows.get(tab.windowId);
        return window;
      }
    } catch (error) {
      Logging.log('BrowserContextV2', `Failed to get window from target tab: ${error}`, 'warning');
    }

    // Fall back to current window
    const window = await chrome.windows.getCurrent();
    if (window) {
      return window;
    }
    throw new Error('No window found');
  }

  /**
   * Get structured browser state (V2 clean API)
   *
   * Never throws: on any failure a minimal placeholder state is returned
   * (tabId 0, 'about:blank', empty collections).
   * @returns BrowserState object with current page info and interactive elements
   */
  public async getBrowserState(): Promise<BrowserState> {
    return profileAsync('BrowserContext.getBrowserState', async () => {
      try {
        const currentPage = await this.getCurrentPage();
        const tabs = await this.getTabs();

        // Current page info
        const url = await currentPage.url();
        const title = await currentPage.title();
        const tabId = currentPage.tabId;

        // Formatted strings first, then structured elements (order kept from the original)
        const clickableElementsString = await currentPage.getClickableElementsString();
        const typeableElementsString = await currentPage.getTypeableElementsString();
        const clickableElements = await currentPage.getClickableElements();
        const typeableElements = await currentPage.getTypeableElements();

        const hierarchicalStructure = await currentPage.getHierarchicalStructure();

        // Screenshot only when vision is enabled in the config
        let screenshot: string | null = null;
        if (this._config.useVision) {
          screenshot = await currentPage.takeScreenshot();
        }

        const state: BrowserState = {
          // Current tab info
          tabId,
          url,
          title,
          // All tabs
          tabs,
          // Interactive elements
          clickableElements,
          typeableElements,
          // Pre-formatted strings
          clickableElementsString,
          typeableElementsString,
          // Hierarchical structure
          hierarchicalStructure,
          // Screenshot (if vision enabled)
          screenshot
        };
        return state;
      } catch (error) {
        Logging.log('BrowserContextV2', `Failed to get state: ${error}`, 'warning');

        // Return minimal state on error
        const minimalState: BrowserState = {
          tabId: 0,
          url: 'about:blank',
          title: 'New Tab',
          tabs: [],
          clickableElements: [],
          typeableElements: [],
          clickableElementsString: '',
          typeableElementsString: '',
          hierarchicalStructure: null,
          screenshot: null
        };
        return minimalState;
      }
    });
  }

  // ============= Cleanup Operations =============

  /**
   * Clean up all resources: empties the page cache and resets the execution
   * lock and user-selected tabs. Errors are logged, not rethrown.
   */
  public async cleanup(): Promise<void> {
    try {
      Logging.log('BrowserContextV2', 'Cleaning up browser context');

      // Clear all state
      this._pageCache.clear();
      this._executionLockedTabId = null;
      this._userSelectedTabIds = null;

      Logging.log('BrowserContextV2', 'Browser context cleaned up successfully');
    } catch (error) {
      Logging.log('BrowserContextV2', `Error during cleanup: ${error}`, 'error');
    }
  }
}
export default BrowserContext;