mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-20 04:21:23 +00:00
* clean-up bunch of files for re-write * more clean-up and adding basic agent * Minor fix moved types into respective files. * Deleted bunch of old files backup Update gitignore Deleted a bunch of files Remove message manager Deleted old docs Update rules rename Profiler to profiler * Temporarily adding old code * Adding two small things back * backup * Implemented LangChainProvider and updated cursor rules backup LangChainProvider curosr rules * Implement tests for LangChainProvider -- unit test and integration test integration test passes integration test backup * Tool Design Tools Desing tools design * NavigationTool ready NavigationTool ready NavigationTool ready NaivgationTool ready backup * MessageManager MessageManager backup * Fixed integration test * Agent design new Updated agent design and added bunch of /NTN commands agent new design * Delete old agent design * MessageManagerReadOnly class * PlannerTool ready PlannerTool almost ready * ToolManager and DoneTool * Integration of BrowserAgent * BrowserAgent implementation v0.1 * BrowserAgent small fix v0.2 * Tool calling design too call design tool design claude * Update agent tool design with // NTN * add zod-to-json npm install * BrowserAGent v0.3 * BrowserAgent v0.4 * BrowserAgent v0.5 * fixes * Build error fixes in my NEWLY added code build errors fix * Build error fixes in old code (integration work) backup * Comment StreamEventProcessor for now, it is not used * Small build error fix * Small rename * Added integration test to check structuredLLM and changed to 4o-mini change default to nxtscape integration test * Small docstring * Simplified BrowserAgent code and added integration test Simplified BrowserAgent code BrowserAGent integrationt est * Update CLAUDE.md with project memory and instructions on how to write code Update CLAUDE.md with project memory and instructions on how to write code Project Memory * Just a mova.. Moved ToolManager outside. Build works. 
* TabOperations tool TabOperations Tool and fixing some test tab operations * Update CLAUDE.md * Added ClassificationTool classifiction tool classification prommpt * Refactored and simplified PlannerTool unit test and integration test * Updated Plnnaer tool * Update CLAUDE.md * BrowserAgent modified to do classification BrowserAgent with classification * minor fix to ToolManager * Instead of ToolCall and ToolResult -- just updating message manager once * minor fix to BrowserAgent integration test * Changed done to "done_tool" * Updated CLAUDE.md to reflect understanding of claude * Uncommented stream event processor * Renamed EventBus to StreamEventBus * Commented StreamEventProcessor * Event Processor * Integrated EventProcessor with BrowserAgent Added EventProcessor to BrowserAgetn * Renamed StreamEventBus to EventBus * Made EventBus required parameter in ExecutionContext * PlanGenerator rewrite PlanGenerator rewrite backup * For simple task, explicitly tell it to call done tool * Max attempts for simple task * backup * Revert "backup" This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f. * Consolidating where EventBus and EventProcessor are created and initialized backup * Update CLAUDE.md Update CLAUDE.md * Improving agent loop code Cleaned up processTooCall classification task * Create test-writer subAgent test-agent-prompt test agent prompt test-agent-prompt Update test-writer.md * BrowserAgent test Browseragent test BrowserAgent test * BrowserAgent refactor backup backup * Minor fixes * Minor fix * minor change -- NEW AGENT LOOP IS WORKING WELL * Update cursor rules * Small change * Improved BrowserAgent integration test Improved BrowserAgent integration test * Small change * Update CLAUDE.md * Different tools * FindElementTool is ready Find element update backup find element backup * Updated to test strings to say "tests..." 
* ScrollTool is ready * RefreshStateTool is updated as well * MessageManager updated * SearchTool is ready backup * Interaction Element is also ready * Add debugMessage emitter * ValidatorTool ready and tests are passing Validation Tool validator tool backup backup * GroupTabs tool ready * Registered all the tools * Planning changed to 5 steps * BrowserAgent integration test fix * Minor string changes * backup * Removed too many confusing events in EventProcessor -- there is only event.info right now * Abort control implemented backup Abort * Formatter for toolResult Formatter for toolResult backup * Always render using Markdown * Minor fix --------- Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
374 lines
13 KiB
TypeScript
374 lines
13 KiB
TypeScript
import { Logging } from '@/lib/utils/Logging';
|
|
/// <reference path="../../types/chrome-browser-os.d.ts" />
|
|
|
|
// ============= Re-export types from chrome.browserOS namespace =============
// Each alias below simply re-exports the type of the same name from the
// ambient chrome.browserOS namespace, so importers of this module do not have
// to reference that namespace themselves.

// Interactive snapshot types
export type InteractiveNode = chrome.browserOS.InteractiveNode;
export type InteractiveNodeType = chrome.browserOS.InteractiveNodeType;
export type InteractiveSnapshot = chrome.browserOS.InteractiveSnapshot;
export type InteractiveSnapshotOptions = chrome.browserOS.InteractiveSnapshotOptions;

// Page state and geometry
export type PageLoadStatus = chrome.browserOS.PageLoadStatus;
// Note: exported as `Rect`, but the underlying type is `BoundingRect`.
export type Rect = chrome.browserOS.BoundingRect;

// New snapshot types
export type SnapshotType = chrome.browserOS.SnapshotType;
export type SnapshotContext = chrome.browserOS.SnapshotContext;
export type SnapshotOptions = chrome.browserOS.SnapshotOptions;
export type SectionType = chrome.browserOS.SectionType;
export type SnapshotSection = chrome.browserOS.SnapshotSection;
export type Snapshot = chrome.browserOS.Snapshot;
export type TextSnapshotResult = chrome.browserOS.TextSnapshotResult;
export type LinkInfo = chrome.browserOS.LinkInfo;
export type LinksSnapshotResult = chrome.browserOS.LinksSnapshotResult;
|
|
|
|
// ============= BrowserOS Adapter =============
|
|
|
|
/**
 * Adapter for Chrome BrowserOS Extension APIs.
 *
 * Wraps the callback-style `chrome.browserOS.*` functions in promises and
 * provides a clean interface to browserOS functionality with extensibility.
 *
 * Every failure (a `chrome.runtime.lastError`, a synchronous throw from the
 * API, or a missing result) is logged through `Logging` and rethrown as an
 * `Error` whose message names the operation that failed.
 *
 * The class is a singleton; obtain it with `BrowserOSAdapter.getInstance()`.
 */
export class BrowserOSAdapter {
  private static instance: BrowserOSAdapter | null = null;

  private constructor() {}

  /**
   * Get singleton instance, creating it on first use.
   */
  static getInstance(): BrowserOSAdapter {
    if (!BrowserOSAdapter.instance) {
      BrowserOSAdapter.instance = new BrowserOSAdapter();
    }
    return BrowserOSAdapter.instance;
  }

  /**
   * Turn one callback-style API call into a promise.
   *
   * @param invoke Receives the completion callback and must pass it (or a
   *   wrapper around it) to the API as its callback argument.
   * @returns The value the API hands to the callback.
   * @throws Rejects with the `chrome.runtime.lastError` message when it is set
   *   at callback time, or with whatever `invoke` throws synchronously.
   */
  private static fromCallback<T>(invoke: (done: (result: T) => void) => void): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      invoke((result: T) => {
        // lastError has to be read inside the callback of the call that set it.
        const lastError = chrome.runtime.lastError;
        if (lastError) {
          reject(new Error(lastError.message ?? 'Unknown BrowserOS error'));
        } else {
          resolve(result);
        }
      });
    });
  }

  /**
   * Run `operation`; on any failure log it and rethrow with context.
   *
   * The operation is awaited INSIDE the try block on purpose: returning the
   * promise without awaiting it would let rejections bypass the catch, so
   * nothing would be logged and callers would see the raw error.
   *
   * @param logPrefix Text put before the error message in the log entry.
   * @param errorPrefix Text put before the error message in the thrown Error.
   * @param operation The asynchronous work to guard.
   */
  private static async withErrorContext<T>(
    logPrefix: string,
    errorPrefix: string,
    operation: () => Promise<T>
  ): Promise<T> {
    try {
      return await operation();
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      Logging.log('BrowserOSAdapter', `${logPrefix}: ${errorMessage}`, 'error');
      throw new Error(`${errorPrefix}: ${errorMessage}`);
    }
  }

  /**
   * View of `chrome.browserOS` as a string-keyed record for dynamic lookups,
   * or `undefined` when the namespace does not exist in this environment.
   */
  private static apiNamespace(): Record<string, unknown> | undefined {
    const namespace: unknown = typeof chrome === 'undefined' ? undefined : chrome.browserOS;
    if (typeof namespace !== 'object' || namespace === null) {
      return undefined;
    }
    return namespace as Record<string, unknown>;
  }

  /**
   * Get interactive snapshot of the current page.
   *
   * @param tabId Tab to snapshot.
   * @param options Optional snapshot options; when omitted the API is called
   *   without an options argument.
   * @throws Error prefixed with "Failed to get interactive snapshot".
   */
  async getInteractiveSnapshot(tabId: number, options?: InteractiveSnapshotOptions): Promise<InteractiveSnapshot> {
    return BrowserOSAdapter.withErrorContext(
      'Failed to get interactive snapshot',
      'Failed to get interactive snapshot',
      async () => {
        Logging.log('BrowserOSAdapter', `Getting interactive snapshot for tab ${tabId} with options: ${JSON.stringify(options)}`, 'info');

        const snapshot = await BrowserOSAdapter.fromCallback<InteractiveSnapshot>(done => {
          if (options) {
            chrome.browserOS.getInteractiveSnapshot(tabId, options, done);
          } else {
            chrome.browserOS.getInteractiveSnapshot(tabId, done);
          }
        });

        // Read the result after the promise settles so a missing snapshot
        // becomes a rejected call rather than a throw inside Chrome's callback.
        Logging.log('BrowserOSAdapter', `Retrieved snapshot with ${snapshot.elements.length} elements`, 'info');
        return snapshot;
      }
    );
  }

  /**
   * Click an element by node ID.
   *
   * @throws Error prefixed with "Failed to click node <nodeId>".
   */
  async click(tabId: number, nodeId: number): Promise<void> {
    return BrowserOSAdapter.withErrorContext('Failed to click node', `Failed to click node ${nodeId}`, async () => {
      Logging.log('BrowserOSAdapter', `Clicking node ${nodeId} in tab ${tabId}`, 'info');
      await BrowserOSAdapter.fromCallback<void>(done => chrome.browserOS.click(tabId, nodeId, () => done()));
    });
  }

  /**
   * Input text into an element.
   *
   * @throws Error prefixed with "Failed to input text into node <nodeId>".
   */
  async inputText(tabId: number, nodeId: number, text: string): Promise<void> {
    return BrowserOSAdapter.withErrorContext('Failed to input text', `Failed to input text into node ${nodeId}`, async () => {
      Logging.log('BrowserOSAdapter', `Inputting text into node ${nodeId} in tab ${tabId}`, 'info');
      await BrowserOSAdapter.fromCallback<void>(done => chrome.browserOS.inputText(tabId, nodeId, text, () => done()));
    });
  }

  /**
   * Clear text from an element.
   *
   * @throws Error prefixed with "Failed to clear node <nodeId>".
   */
  async clear(tabId: number, nodeId: number): Promise<void> {
    return BrowserOSAdapter.withErrorContext('Failed to clear node', `Failed to clear node ${nodeId}`, async () => {
      Logging.log('BrowserOSAdapter', `Clearing node ${nodeId} in tab ${tabId}`, 'info');
      await BrowserOSAdapter.fromCallback<void>(done => chrome.browserOS.clear(tabId, nodeId, () => done()));
    });
  }

  /**
   * Scroll to a specific node.
   *
   * @returns The boolean the API passes to its callback.
   * @throws Error prefixed with "Failed to scroll to node <nodeId>".
   */
  async scrollToNode(tabId: number, nodeId: number): Promise<boolean> {
    return BrowserOSAdapter.withErrorContext('Failed to scroll to node', `Failed to scroll to node ${nodeId}`, async () => {
      Logging.log('BrowserOSAdapter', `Scrolling to node ${nodeId} in tab ${tabId}`, 'info');
      return BrowserOSAdapter.fromCallback<boolean>(done => chrome.browserOS.scrollToNode(tabId, nodeId, done));
    });
  }

  /**
   * Send keyboard keys.
   *
   * @throws Error prefixed with "Failed to send keys".
   */
  async sendKeys(tabId: number, keys: chrome.browserOS.Key): Promise<void> {
    return BrowserOSAdapter.withErrorContext('Failed to send keys', 'Failed to send keys', async () => {
      Logging.log('BrowserOSAdapter', `Sending keys "${keys}" to tab ${tabId}`, 'info');
      await BrowserOSAdapter.fromCallback<void>(done => chrome.browserOS.sendKeys(tabId, keys, () => done()));
    });
  }

  /**
   * Get page load status.
   *
   * @throws Error prefixed with "Failed to get page load status".
   */
  async getPageLoadStatus(tabId: number): Promise<PageLoadStatus> {
    return BrowserOSAdapter.withErrorContext('Failed to get page load status', 'Failed to get page load status', async () => {
      Logging.log('BrowserOSAdapter', `Getting page load status for tab ${tabId}`, 'info');
      return BrowserOSAdapter.fromCallback<PageLoadStatus>(done => chrome.browserOS.getPageLoadStatus(tabId, done));
    });
  }

  /**
   * Get accessibility tree (if available).
   *
   * @throws Error prefixed with "Failed to get accessibility tree".
   */
  async getAccessibilityTree(tabId: number): Promise<chrome.browserOS.AccessibilityTree> {
    return BrowserOSAdapter.withErrorContext('Failed to get accessibility tree', 'Failed to get accessibility tree', async () => {
      Logging.log('BrowserOSAdapter', `Getting accessibility tree for tab ${tabId}`, 'info');
      return BrowserOSAdapter.fromCallback<chrome.browserOS.AccessibilityTree>(done =>
        chrome.browserOS.getAccessibilityTree(tabId, done)
      );
    });
  }

  /**
   * Capture a screenshot of the tab.
   *
   * @returns The string the API passes to its callback.
   * @throws Error prefixed with "Failed to capture screenshot".
   */
  async captureScreenshot(tabId: number): Promise<string> {
    return BrowserOSAdapter.withErrorContext('Failed to capture screenshot', 'Failed to capture screenshot', async () => {
      Logging.log('BrowserOSAdapter', `Capturing screenshot for tab ${tabId}`, 'info');
      const dataUrl = await BrowserOSAdapter.fromCallback<string>(done => chrome.browserOS.captureScreenshot(tabId, done));
      Logging.log('BrowserOSAdapter', `Screenshot captured for tab ${tabId}`, 'info');
      return dataUrl;
    });
  }

  /**
   * Get a content snapshot of the specified type from the page.
   *
   * @param tabId Tab to snapshot.
   * @param type Kind of snapshot to request.
   * @param options Optional snapshot options; when omitted the API is called
   *   without an options argument.
   * @throws Error prefixed with "Failed to get <type> snapshot".
   */
  async getSnapshot(tabId: number, type: SnapshotType, options?: SnapshotOptions): Promise<Snapshot> {
    return BrowserOSAdapter.withErrorContext(`Failed to get ${type} snapshot`, `Failed to get ${type} snapshot`, async () => {
      Logging.log('BrowserOSAdapter', `Getting ${type} snapshot for tab ${tabId} with options: ${JSON.stringify(options)}`, 'info');

      const snapshot = await BrowserOSAdapter.fromCallback<Snapshot>(done => {
        if (options) {
          chrome.browserOS.getSnapshot(tabId, type, options, done);
        } else {
          chrome.browserOS.getSnapshot(tabId, type, done);
        }
      });

      // As with the interactive snapshot, a missing result rejects the call.
      Logging.log('BrowserOSAdapter', `Retrieved ${type} snapshot with ${snapshot.sections.length} sections`, 'info');
      return snapshot;
    });
  }

  /**
   * Get text content snapshot from the page.
   * Convenience method for `getSnapshot(tabId, 'text', options)`.
   */
  async getTextSnapshot(tabId: number, options?: SnapshotOptions): Promise<Snapshot> {
    return this.getSnapshot(tabId, 'text', options);
  }

  /**
   * Get links snapshot from the page.
   * Convenience method for `getSnapshot(tabId, 'links', options)`.
   */
  async getLinksSnapshot(tabId: number, options?: SnapshotOptions): Promise<Snapshot> {
    return this.getSnapshot(tabId, 'links', options);
  }

  /**
   * Generic method to invoke any BrowserOS API.
   * Useful for future APIs or experimental features.
   *
   * Only own function members of `chrome.browserOS` can be invoked; inherited
   * members (e.g. `toString`) and non-function properties are rejected as
   * unknown methods.
   *
   * NOTE(review): unlike the other wrappers, no callback is appended here; the
   * return value of the call is awaited. That only yields a result for APIs
   * that return a promise - confirm against the chrome.browserOS bindings.
   *
   * @param method Name of the function on `chrome.browserOS`.
   * @param args Arguments forwarded to the function unchanged.
   * @throws Error prefixed with "Failed to invoke BrowserOS API <method>".
   */
  async invokeAPI(method: string, ...args: any[]): Promise<any> {
    return BrowserOSAdapter.withErrorContext(`Failed to invoke API ${method}`, `Failed to invoke BrowserOS API ${method}`, async () => {
      Logging.log('BrowserOSAdapter', `Invoking BrowserOS API: ${method}`, 'info');

      const api = BrowserOSAdapter.apiNamespace();
      const candidate = api !== undefined && Object.prototype.hasOwnProperty.call(api, method) ? api[method] : undefined;
      if (typeof candidate !== 'function') {
        throw new Error(`Unknown BrowserOS API method: ${method}`);
      }

      // Keep chrome.browserOS as the receiver, as a direct call would.
      return await candidate.apply(api, args);
    });
  }

  /**
   * Check if a specific API is available.
   *
   * @returns `true` only when `chrome.browserOS` exists and has an own
   *   function member with that name; never throws.
   */
  isAPIAvailable(method: string): boolean {
    const api = BrowserOSAdapter.apiNamespace();
    return api !== undefined
      && Object.prototype.hasOwnProperty.call(api, method)
      && typeof api[method] === 'function';
  }

  /**
   * Get list of available BrowserOS APIs.
   *
   * @returns Names of the own enumerable function members of
   *   `chrome.browserOS`, or an empty array when the namespace is missing.
   */
  getAvailableAPIs(): string[] {
    const api = BrowserOSAdapter.apiNamespace();
    if (api === undefined) {
      return [];
    }
    return Object.keys(api).filter(key => typeof api[key] === 'function');
  }
}
|
|
|
|
/**
 * Convenience accessor: returns whatever `BrowserOSAdapter.getInstance()`
 * returns, so callers do not need to reference the class itself.
 */
export const getBrowserOSAdapter = (): BrowserOSAdapter => {
  return BrowserOSAdapter.getInstance();
};
|