Files
BrowserOS/reference-code/old-lib/browser/BrowserOSAdapter.ts
Felarof 8245dfe0ff Rewrite Agent Loop (#7)
* clean-up bunch of files for re-write

* more clean-up and adding basic agent

* Minor fix moved types into respective files.

* Deleted bunch of old files

backup

Update gitignore

Deleted a bunch of files

Remove message manager

Deleted old docs

Update rules

rename Profiler to profiler

* Temporarily adding old code

* Adding two small things back

* backup

* Implemented LangChainProvider and updated cursor rules

backup

LangChainProvider

cursor rules

* Implement tests for LangChainProvider -- unit test and integration test

integration test passes

integration test backup

* Tool Design

Tools Design

tools design

* NavigationTool ready

NavigationTool ready

NavigationTool ready

NavigationTool ready

backup

* MessageManager

MessageManager

backup

* Fixed integration test

* Agent design new

Updated agent design and added bunch of /NTN commands

agent new design

* Delete old agent design

* MessageManagerReadOnly class

* PlannerTool ready

PlannerTool almost ready

* ToolManager and DoneTool

* Integration of BrowserAgent

* BrowserAgent implementation v0.1

* BrowserAgent small fix v0.2

* Tool calling design

tool call design

tool design claude

* Update agent tool design with // NTN

* add zod-to-json npm install

* BrowserAgent v0.3

* BrowserAgent v0.4

* BrowserAgent v0.5

* fixes

* Build error fixes in my NEWLY added code

build errors fix

* Build error fixes in old code (integration work)

backup

* Comment StreamEventProcessor for now, it is not used

* Small build error fix

* Small rename

* Added integration test to check structuredLLM and changed to 4o-mini

change default to nxtscape

integration test

* Small docstring

* Simplified BrowserAgent code and added integration test

Simplified BrowserAgent code

BrowserAgent integration test

* Update CLAUDE.md with project memory and instructions on how to write code

Update CLAUDE.md with project memory and instructions on how to write code

Project Memory

* Just a mova.. Moved ToolManager outside. Build works.

* TabOperations tool

TabOperations Tool and fixing some test

tab operations

* Update CLAUDE.md

* Added ClassificationTool

classification tool

classification prompt

* Refactored and simplified PlannerTool unit test and integration test

* Updated Planner tool

* Update CLAUDE.md

* BrowserAgent modified to do classification

BrowserAgent with classification

* minor fix to ToolManager

* Instead of ToolCall and ToolResult -- just updating message manager once

* minor fix to BrowserAgent integration test

* Changed done to "done_tool"

* Updated CLAUDE.md to reflect understanding of claude

* Uncommented stream event processor

* Renamed EventBus to StreamEventBus

* Commented StreamEventProcessor

* Event Processor

* Integrated EventProcessor with BrowserAgent

Added EventProcessor to BrowserAgent

* Renamed StreamEventBus to EventBus

* Made EventBus required parameter in ExecutionContext

* PlanGenerator rewrite

PlanGenerator rewrite

backup

* For simple task, explicitly tell it to call done tool

* Max attempts for simple task

* backup

* Revert "backup"

This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f.

* Consolidating where EventBus and EventProcessor are created and initialized

backup

* Update CLAUDE.md

Update CLAUDE.md

* Improving agent loop code

Cleaned up processTooCall

classification task

* Create test-writer subAgent

test-agent-prompt

test agent prompt

test-agent-prompt

Update test-writer.md

* BrowserAgent test

Browseragent test

BrowserAgent test

* BrowserAgent refactor

backup

backup

* Minor fixes

* Minor fix

* minor change -- NEW AGENT LOOP IS WORKING WELL

* Update cursor rules

* Small change

* Improved BrowserAgent integration test

Improved BrowserAgent integration test

* Small change

* Update CLAUDE.md

* Different tools

* FindElementTool is ready

Find element update

backup

find element backup

* Updated test strings to say "tests..."

* ScrollTool is ready

* RefreshStateTool is updated as well

* MessageManager updated

* SearchTool is ready

backup

* Interaction Element is also ready

* Add debugMessage emitter

* ValidatorTool ready and tests are passing

Validation Tool

validator tool

backup

backup

* GroupTabs tool ready

* Registered all the tools

* Planning changed to 5 steps

* BrowserAgent integration test fix

* Minor string changes

* backup

* Removed too many confusing events in EventProcessor -- there is only event.info right now

* Abort control implemented

backup

Abort

* Formatter for toolResult

Formatter for toolResult

backup

* Always render using Markdown

* Minor fix

---------

Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
2025-07-29 08:14:45 -07:00

374 lines
13 KiB
TypeScript

import { Logging } from '@/lib/utils/Logging';
// NOTE(review): TypeScript only honors a triple-slash directive when nothing but
// comments precedes it. This one follows an import statement, so it is presumably
// treated as a plain comment; confirm the chrome.browserOS typings are picked up
// some other way (e.g. via tsconfig) or move the directive to the top of the file.
/// <reference path="../../types/chrome-browser-os.d.ts" />
// ============= Re-export types from chrome.browserOS namespace =============
// Each alias below exposes a type from the ambient chrome.browserOS namespace as a
// named export of this module. The aliases add no new shape of their own.
export type InteractiveNode = chrome.browserOS.InteractiveNode;
export type InteractiveSnapshot = chrome.browserOS.InteractiveSnapshot;
export type InteractiveSnapshotOptions = chrome.browserOS.InteractiveSnapshotOptions;
export type PageLoadStatus = chrome.browserOS.PageLoadStatus;
export type InteractiveNodeType = chrome.browserOS.InteractiveNodeType;
// Exported under the shorter name Rect; the underlying type is BoundingRect.
export type Rect = chrome.browserOS.BoundingRect;
// New snapshot types
export type SnapshotType = chrome.browserOS.SnapshotType;
export type SnapshotContext = chrome.browserOS.SnapshotContext;
export type SectionType = chrome.browserOS.SectionType;
export type TextSnapshotResult = chrome.browserOS.TextSnapshotResult;
export type LinkInfo = chrome.browserOS.LinkInfo;
export type LinksSnapshotResult = chrome.browserOS.LinksSnapshotResult;
export type SnapshotSection = chrome.browserOS.SnapshotSection;
export type Snapshot = chrome.browserOS.Snapshot;
export type SnapshotOptions = chrome.browserOS.SnapshotOptions;
// ============= BrowserOS Adapter =============
/**
 * Adapter for Chrome BrowserOS Extension APIs
 * Provides a clean interface to browserOS functionality with extensibility
 *
 * The per-operation wrappers turn a callback-style `chrome.browserOS` call into a
 * promise, log through `Logging`, and rethrow any failure as an `Error` whose
 * message is prefixed with a description of the operation that failed.
 * The constructor is private; obtain the shared instance through `getInstance()`.
 */
export class BrowserOSAdapter {
  private static instance: BrowserOSAdapter | null = null;

  private constructor() {}

  /**
   * Get singleton instance
   *
   * @returns The shared adapter, created on the first call.
   */
  static getInstance(): BrowserOSAdapter {
    if (!BrowserOSAdapter.instance) {
      BrowserOSAdapter.instance = new BrowserOSAdapter();
    }
    return BrowserOSAdapter.instance;
  }

  /**
   * Run one callback-style chrome API call and expose its outcome as a promise.
   *
   * @param invoke Receives the completion callback and must hand it to the chrome API.
   * @returns A promise that resolves with the value passed to the callback, or
   *   rejects with `chrome.runtime.lastError.message` when `lastError` is set at
   *   the time the callback runs. A synchronous throw from `invoke` also rejects.
   */
  private fromCallback<T>(invoke: (callback: (result: T) => void) => void): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      invoke((result: T) => {
        const lastError = chrome.runtime.lastError;
        if (lastError) {
          reject(new Error(lastError.message));
        } else {
          resolve(result);
        }
      });
    });
  }

  /**
   * Log a failure and build the error that the calling wrapper should throw.
   *
   * @param logPrefix Text placed before the cause in the log entry.
   * @param errorPrefix Text placed before the cause in the returned error's message.
   * @param error The caught value; non-Error values are converted with `String()`.
   * @returns A new `Error` with message `${errorPrefix}: <cause>`.
   */
  private describeFailure(logPrefix: string, errorPrefix: string, error: unknown): Error {
    const errorMessage = error instanceof Error ? error.message : String(error);
    Logging.log('BrowserOSAdapter', `${logPrefix}: ${errorMessage}`, 'error');
    return new Error(`${errorPrefix}: ${errorMessage}`);
  }

  /**
   * Get interactive snapshot of the current page
   *
   * @param tabId Tab to snapshot.
   * @param options Optional snapshot options; when omitted the API is called without them.
   * @returns The snapshot delivered by `chrome.browserOS.getInteractiveSnapshot`.
   * @throws Error prefixed with "Failed to get interactive snapshot" on any failure.
   */
  async getInteractiveSnapshot(tabId: number, options?: InteractiveSnapshotOptions): Promise<InteractiveSnapshot> {
    try {
      Logging.log('BrowserOSAdapter', `Getting interactive snapshot for tab ${tabId} with options: ${JSON.stringify(options)}`, 'info');
      // Awaiting inside the try block is what lets a rejection reach the catch below.
      const snapshot = await this.fromCallback<InteractiveSnapshot>((done) => {
        // The two call shapes are kept distinct rather than passing `undefined`
        // options. NOTE(review): presumably the API is declared with two overloads.
        if (options) {
          chrome.browserOS.getInteractiveSnapshot(tabId, options, done);
        } else {
          chrome.browserOS.getInteractiveSnapshot(tabId, done);
        }
      });
      // Dereferenced here rather than inside the chrome callback, so a missing
      // snapshot becomes a rejection instead of a promise that never settles.
      Logging.log('BrowserOSAdapter', `Retrieved snapshot with ${snapshot.elements.length} elements`, 'info');
      return snapshot;
    } catch (error) {
      throw this.describeFailure('Failed to get interactive snapshot', 'Failed to get interactive snapshot', error);
    }
  }

  /**
   * Click an element by node ID
   *
   * @param tabId Tab containing the element.
   * @param nodeId Node to click.
   * @throws Error prefixed with "Failed to click node <nodeId>" on any failure.
   */
  async click(tabId: number, nodeId: number): Promise<void> {
    try {
      Logging.log('BrowserOSAdapter', `Clicking node ${nodeId} in tab ${tabId}`, 'info');
      await this.fromCallback<void>((done) => {
        chrome.browserOS.click(tabId, nodeId, () => done());
      });
    } catch (error) {
      throw this.describeFailure('Failed to click node', `Failed to click node ${nodeId}`, error);
    }
  }

  /**
   * Input text into an element
   *
   * @param tabId Tab containing the element.
   * @param nodeId Node that receives the text.
   * @param text Text to input.
   * @throws Error prefixed with "Failed to input text into node <nodeId>" on any failure.
   */
  async inputText(tabId: number, nodeId: number, text: string): Promise<void> {
    try {
      Logging.log('BrowserOSAdapter', `Inputting text into node ${nodeId} in tab ${tabId}`, 'info');
      await this.fromCallback<void>((done) => {
        chrome.browserOS.inputText(tabId, nodeId, text, () => done());
      });
    } catch (error) {
      throw this.describeFailure('Failed to input text', `Failed to input text into node ${nodeId}`, error);
    }
  }

  /**
   * Clear text from an element
   *
   * @param tabId Tab containing the element.
   * @param nodeId Node to clear.
   * @throws Error prefixed with "Failed to clear node <nodeId>" on any failure.
   */
  async clear(tabId: number, nodeId: number): Promise<void> {
    try {
      Logging.log('BrowserOSAdapter', `Clearing node ${nodeId} in tab ${tabId}`, 'info');
      await this.fromCallback<void>((done) => {
        chrome.browserOS.clear(tabId, nodeId, () => done());
      });
    } catch (error) {
      throw this.describeFailure('Failed to clear node', `Failed to clear node ${nodeId}`, error);
    }
  }

  /**
   * Scroll to a specific node
   *
   * @param tabId Tab containing the element.
   * @param nodeId Node to scroll to.
   * @returns The boolean passed to the `chrome.browserOS.scrollToNode` callback.
   * @throws Error prefixed with "Failed to scroll to node <nodeId>" on any failure.
   */
  async scrollToNode(tabId: number, nodeId: number): Promise<boolean> {
    try {
      Logging.log('BrowserOSAdapter', `Scrolling to node ${nodeId} in tab ${tabId}`, 'info');
      return await this.fromCallback<boolean>((done) => {
        chrome.browserOS.scrollToNode(tabId, nodeId, done);
      });
    } catch (error) {
      throw this.describeFailure('Failed to scroll to node', `Failed to scroll to node ${nodeId}`, error);
    }
  }

  /**
   * Send keyboard keys
   *
   * @param tabId Tab that receives the keys.
   * @param keys Key value forwarded to `chrome.browserOS.sendKeys`.
   * @throws Error prefixed with "Failed to send keys" on any failure.
   */
  async sendKeys(tabId: number, keys: chrome.browserOS.Key): Promise<void> {
    try {
      Logging.log('BrowserOSAdapter', `Sending keys "${keys}" to tab ${tabId}`, 'info');
      await this.fromCallback<void>((done) => {
        chrome.browserOS.sendKeys(tabId, keys, () => done());
      });
    } catch (error) {
      throw this.describeFailure('Failed to send keys', 'Failed to send keys', error);
    }
  }

  /**
   * Get page load status
   *
   * @param tabId Tab to query.
   * @returns The status passed to the `chrome.browserOS.getPageLoadStatus` callback.
   * @throws Error prefixed with "Failed to get page load status" on any failure.
   */
  async getPageLoadStatus(tabId: number): Promise<PageLoadStatus> {
    try {
      Logging.log('BrowserOSAdapter', `Getting page load status for tab ${tabId}`, 'info');
      return await this.fromCallback<PageLoadStatus>((done) => {
        chrome.browserOS.getPageLoadStatus(tabId, done);
      });
    } catch (error) {
      throw this.describeFailure('Failed to get page load status', 'Failed to get page load status', error);
    }
  }

  /**
   * Get accessibility tree (if available)
   *
   * @param tabId Tab to query.
   * @returns The tree passed to the `chrome.browserOS.getAccessibilityTree` callback.
   * @throws Error prefixed with "Failed to get accessibility tree" on any failure.
   */
  async getAccessibilityTree(tabId: number): Promise<chrome.browserOS.AccessibilityTree> {
    try {
      Logging.log('BrowserOSAdapter', `Getting accessibility tree for tab ${tabId}`, 'info');
      return await this.fromCallback<chrome.browserOS.AccessibilityTree>((done) => {
        chrome.browserOS.getAccessibilityTree(tabId, done);
      });
    } catch (error) {
      throw this.describeFailure('Failed to get accessibility tree', 'Failed to get accessibility tree', error);
    }
  }

  /**
   * Capture a screenshot of the tab
   *
   * @param tabId Tab to capture.
   * @returns The string passed to the `chrome.browserOS.captureScreenshot` callback.
   * @throws Error prefixed with "Failed to capture screenshot" on any failure.
   */
  async captureScreenshot(tabId: number): Promise<string> {
    try {
      Logging.log('BrowserOSAdapter', `Capturing screenshot for tab ${tabId}`, 'info');
      const dataUrl = await this.fromCallback<string>((done) => {
        chrome.browserOS.captureScreenshot(tabId, done);
      });
      Logging.log('BrowserOSAdapter', `Screenshot captured for tab ${tabId}`, 'info');
      return dataUrl;
    } catch (error) {
      throw this.describeFailure('Failed to capture screenshot', 'Failed to capture screenshot', error);
    }
  }

  /**
   * Get a content snapshot of the specified type from the page
   *
   * @param tabId Tab to snapshot.
   * @param type Snapshot type forwarded to `chrome.browserOS.getSnapshot`.
   * @param options Optional snapshot options; when omitted the API is called without them.
   * @returns The snapshot delivered by `chrome.browserOS.getSnapshot`.
   * @throws Error prefixed with "Failed to get <type> snapshot" on any failure.
   */
  async getSnapshot(tabId: number, type: SnapshotType, options?: SnapshotOptions): Promise<Snapshot> {
    try {
      Logging.log('BrowserOSAdapter', `Getting ${type} snapshot for tab ${tabId} with options: ${JSON.stringify(options)}`, 'info');
      const snapshot = await this.fromCallback<Snapshot>((done) => {
        if (options) {
          chrome.browserOS.getSnapshot(tabId, type, options, done);
        } else {
          chrome.browserOS.getSnapshot(tabId, type, done);
        }
      });
      // Dereferenced after the await so that a missing snapshot rejects instead of hanging.
      Logging.log('BrowserOSAdapter', `Retrieved ${type} snapshot with ${snapshot.sections.length} sections`, 'info');
      return snapshot;
    } catch (error) {
      throw this.describeFailure(`Failed to get ${type} snapshot`, `Failed to get ${type} snapshot`, error);
    }
  }

  /**
   * Get text content snapshot from the page
   * Convenience method for text snapshot
   *
   * Delegates to `getSnapshot` with type 'text', so failures carry that method's message.
   */
  async getTextSnapshot(tabId: number, options?: SnapshotOptions): Promise<Snapshot> {
    return this.getSnapshot(tabId, 'text', options);
  }

  /**
   * Get links snapshot from the page
   * Convenience method for links snapshot
   *
   * Delegates to `getSnapshot` with type 'links', so failures carry that method's message.
   */
  async getLinksSnapshot(tabId: number, options?: SnapshotOptions): Promise<Snapshot> {
    return this.getSnapshot(tabId, 'links', options);
  }

  /**
   * Generic method to invoke any BrowserOS API
   * Useful for future APIs or experimental features
   *
   * NOTE(review): unlike the typed wrappers, this awaits the call's return value
   * and passes no completion callback of its own. Confirm the target API returns
   * a promise, or have callers supply the callback through `args`.
   *
   * @param method Property name looked up on `chrome.browserOS`.
   * @param args Arguments forwarded unchanged to the API.
   * @returns Whatever the awaited call yields.
   * @throws Error prefixed with "Failed to invoke BrowserOS API <method>" when the
   *   name is not present on `chrome.browserOS` or the call fails.
   */
  async invokeAPI(method: string, ...args: any[]): Promise<any> {
    try {
      Logging.log('BrowserOSAdapter', `Invoking BrowserOS API: ${method}`, 'info');
      if (!(method in chrome.browserOS)) {
        throw new Error(`Unknown BrowserOS API method: ${method}`);
      }
      // @ts-expect-error - Dynamic API invocation
      const result = await chrome.browserOS[method](...args);
      return result;
    } catch (error) {
      throw this.describeFailure(`Failed to invoke API ${method}`, `Failed to invoke BrowserOS API ${method}`, error);
    }
  }

  /**
   * Check if a specific API is available
   *
   * @param method Property name to look for.
   * @returns True when `method` is a property of `chrome.browserOS`, whatever its type.
   */
  isAPIAvailable(method: string): boolean {
    return method in chrome.browserOS;
  }

  /**
   * Get list of available BrowserOS APIs
   *
   * @returns The own enumerable keys of `chrome.browserOS` whose values are functions.
   */
  getAvailableAPIs(): string[] {
    return Object.keys(chrome.browserOS).filter(key => {
      // @ts-expect-error - Dynamic key access for API discovery
      return typeof chrome.browserOS[key] === 'function';
    });
  }
}
/**
 * Convenience accessor for the shared adapter.
 *
 * @returns The same object that `BrowserOSAdapter.getInstance()` returns.
 */
export const getBrowserOSAdapter = (): BrowserOSAdapter => {
  return BrowserOSAdapter.getInstance();
};