Files
BrowserOS/reference-code/old-lib/agent/BrowseAgent.ts
Felarof 8245dfe0ff Rewrite Agent Loop (#7)
* clean-up bunch of files for re-write

* more clean-up and adding basic agent

* Minor fix moved types into respective files.

* Deleted bunch of old files

backup

Update gitignore

Deleted a bunch of files

Remove message manager

Deleted old docs

Update rules

rename Profiler to profiler

* Temporarily adding old code

* Adding two small things back

* backup

* Implemented LangChainProvider and updated cursor rules

backup

LangChainProvider

cursor rules

* Implement tests for LangChainProvider -- unit test and integration test

integration test passes

integration test backup

* Tool Design

Tools Design

tools design

* NavigationTool ready

NavigationTool ready

NavigationTool ready

NavigationTool ready

backup

* MessageManager

MessageManager

backup

* Fixed integration test

* Agent design new

Updated agent design and added bunch of /NTN commands

agent new design

* Delete old agent design

* MessageManagerReadOnly class

* PlannerTool ready

PlannerTool almost ready

* ToolManager and DoneTool

* Integration of BrowserAgent

* BrowserAgent implementation v0.1

* BrowserAgent small fix v0.2

* Tool calling design

tool call design

tool design claude

* Update agent tool design with // NTN

* add zod-to-json npm install

* BrowserAgent v0.3

* BrowserAgent v0.4

* BrowserAgent v0.5

* fixes

* Build error fixes in my NEWLY added code

build errors fix

* Build error fixes in old code (integration work)

backup

* Comment StreamEventProcessor for now, it is not used

* Small build error fix

* Small rename

* Added integration test to check structuredLLM and changed to 4o-mini

change default to nxtscape

integration test

* Small docstring

* Simplified BrowserAgent code and added integration test

Simplified BrowserAgent code

BrowserAgent integration test

* Update CLAUDE.md with project memory and instructions on how to write code

Update CLAUDE.md with project memory and instructions on how to write code

Project Memory

* Just a move: moved ToolManager outside. Build works.

* TabOperations tool

TabOperations Tool and fixing some test

tab operations

* Update CLAUDE.md

* Added ClassificationTool

classification tool

classification prompt

* Refactored and simplified PlannerTool unit test and integration test

* Updated Planner tool

* Update CLAUDE.md

* BrowserAgent modified to do classification

BrowserAgent with classification

* minor fix to ToolManager

* Instead of ToolCall and ToolResult -- just updating message manager once

* minor fix to BrowserAgent integration test

* Changed done to "done_tool"

* Updated CLAUDE.md to reflect understanding of claude

* Uncommented stream event processor

* Renamed EventBus to StreamEventBus

* Commented StreamEventProcessor

* Event Processor

* Integrated EventProcessor with BrowserAgent

Added EventProcessor to BrowserAgent

* Renamed StreamEventBus to EventBus

* Made EventBus required parameter in ExecutionContext

* PlanGenerator rewrite

PlanGenerator rewrite

backup

* For simple task, explicitly tell it to call done tool

* Max attempts for simple task

* backup

* Revert "backup"

This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f.

* Consolidating where EventBus and EventProcessor are created and initialized

backup

* Update CLAUDE.md

Update CLAUDE.md

* Improving agent loop code

Cleaned up processToolCall

classification task

* Create test-writer subAgent

test-agent-prompt

test agent prompt

test-agent-prompt

Update test-writer.md

* BrowserAgent test

Browseragent test

BrowserAgent test

* BrowserAgent refactor

backup

backup

* Minor fixes

* Minor fix

* minor change -- NEW AGENT LOOP IS WORKING WELL

* Update cursor rules

* Small change

* Improved BrowserAgent integration test

Improved BrowserAgent integration test

* Small change

* Update CLAUDE.md

* Different tools

* FindElementTool is ready

Find element update

backup

find element backup

* Updated test strings to say "tests..."

* ScrollTool is ready

* RefreshStateTool is updated as well

* MessageManager updated

* SearchTool is ready

backup

* Interaction Element is also ready

* Add debugMessage emitter

* ValidatorTool ready and tests are passing

Validation Tool

validator tool

backup

backup

* GroupTabs tool ready

* Registered all the tools

* Planning changed to 5 steps

* BrowserAgent integration test fix

* Minor string changes

* backup

* Removed too many confusing events in EventProcessor -- there is only event.info right now

* Abort control implemented

backup

Abort

* Formatter for toolResult

Formatter for toolResult

backup

* Always render using Markdown

* Minor fix

---------

Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
2025-07-29 08:14:45 -07:00

258 lines
8.9 KiB
TypeScript

import { z } from 'zod';
import { createReactAgent } from '@langchain/langgraph/prebuilt';
import { Logging } from '@/lib/utils/Logging';
import { RunnableConfig } from '@langchain/core/runnables';
// Import base agent
import { BaseAgent, AgentOptions, AgentInput } from './BaseAgent';
// Import new tool system
import { ToolRegistry } from '@/lib/tools/base';
import { ExtractTool } from '@/lib/tools/answer/ExtractTool';
import { NavigationTool } from '@/lib/tools/browser-navigation/NavigationTool';
import { FindElementTool } from '@/lib/tools/browser-navigation/FindElementTool';
import { InteractionTool } from '@/lib/tools/browser-navigation/InteractionTool';
import { ScrollTool } from '@/lib/tools/browser-navigation/ScrollTool';
import { SearchTool } from '@/lib/tools/browser-navigation/SearchTool';
import { TabOperationsTool } from '@/lib/tools/tab/TabOperationsTool';
import { DoneTool } from '@/lib/tools/utility/DoneTool';
import { WaitTool } from '@/lib/tools/utility/WaitTool';
import { TodoListManagerTool } from '@/lib/tools/utility/TodoListManagerTool';
// Import prompt
import { BrowseAgentPrompt } from '@/lib/prompts/BrowseAgentPrompt';
import { RefreshStateTool } from '../tools';
/**
 * Browse agent output schema.
 *
 * Describes the object returned by a browse agent run: whether the task
 * completed, which tool calls were made, a textual description of the final
 * state, and an optional bag of extracted data.
 */
export const BrowseOutputSchema = z.object({
  /** Whether the browsing task was completed */
  completed: z.boolean(),
  /** List of actions performed */
  actions_taken: z.array(z.string()),
  /** Description of final page state */
  final_state: z.string(),
  /**
   * Any data extracted during browsing.
   * The key schema is spelled out explicitly: the two-argument form of
   * `z.record` is accepted by both Zod 3 and Zod 4, whereas the
   * single-argument form is not available in Zod 4.
   */
  extracted_data: z.record(z.string(), z.unknown()).optional()
});

/** Static type inferred from {@link BrowseOutputSchema}. */
export type BrowseOutput = z.infer<typeof BrowseOutputSchema>;
/**
 * Agent specialized for web browsing automation using ReAct pattern.
 * Uses tools to complete complex web tasks through multi-step reasoning.
 */
export class BrowseAgent extends BaseAgent {
  /**
   * Creates a new instance of BrowseAgent
   * @param options - Configuration options for the browse agent
   */
  constructor(options: AgentOptions) {
    super(options);
  }

  /**
   * Override: Create tool registry for the agent
   * @returns ToolRegistry with browse tools
   */
  protected createToolRegistry(): ToolRegistry {
    const registry = new ToolRegistry();

    // Register only browser navigation, extraction, and utility tools
    registry.registerAll([
      // Browser navigation tools
      new NavigationTool(this.executionContext),
      new FindElementTool(this.executionContext),
      new SearchTool(this.executionContext),
      new InteractionTool(this.executionContext),
      new ScrollTool(this.executionContext),
      // Tab management tools
      new TabOperationsTool(this.executionContext),
      // Utility tools
      new DoneTool(this.executionContext),
      // new WaitTool(this.executionContext),
      new TodoListManagerTool(this.executionContext),
      // Extraction tools
      new ExtractTool(this.executionContext),
      new RefreshStateTool(this.executionContext),
    ]);

    return registry;
  }

  /**
   * Override: Generate system prompt for browse agent
   * @returns System prompt string
   */
  protected generateSystemPrompt(): string {
    // Use the tool registry to generate documentation; fall back to an empty
    // string when no registry is available.
    const toolDocs = this.toolRegistry?.generateSystemPrompt() || '';

    // Create and use the browse agent prompt with tool documentation
    return new BrowseAgentPrompt(toolDocs).generate();
  }

  /**
   * Override: Get the agent name for logging
   * @returns Agent name
   */
  protected getAgentName(): string {
    return 'BrowseAgent';
  }

  /**
   * Execute browsing agent - handles instruction enhancement and execution.
   *
   * Adds the system prompt, the current browser state and any selected-tab
   * context to the message manager, runs a ReAct agent over the registered
   * tools, and summarizes the resulting messages. The system prompt and
   * browser state messages are removed again before returning, on both the
   * success and the failure path.
   *
   * Errors raised while preparing or running the agent are caught, logged,
   * and reported as a `BrowseOutput` with `completed: false`.
   *
   * @param input - Agent input containing instruction and context
   * @param config - Optional configuration for LangGraph web compatibility
   * @returns Parsed browse output
   */
  protected async executeAgent(
    input: AgentInput,
    config?: RunnableConfig
  ): Promise<BrowseOutput> {
    try {
      await this.ensureInitialized();

      // 1. Add system prompt (agent-specific)
      // TODO: do we need to add system prompt here? we add as messageModifier below
      this.executionContext.messageManager.addSystemMessage(this.systemPrompt, 0);
      this.systemPromptAdded = true;

      // 2. Add browser state before execution
      if (!this.stateMessageAdded) {
        const browserState = await this.executionContext.browserContext.getBrowserStateString();
        this.executionContext.messageManager.addBrowserStateMessage(browserState);
        this.stateMessageAdded = true;

        // Debug: Log browser state details
        const currentPage = await this.browserContext.getCurrentPage();
        this.log('🌐 Browser state captured', 'info', {
          url: currentPage.url(),
          title: await currentPage.title(),
          useVision: this.options.useVision,
          hasScreenshot: browserState.includes('Screenshot:')
        });
      }

      // Add selected tabs instruction if any
      const selectedTabsInstruction = await this.getSelectedTabsInstruction();
      if (selectedTabsInstruction) {
        this.executionContext.messageManager.addHumanMessage(`[Context: ${selectedTabsInstruction}]`);
      }

      // Get LLM and tools
      const llm = await this.getLLM();
      const tools = this.createTools();

      // A missing LLM type must NOT be treated as Gemini: with the previous
      // `?.indexOf(...) !== -1` form, `undefined !== -1` evaluated to true.
      const llmType = llm._llmType();
      const isGemini = llmType?.includes('google') ?? false;
      const messages = this.executionContext.messageManager.getMessages(isGemini);

      // Debug: Log agent configuration
      this.log('🤖 Creating browse agent', 'info', {
        instruction: input.instruction,
        toolCount: tools.length,
        tools: tools.map(t => t.name),
        llmType,
        messageCount: messages.length,
        hasSelectedTabs: !!selectedTabsInstruction
      });

      // Create ReAct agent
      const agent = createReactAgent({
        llm,
        tools,
      });

      // Use centralized streaming execution
      const { result, allMessages } = await this.executeReactAgentWithStreaming(
        agent,
        input.instruction,
        config,
        messages
      );

      // Extract the final message content
      const lastMessage = allMessages[allMessages.length - 1];
      const finalContent = typeof lastMessage?.content === 'string'
        ? lastMessage.content
        : 'Task completed';

      // Walk every tool call to build the action log and detect completion
      const actionsTaken: string[] = [];
      const toolNames: string[] = [];
      let completed = false;

      for (const message of allMessages) {
        // Only messages carrying a tool_calls array contribute actions
        if ('tool_calls' in message && Array.isArray(message.tool_calls)) {
          for (const toolCall of message.tool_calls) {
            toolNames.push(toolCall.name);
            actionsTaken.push(`${toolCall.name}: ${JSON.stringify(toolCall.args)}`);
            // NOTE(review): completion is keyed on the literal tool name
            // 'done' — confirm this matches the name DoneTool registers under.
            if (toolCall.name === 'done') {
              completed = true;
            }
          }
        }
      }

      // Debug: Log execution results
      this.log('🏁 Browse execution complete', 'info', {
        completed,
        actionCount: actionsTaken.length,
        toolCalls: toolNames,
        finalStateLength: finalContent.length
      });

      return {
        completed,
        actions_taken: actionsTaken,
        final_state: finalContent,
        extracted_data: input.context || {}  // Include any context data passed in
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);

      // Debug: Log error details
      this.log('❌ Browse task failed', 'error', {
        error: errorMessage,
        stack: error instanceof Error ? error.stack : undefined
      });

      return {
        completed: false,
        actions_taken: [],
        final_state: `Task failed: ${errorMessage}`,
        extracted_data: {}
      };
    } finally {
      // 3. Remove browser state and system prompt after execution.
      // Runs on both the success and the error path so the shared message
      // manager never keeps this agent's transient messages.
      if (this.stateMessageAdded) {
        this.executionContext.messageManager.removeBrowserStateMessages();
        this.stateMessageAdded = false;
      }
      if (this.systemPromptAdded) {
        this.executionContext.messageManager.removeSystemMessage();
        this.systemPromptAdded = false;
      }
    }
  }
}