Files
BrowserOS/reference-code/old-lib/agent/BaseAgent.ts
Felarof 8245dfe0ff Rewrite Agent Loop (#7)
* clean-up bunch of files for re-write

* more clean-up and adding basic agent

* Minor fix moved types into respective files.

* Deleted bunch of old files

backup

Update gitignore

Deleted a bunch of files

Remove message manager

Deleted old docs

Update rules

rename Profiler to profiler

* Temporarily adding old code

* Adding two small things back

* backup

* Implemented LangChainProvider and updated cursor rules

backup

LangChainProvider

curosr rules

* Implement tests for LangChainProvider -- unit test and integration test

integration test passes

integration test backup

* Tool Design

Tools Desing

tools design

* NavigationTool ready

NavigationTool ready

NavigationTool ready

NaivgationTool ready

backup

* MessageManager

MessageManager

backup

* Fixed integration test

* Agent design new

Updated agent design and added bunch of /NTN commands

agent new design

* Delete old agent design

* MessageManagerReadOnly class

* PlannerTool ready

PlannerTool almost ready

* ToolManager and DoneTool

* Integration of BrowserAgent

* BrowserAgent implementation v0.1

* BrowserAgent small fix v0.2

* Tool calling design

too call design

tool design claude

* Update agent tool design with // NTN

* add zod-to-json npm install

* BrowserAGent v0.3

* BrowserAgent v0.4

* BrowserAgent v0.5

* fixes

* Build error fixes in my NEWLY added code

build errors fix

* Build error fixes in old code (integration work)

backup

* Comment StreamEventProcessor for now, it is not used

* Small build error fix

* Small rename

* Added integration test to check structuredLLM and changed to 4o-mini

change default to nxtscape

integration test

* Small docstring

* Simplified BrowserAgent code and added integration test

Simplified BrowserAgent code

BrowserAGent integrationt est

* Update CLAUDE.md with project memory and instructions on how to write code

Update CLAUDE.md with project memory and instructions on how to write code

Project Memory

* Just a mova.. Moved ToolManager outside. Build works.

* TabOperations tool

TabOperations Tool and fixing some test

tab operations

* Update CLAUDE.md

* Added ClassificationTool

classifiction tool

classification prommpt

* Refactored and simplified PlannerTool unit test and integration test

* Updated Plnnaer tool

* Update CLAUDE.md

* BrowserAgent modified to do classification

BrowserAgent with classification

* minor fix to ToolManager

* Instead of ToolCall and ToolResult -- just updating message manager once

* minor fix to BrowserAgent integration test

* Changed done to "done_tool"

* Updated CLAUDE.md to reflect understanding of claude

* Uncommented stream event processor

* Renamed EventBus to StreamEventBus

* Commented StreamEventProcessor

* Event Processor

* Integrated EventProcessor with BrowserAgent

Added EventProcessor to BrowserAgetn

* Renamed StreamEventBus to EventBus

* Made EventBus required parameter in ExecutionContext

* PlanGenerator rewrite

PlanGenerator rewrite

backup

* For simple task, explicitly tell it to call done tool

* Max attempts for simple task

* backup

* Revert "backup"

This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f.

* Consolidating where EventBus and EventProcessor are created and initialized

backup

* Update CLAUDE.md

Update CLAUDE.md

* Improving agent loop code

Cleaned up processTooCall

classification task

* Create test-writer subAgent

test-agent-prompt

test agent prompt

test-agent-prompt

Update test-writer.md

* BrowserAgent test

Browseragent test

BrowserAgent test

* BrowserAgent refactor

backup

backup

* Minor fixes

* Minor fix

* minor change -- NEW AGENT LOOP IS WORKING WELL

* Update cursor rules

* Small change

* Improved BrowserAgent integration test

Improved BrowserAgent integration test

* Small change

* Update CLAUDE.md

* Different tools

* FindElementTool is ready

Find element update

backup

find element backup

* Updated to test strings to say "tests..."

* ScrollTool is ready

* RefreshStateTool is updated as well

* MessageManager updated

* SearchTool is ready

backup

* Interaction Element is also ready

* Add debugMessage emitter

* ValidatorTool ready and tests are passing

Validation Tool

validator tool

backup

backup

* GroupTabs tool ready

* Registered all the tools

* Planning changed to 5 steps

* BrowserAgent integration test fix

* Minor string changes

* backup

* Removed too many confusing events in EventProcessor -- there is only event.info right now

* Abort control implemented

backup

Abort

* Formatter for toolResult

Formatter for toolResult

backup

* Always render using Markdown

* Minor fix

---------

Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
2025-07-29 08:14:45 -07:00

640 lines
21 KiB
TypeScript

import { z } from "zod";
import { createReactAgent } from "@langchain/langgraph/prebuilt";
import { TaskMetadata } from "@/lib/types/types";
import { Logging } from "@/lib/utils/Logging";
import {
HumanMessage,
AIMessage,
ToolMessage,
SystemMessage,
AIMessageChunk,
} from "@langchain/core/messages";
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
import { StreamEvent } from "@langchain/core/dist/tracers/event_stream";
import { LangChainProviderFactory } from "@/lib/llm";
import { ExecutionContext } from "@/lib/runtime/ExecutionContext";
import { BaseMessage } from "@langchain/core/messages";
import { profileStart, profileEnd } from "@/lib/utils/Profiler";
// Import streaming components
import { StreamEventBus } from "@/lib/events";
import { StreamEventProcessor } from "../events/StreamEventProcessor";
// Removed deprecated IStreamProcessor import
import { ToolRegistry } from "@/lib/tools/base/ToolRegistry";
import BrowserContext from "../browser/BrowserContext";
import { getDomainFromUrl } from "../browser/Utils";
import { Runnable } from "@langchain/core/runnables";
import { RunnableConfig } from "@langchain/core/runnables";
/**
* Input schema for agent execution
*/
export const AgentInputSchema = z.object({
instruction: z.string(), // The task instruction
// Additional context you can pass -- different agents pass different additional context -- like BrowserAgent passes browserState, ProductivityAgent passes selectedTabIds, etc.
context: z.record(z.unknown()).optional(),
browserState: z.record(z.unknown()).optional(), // Current browser state
});
export type AgentInput = z.infer<typeof AgentInputSchema>;
/**
* Base output schema that all agents must conform to
*/
export const AgentOutputSchema = z.object({
success: z.boolean(), // Whether the execution completed successfully
result: z.unknown(), // Agent-specific result data
error: z.string().optional(), // Error message if failed
metadata: z.record(z.unknown()).optional(), // Additional metadata
});
export type AgentOutput = z.infer<typeof AgentOutputSchema>;
/**
* Base configuration options for agents
*/
export const AgentOptionsSchema = z.object({
executionContext: z.instanceof(ExecutionContext), // Execution context instance
systemPrompt: z.string().optional(), // Optional custom system prompt
maxIterations: z.number().int().positive().default(10), // Maximum iterations for ReAct agents
useVision: z.boolean().default(false), // Whether to enable vision
debugMode: z.boolean().default(false), // Debug logging
});
export type AgentOptions = z.infer<typeof AgentOptionsSchema>;
/**
* Interface for all agent types - now extends Runnable for LangGraph compatibility
*
* RunnableConfig: Runtime configuration object that controls execution behavior across LangChain components.
* Key properties:
* - configurable: Dynamic runtime values (e.g., {sessionId: "123", temperature: 0.7})
* - callbacks: Event handlers for streaming, errors, and progress tracking
* - recursionLimit: Max depth for ReAct loops (default: 25)
* - maxConcurrency: Parallel execution limit
* - timeout: Execution timeout in milliseconds
* - signal: AbortSignal for cancellation
* - tags/metadata: Tracking and debugging info
*
* The config propagates through the entire execution pipeline, maintaining context,
* callbacks, and control flow. Essential for streaming, cancellation, and observability.
*/
export interface IAgent extends Runnable<AgentInput, AgentOutput> {
/**
* Execute a task with the given input
* @param input - Agent input containing instruction and context
* @param config - Runtime configuration controlling execution behavior, callbacks, and context propagation
* @returns Promise resolving to structured output
*/
invoke(input: AgentInput, config?: RunnableConfig): Promise<AgentOutput>;
}
/**
* Abstract base class for LangChain-based agents.
* Uses two-phase initialization to avoid constructor virtual method call issues.
* LLM provider is created lazily to always use the latest user settings.
*/
export abstract class BaseAgent
extends Runnable<AgentInput, AgentOutput>
implements IAgent
{
protected readonly options: AgentOptions;
protected readonly executionContext: ExecutionContext; // Execution context from options
protected readonly browserContext: BrowserContext;
// LangChain namespace requirement
lc_namespace = ["nxtscape", "agents"];
// These get set during initialize()
protected systemPrompt!: string;
protected toolRegistry!: ToolRegistry;
protected isInitialized: boolean = false;
protected debugMode: boolean;
// State management for derived agents
protected stateMessageAdded: boolean = false;
protected systemPromptAdded: boolean = false;
// Store current EventBus for streaming
protected currentEventBus: StreamEventBus | null = null;
/**
* Creates a new instance of BaseAgent
* @param options - Configuration options for the agent
*/
constructor(options: AgentOptions) {
super();
this.options = AgentOptionsSchema.parse(options);
this.executionContext = this.options.executionContext;
this.browserContext = this.executionContext.browserContext;
this.debugMode = this.options.debugMode || this.executionContext.debugMode;
}
/**
* Initialize the agent by calling virtual methods to set up tools and system prompt
* This must be called after construction before using the agent
*/
public async initialize(): Promise<void> {
if (this.isInitialized) {
return; // Already initialized
}
try {
// Now it's safe to call virtual methods
this.toolRegistry = this.createToolRegistry();
this.systemPrompt =
this.options.systemPrompt || this.generateSystemPrompt();
this.isInitialized = true;
if (this.debugMode) {
this.log(
`${this.getAgentName()} initialized (LLM will be created lazily)`
);
}
} catch (error) {
this.log(
`Failed to initialize ${this.getAgentName()}: ${error}`,
"error"
);
throw error;
}
}
/**
* Ensure the agent is initialized before execution
*/
protected async ensureInitialized(): Promise<void> {
if (!this.isInitialized) {
await this.initialize();
}
}
/**
* Template method: Get agent-specific initialization message
* @returns Initialization message
*/
protected getInitializationMessage(): string {
return `🚀 Initializing ${this.getAgentName().toLowerCase()}...`;
}
/**
* Main execution method - implements Runnable interface
* This is called by LangGraph as a node function
* @param input - Agent input containing instruction and context
* @param config - Optional configuration for LangGraph web compatibility
* @returns Promise resolving to agent output
*/
public async invoke(
input: AgentInput,
config?: RunnableConfig,
): Promise<AgentOutput> {
const validatedInput = AgentInputSchema.parse(input);
const profileLabel = `Agent.${this.getAgentName()}`;
profileStart(profileLabel);
try {
await this.ensureInitialized();
// Extract EventBus from ExecutionContext only
this.currentEventBus = this.executionContext.getEventBus();
if (this.debugMode) {
this.log(
`🚀🚀 Starting ${this.getAgentName()} execution: ${
validatedInput.instruction
}`
);
this.log(`🚀🚀 Starting ${this.getAgentName()} task...`);
}
// Execute agent-specific logic - each agent handles its own orchestration
const agentResult = await this.executeAgent(
validatedInput,
config
);
// Log completion internally for debugging
if (this.debugMode) {
this.log(`✅✅ ${this.getAgentName()} completed successfully`);
}
// Removed automatic completion message - let Orchestrator handle final completion
return {
success: true,
result: agentResult,
metadata: {
agentName: this.getAgentName(),
timestamp: new Date().toISOString(),
},
};
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : String(error);
const isAbortError =
error instanceof Error &&
(error.name === "AbortError" ||
errorMessage.includes("Aborted") ||
errorMessage.includes("cancelled") ||
errorMessage.includes("stopped"));
if (!isAbortError) {
this.log(`${this.getAgentName()} failed: ${errorMessage}`, "error");
// Send error message only for non-cancellation errors
this.currentEventBus?.emitSystemError(
errorMessage,
error instanceof Error ? error : undefined,
this.getAgentName()
);
this.currentEventBus?.emitSystemMessage(
`${this.getAgentName()} failed: ${errorMessage}`,
'error',
this.getAgentName()
);
} else {
this.log(`${this.getAgentName()} cancelled: ${errorMessage}`, "info");
}
return {
success: false,
result: null,
error: errorMessage,
metadata: {
agentName: this.getAgentName(),
timestamp: new Date().toISOString(),
cancelled: isAbortError,
},
};
} finally {
// Clear EventBus after execution
this.currentEventBus = null;
profileEnd(profileLabel);
}
}
/**
* Helper method for centralized streaming execution that agents can use
* @param agent - The created ReAct agent
* @param instruction - The instruction to execute
* @param config - Optional configuration for LangGraph web compatibility
* @param messages - Optional messages array
* @returns Object containing result and all messages
*/
protected async executeReactAgentWithStreaming(
agent: any,
instruction: string,
config?: RunnableConfig,
messages?: BaseMessage[]
): Promise<{ result: any; allMessages: any[] }> {
const profileLabel = `${this.getAgentName()}.invokeWithStreaming`;
profileStart(profileLabel);
// Extract EventBus from ExecutionContext only - single source of truth
const eventBus = this.executionContext.getEventBus();
if (!eventBus) {
throw new Error('EventBus not available in ExecutionContext - ensure setEventBus() was called');
}
// Store EventBus for use in log() method during streaming
this.currentEventBus = eventBus;
// CENTRALIZED STREAMING LOGIC
eventBus.emitThinking(
`🔧 Setting up ${this.getAgentName()} tools`,
'info',
this.getAgentName()
);
// Use the existing tool registry instead of creating a new one
const toolRegistry = this.getToolRegistry();
if (!toolRegistry) {
throw new Error(`Tool registry not available for ${this.getAgentName()}`);
}
if (this.debugMode) {
this.log(
`Using existing tool registry with ${
toolRegistry.getAll().length
} tools`
);
}
// Initialize StreamEventProcessor with EventBus and existing tool registry
const streamEventProcessor = new StreamEventProcessor(
eventBus,
toolRegistry
);
if (this.debugMode) {
this.log(`StreamEventProcessor initialized with existing tool registry`);
}
// Check if already cancelled before starting
if (this.executionContext.abortController.signal.aborted) {
throw new Error("Task was cancelled before execution");
}
const eventStream = await agent.streamEvents(
{
messages: messages || [new HumanMessage(instruction)],
},
{
version: "v2",
signal: this.executionContext.abortController.signal,
recursionLimit: this.options.maxIterations,
...config,
}
);
let result: any;
let allMessages: any[] = [];
let wasCancelled = false;
try {
// Process streaming events using StreamProcessor
for await (const event of eventStream) {
// Check for cancellation
if (this.executionContext.abortController.signal.aborted) {
wasCancelled = true;
break;
}
// Delegate all event processing to StreamEventProcessor
await streamEventProcessor.processEvent(event);
// sync langchain and message manager
await this.syncLangchainAndMessageManager(event, streamEventProcessor);
// Extract result from any chain end event that contains messages
if (event.event === "on_chain_end") {
const output = event.data?.output;
// Store the first string output as a fallback in case we never see messages
if (result === undefined && typeof output === "string") {
result = output;
}
if (output && typeof output === "object" && Array.isArray(output.messages)) {
// We found the rich output that carries the full message list
result = output;
allMessages = output.messages;
}
}
}
} catch (error) {
// Handle specific known errors
if (error instanceof Error) {
if (
error.message.includes("ToolNode only accepts AIMessages as input")
) {
// Known LangGraph streaming issue that doesn't affect execution
this.log(
"Encountered known LangGraph streaming issue - continuing execution",
"info"
);
} else if (error.name === "AbortError") {
wasCancelled = true;
this.log("Task was cancelled by user", "info");
} else {
throw error;
}
} else {
throw error;
}
}
// Complete streaming
streamEventProcessor.completeStreaming();
// Handle cancellation
if (wasCancelled) {
eventBus.emitCancel('Task was cancelled', true, this.getAgentName());
profileEnd(profileLabel);
throw new Error("Task was cancelled");
}
profileEnd(profileLabel);
return { result, allMessages };
}
/**
* Template method: Get the agent name for logging
* @returns Agent name
*/
protected abstract getAgentName(): string;
/**
* Template method: Execute agent-specific logic
* Each agent handles its own instruction enhancement, agent creation, and execution
* @param input - Agent input containing instruction and context
* @param config - Optional configuration for LangGraph web compatibility
* @returns Parsed agent-specific output
*/
protected abstract executeAgent(
input: AgentInput,
config?: RunnableConfig
): Promise<unknown>;
/**
* Template method: Create tool registry for the agent
* @returns Tool registry or undefined
*/
protected createToolRegistry(): ToolRegistry {
// Default implementation returns undefined
// Subclasses can override to provide a tool registry
throw new Error("createToolRegistry must be implemented by subclasses");
}
/**
* Template method: Create agent-specific tools
* @returns Array of tools for the agent
*/
protected createTools(): any[] {
return this.toolRegistry?.getLangChainTools() || [];
}
/**
* Template method: Get tool registry for streaming display
* @returns Tool registry or undefined
*/
protected getToolRegistry(): ToolRegistry | undefined {
return this.toolRegistry;
}
/**
* Get LLM provider using current user settings
* This method creates the LLM fresh each time to ensure latest settings are used
* @returns Promise resolving to configured LLM provider
*/
protected async getLLM(): Promise<BaseChatModel> {
try {
const llm = await LangChainProviderFactory.createLLM();
return llm;
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : String(error);
this.log(`Failed to create LLM provider: ${errorMessage}`, "error");
throw new Error(`LLM provider creation failed: ${errorMessage}`);
}
}
/**
* Template method: Generate the system prompt for this agent
* @returns System prompt string
*/
protected abstract generateSystemPrompt(): string;
protected async enhanceInstructionWithContext(
instruction: string
): Promise<string> {
try {
const pageUrl = await this.browserContext.getCurrentPage().then(p => p.url());
const pageTitle = await this.browserContext.getCurrentPage().then(p => p.title());
return `${instruction}\n\n## BROWSER CONTEXT\nCurrent URL: ${pageUrl}\nPage Title: ${pageTitle}`;
} catch (error) {
this.log(
`Failed to enhance instruction with context: ${error}`,
"warning"
);
return instruction;
}
}
protected async getSelectedTabsInstruction(): Promise<string> {
const selectedTabIds =
this.executionContext.getSelectedTabIds() || undefined;
// User has selected tabs
if (selectedTabIds && selectedTabIds.length > 1) {
// Multi-tab context - get detailed information about all selected tabs
const tabInfoList: Array<{
id: number;
title: string;
url: string;
domain: string;
}> = [];
for (const tabId of selectedTabIds) {
try {
const tab = await chrome.tabs.get(tabId);
const tabDomain = getDomainFromUrl(tab.url || "unknown");
tabInfoList.push({
id: tabId,
title: tab.title || "Untitled",
url: tab.url || "about:blank",
domain: tabDomain,
});
} catch (error) {
// If tab can't be accessed, add basic info don't add it to the list
}
}
let selectedTabsInstruction = `## USER TAB SELECTED CONTEXT\n🔗 **User has selected ${
tabInfoList.length
} tabs:**\n\nJSON: ${JSON.stringify(tabInfoList)}`;
selectedTabsInstruction += `\n\n**TAB PROCESSING INSTRUCTIONS:**\n• Content from all ${tabInfoList.length} tabs should be considered collectively\n• When summarizing or analyzing, include information from all selected tabs\n• Mention which tabs specific information comes from when relevant\n• Consider relationships and connections between the different tabs`;
return selectedTabsInstruction;
}
return "";
}
protected async syncLangchainAndMessageManager(
event: StreamEvent,
streamEventProcessor?: StreamEventProcessor
): Promise<void> {
try {
if (event.event === "on_chat_model_end") {
// on_chat_model_end is called when the chat model is done
// This could either AIChunk message -- which is AI thinking; where content is present
// This could also be AI calling the tool -- which is AI calling the tool where content is not present
// we want to extract the output of that message and add to our message manager
// Tip: Put breakpoint here to see how the event looks like
if (event.data?.output instanceof AIMessageChunk) {
this.executionContext.messageManager.addAIMessage(
event.data?.output.text
);
}
} else if (event.event === "on_tool_end") {
// on_tool_end is called when a tool is executed
// we want to extract the output of that message and add to our message manager
// Tip: Put breakpoint here to see how the event looks like
const output = event.data?.output;
const toolName = event.name || "unknown";
if (
output &&
streamEventProcessor &&
"getActionResults" in streamEventProcessor
) {
const actionResults = streamEventProcessor.getActionResults();
// Find the most recent ActionResult for this tool
const recentResult = actionResults
.filter((ar: any) => ar.toolName === toolName)
.pop();
if (recentResult && recentResult.includeInMemory) {
// Use extracted content if available (for search_text and interact)
const contentToAdd =
recentResult.extractedContent ||
(output instanceof ToolMessage
? (output.content as string)
: typeof output === "string"
? output
: JSON.stringify(output));
this.executionContext.messageManager.addToolMessage(
contentToAdd,
toolName
);
}
}
}
// For other event types, we don't need to sync yet
} catch (error) {
// Log the error but don't throw to avoid breaking the stream
this.log(`Error syncing LangChain and MessageManager: ${error}`, "error");
}
}
/**
* Log a message if debug mode is enabled
* @param message - Message to log
* @param level - Log level
* @param data - Optional structured data for debugging
*/
protected log(
message: string,
level: "info" | "warning" | "error" = "info",
data?: any
): void {
// Always log to console in debug mode or for errors
if (this.debugMode || level === "error") {
Logging.log(this.getAgentName(), message, level);
}
// Send to UI via EventBus if debug mode is on and EventBus is available
if (this.debugMode && this.currentEventBus && level !== "error") {
this.currentEventBus.emitDebugMessage(`[${this.getAgentName()}] ${message}`, data, this.getAgentName());
}
}
public async cleanup(): Promise<void> {
try {
await this.executionContext.browserContext.cleanup();
} catch (error) {
this.log(`Failed to cleanup browser context: ${error}`, "error");
}
}
}