mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-20 04:21:23 +00:00
* clean-up bunch of files for re-write * more clean-up and adding basic agent * Minor fix moved types into respective files. * Deleted bunch of old files backup Update gitignore Deleted a bunch of files Remove message manager Deleted old docs Update rules rename Profiler to profiler * Temporarily adding old code * Adding two small things back * backup * Implemented LangChainProvider and updated cursor rules backup LangChainProvider curosr rules * Implement tests for LangChainProvider -- unit test and integration test integration test passes integration test backup * Tool Design Tools Desing tools design * NavigationTool ready NavigationTool ready NavigationTool ready NaivgationTool ready backup * MessageManager MessageManager backup * Fixed integration test * Agent design new Updated agent design and added bunch of /NTN commands agent new design * Delete old agent design * MessageManagerReadOnly class * PlannerTool ready PlannerTool almost ready * ToolManager and DoneTool * Integration of BrowserAgent * BrowserAgent implementation v0.1 * BrowserAgent small fix v0.2 * Tool calling design too call design tool design claude * Update agent tool design with // NTN * add zod-to-json npm install * BrowserAGent v0.3 * BrowserAgent v0.4 * BrowserAgent v0.5 * fixes * Build error fixes in my NEWLY added code build errors fix * Build error fixes in old code (integration work) backup * Comment StreamEventProcessor for now, it is not used * Small build error fix * Small rename * Added integration test to check structuredLLM and changed to 4o-mini change default to nxtscape integration test * Small docstring * Simplified BrowserAgent code and added integration test Simplified BrowserAgent code BrowserAGent integrationt est * Update CLAUDE.md with project memory and instructions on how to write code Update CLAUDE.md with project memory and instructions on how to write code Project Memory * Just a mova.. Moved ToolManager outside. Build works. 
* TabOperations tool TabOperations Tool and fixing some test tab operations * Update CLAUDE.md * Added ClassificationTool classifiction tool classification prommpt * Refactored and simplified PlannerTool unit test and integration test * Updated Plnnaer tool * Update CLAUDE.md * BrowserAgent modified to do classification BrowserAgent with classification * minor fix to ToolManager * Instead of ToolCall and ToolResult -- just updating message manager once * minor fix to BrowserAgent integration test * Changed done to "done_tool" * Updated CLAUDE.md to reflect understanding of claude * Uncommented stream event processor * Renamed EventBus to StreamEventBus * Commented StreamEventProcessor * Event Processor * Integrated EventProcessor with BrowserAgent Added EventProcessor to BrowserAgetn * Renamed StreamEventBus to EventBus * Made EventBus required parameter in ExecutionContext * PlanGenerator rewrite PlanGenerator rewrite backup * For simple task, explicitly tell it to call done tool * Max attempts for simple task * backup * Revert "backup" This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f. * Consolidating where EventBus and EventProcessor are created and initialized backup * Update CLAUDE.md Update CLAUDE.md * Improving agent loop code Cleaned up processTooCall classification task * Create test-writer subAgent test-agent-prompt test agent prompt test-agent-prompt Update test-writer.md * BrowserAgent test Browseragent test BrowserAgent test * BrowserAgent refactor backup backup * Minor fixes * Minor fix * minor change -- NEW AGENT LOOP IS WORKING WELL * Update cursor rules * Small change * Improved BrowserAgent integration test Improved BrowserAgent integration test * Small change * Update CLAUDE.md * Different tools * FindElementTool is ready Find element update backup find element backup * Updated to test strings to say "tests..." 
* ScrollTool is ready * RefreshStateTool is updated as well * MessageManager updated * SearchTool is ready backup * Interaction Element is also ready * Add debugMessage emitter * ValidatorTool ready and tests are passing Validation Tool validator tool backup backup * GroupTabs tool ready * Registered all the tools * Planning changed to 5 steps * BrowserAgent integration test fix * Minor string changes * backup * Removed too many confusing events in EventProcessor -- there is only event.info right now * Abort control implemented backup Abort * Formatter for toolResult Formatter for toolResult backup * Always render using Markdown * Minor fix --------- Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
200 lines
6.9 KiB
TypeScript
200 lines
6.9 KiB
TypeScript
import { z } from 'zod';
|
|
import { BaseAgent, AgentOptions, AgentInput } from './BaseAgent';
|
|
import { ToolRegistry } from '@/lib/tools/base';
|
|
import { Logging } from '@/lib/utils/Logging';
|
|
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
|
|
import { RunnableConfig } from '@langchain/core/runnables';
|
|
import { IntentPredictionPrompt } from '@/lib/prompts/IntentPredictionPrompt';
|
|
import { withFlexibleStructuredOutput } from '@/lib/llm/utils/structuredOutput';
|
|
|
|
/**
 * A single entry in a tab's navigation history.
 *
 * `timestamp` is a plain number; the agent passes it to `new Date(...)` when
 * logging, so it is presumably epoch milliseconds (confirm against the caller).
 */
const NavigationEntrySchema = z.object({
  url: z.string(),
  title: z.string(),
  timestamp: z.number(),
});
|
|
|
|
/** A page landmark: its role plus an optional label. */
const SnapshotLandmarkSchema = z.object({
  role: z.string(),
  label: z.string().optional(),
});

/** A form on the page: an optional action plus the list of its fields. */
const SnapshotFormSchema = z.object({
  action: z.string().optional(),
  fields: z.array(z.string()),
});

/**
 * Accessibility-oriented summary of the current page that is handed to the
 * intent predictor.
 */
const AccessibilitySnapshotSchema = z.object({
  // Page identity
  url: z.string(),
  cleanUrl: z.string(),  // URL without query parameters
  title: z.string(),

  // Optional metadata tags
  metaDescription: z.string().optional(),  // Meta description tag
  ogTitle: z.string().optional(),  // Open Graph title
  ogDescription: z.string().optional(),  // Open Graph description

  // Flat text lists collected from the page
  headings: z.array(z.string()),
  buttons: z.array(z.string()),
  links: z.array(z.string()),
  ariaLabels: z.array(z.string()),

  // Structured elements
  landmarks: z.array(SnapshotLandmarkSchema),
  forms: z.array(SnapshotFormSchema),

  // Optional main text of the page
  mainText: z.string().optional(),
});
|
|
|
|
/**
 * Input accepted by the intent predictor: the tab's navigation history plus
 * an accessibility snapshot of the current page.
 */
export const IntentPredictionInputSchema = z.object({
  tabHistory: z.array(NavigationEntrySchema),
  accessibilitySnapshot: AccessibilitySnapshotSchema,
});
|
|
|
|
/**
 * Output of an intent prediction run.
 *
 * - `success`: whether the prediction succeeded.
 * - `intents`: the predicted intents.
 * - `confidence`: optional overall confidence, constrained to [0, 1].
 * - `error`: optional error description.
 */
export const IntentPredictionOutputSchema = z.object({
  success: z.boolean(),
  intents: z.array(z.string()),
  confidence: z.number().min(0).max(1).optional(),
  error: z.string().optional(),
});
|
|
|
|
/** Static type inferred from `IntentPredictionInputSchema`. */
export type IntentPredictionInput = z.infer<typeof IntentPredictionInputSchema>;

/** Static type inferred from `IntentPredictionOutputSchema`. */
export type IntentPredictionOutput = z.infer<typeof IntentPredictionOutputSchema>;
|
|
|
|
/**
 * Agent specialized for predicting user intents based on browsing context.
 *
 * It registers no tools: a prompt is built from the tab history and an
 * accessibility snapshot of the current page, and a single structured-output
 * LLM call produces the predicted intents.
 */
export class IntentPredictionAgent extends BaseAgent {
  /**
   * Shape the LLM is asked to return. It is constant, so it is built once
   * for the class instead of on every prediction.
   */
  private static readonly predictionResultSchema = z.object({
    intents: z.array(z.string()).max(3).describe('Top 3 predicted user intents based on browsing context'),
    confidence: z.number().min(0).max(1).describe('Confidence level in the predictions (0-1)'),
  });

  private prompt: IntentPredictionPrompt;

  /**
   * @param options - Agent options, forwarded unchanged to `BaseAgent`.
   */
  constructor(options: AgentOptions) {
    super(options);
    this.prompt = new IntentPredictionPrompt();
  }

  /**
   * Override: Create tool registry for the agent (no tools needed for intent prediction).
   *
   * @returns A new, empty `ToolRegistry`.
   */
  protected createToolRegistry(): ToolRegistry {
    this.log('🔧 Creating empty ToolRegistry (intent prediction uses only LLM)');
    return new ToolRegistry();
  }

  /**
   * Override: Generate system prompt for intent prediction.
   *
   * @returns The system prompt produced by `IntentPredictionPrompt.generate()`.
   */
  protected generateSystemPrompt(): string {
    return this.prompt.generate();
  }

  /**
   * Execute agent-specific logic (required by BaseAgent).
   *
   * @param input - Agent input; `input.context` must carry `tabHistory` and
   *   `accessibilitySnapshot`. Only their presence is checked here; the
   *   context is cast, not validated against `IntentPredictionInputSchema`.
   * @param callbacks - Not used by this agent; kept for signature compatibility.
   * @param config - Optional runnable config forwarded to the LLM invocation.
   * @returns `{ intents, confidence }` as returned by the structured LLM call
   *   (the base class is expected to wrap this payload).
   * @throws Error when either required context field is missing. Any other
   *   failure is logged and rethrown unchanged, so no intents are returned on
   *   error.
   */
  protected async executeAgent(
    input: AgentInput,
    callbacks?: any,
    config?: RunnableConfig
  ): Promise<unknown> {
    try {
      // Ensure agent is initialized
      if (!this.isInitialized) {
        await this.initialize();
      }

      this.log('🔮 Starting intent prediction');

      // Presence check only: the cast performs no runtime validation.
      const context = input.context as IntentPredictionInput;
      if (!context?.tabHistory || !context?.accessibilitySnapshot) {
        throw new Error('Missing required context: tabHistory and accessibilitySnapshot');
      }

      const { tabHistory, accessibilitySnapshot } = context;

      this.logPredictionContext(tabHistory, accessibilitySnapshot);

      // Build prompt
      const prompt = this.prompt.buildPredictionPrompt(tabHistory, accessibilitySnapshot);

      // TODO(nithin): Similar to other agents, add system prompt to the message manager. Also, in the intent prediction
      // message manager, keep track of which intents were predicted and which were clicked and pass that to the LLM.
      // this.executionContext.messageManager.addSystemMessage(this.systemPrompt, 0);
      // this.systemPromptAdded = true;

      // Get LLM (created lazily with latest settings)
      const llm = await this.getLLM();

      // Create LLM with structured output using flexible schema handling
      const structuredLLM = await withFlexibleStructuredOutput(
        llm,
        IntentPredictionAgent.predictionResultSchema
      );

      // System prompt first, then the prediction prompt as the human turn
      const systemPrompt = this.generateSystemPrompt();
      const messages = [
        new SystemMessage(systemPrompt),
        new HumanMessage(prompt)
      ];

      // Invoke LLM with structured output
      this.log('🤖 Invoking LLM for intent prediction with structured output');
      const result = await structuredLLM.invoke(messages, config);

      // Log the structured response
      this.log('🤖 Structured LLM response:', 'info', {
        intents: result.intents,
        confidence: result.confidence,
        intentsCount: result.intents.length
      });

      this.log(`✅ Predicted ${result.intents.length} intents`);

      // Log the predicted intents with confidence
      this.log('🎯 Predicted intents:', 'info', {
        intents: result.intents,
        confidence: result.confidence,
        pageUrl: accessibilitySnapshot.cleanUrl,
        pageTitle: accessibilitySnapshot.ogTitle || accessibilitySnapshot.title
      });

      // Return just the data - BaseAgent will wrap it
      return {
        intents: result.intents,
        confidence: result.confidence
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.log(`❌ Intent prediction failed: ${errorMessage}`, 'error');

      // Don't return any intents on error - let the UI handle it
      throw error;
    }
  }

  // Prompt building lives in IntentPredictionPrompt. The former
  // analyzePageFeatures, parseIntentResponse and getFallbackIntents helpers
  // were removed: structured output via withFlexibleStructuredOutput now
  // handles JSON parsing.

  /**
   * Emit the debug logs describing the browsing context used for prediction.
   *
   * @param tabHistory - Navigation entries of the tab.
   * @param accessibilitySnapshot - Snapshot of the current page.
   */
  private logPredictionContext(
    tabHistory: IntentPredictionInput['tabHistory'],
    accessibilitySnapshot: IntentPredictionInput['accessibilitySnapshot']
  ): void {
    // Log tab history for debugging
    this.log('📍 Tab history:', 'info', {
      tabHistory: tabHistory.map(entry => ({
        url: entry.url,
        title: entry.title,
        timestamp: IntentPredictionAgent.toIsoTimestamp(entry.timestamp)
      }))
    });

    // Log current page info
    this.log('📄 Current page:', 'info', {
      url: accessibilitySnapshot.url,
      cleanUrl: accessibilitySnapshot.cleanUrl,
      title: accessibilitySnapshot.title,
      metaDescription: accessibilitySnapshot.metaDescription,
      ogTitle: accessibilitySnapshot.ogTitle,
      headingsCount: accessibilitySnapshot.headings.length,
      buttonsCount: accessibilitySnapshot.buttons.length,
      linksCount: accessibilitySnapshot.links.length,
      formsCount: accessibilitySnapshot.forms.length
    });
  }

  /**
   * Format a timestamp for logging without ever throwing.
   *
   * `Date.prototype.toISOString()` throws a RangeError for an invalid date,
   * and the context is not validated at runtime, so a bad timestamp must not
   * be allowed to abort a prediction from inside a diagnostic log line.
   *
   * @param timestamp - Time value taken from a navigation entry.
   * @returns The ISO-8601 string, or an `invalid (...)` marker when the value
   *   does not form a valid date.
   */
  private static toIsoTimestamp(timestamp: number): string {
    const date = new Date(timestamp);
    return Number.isNaN(date.getTime())
      ? `invalid (${String(timestamp)})`
      : date.toISOString();
  }

  /**
   * @returns The agent's name, `'IntentPredictionAgent'`.
   */
  protected getAgentName(): string {
    return 'IntentPredictionAgent';
  }
}