mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-19 19:41:06 +00:00
* clean-up bunch of files for re-write * more clean-up and adding basic agent * Minor fix moved types into respective files. * Deleted bunch of old files backup Update gitignore Deleted a bunch of files Remove message manager Deleted old docs Update rules rename Profiler to profiler * Temporarily adding old code * Adding two small things back * backup * Implemented LangChainProvider and updated cursor rules backup LangChainProvider curosr rules * Implement tests for LangChainProvider -- unit test and integration test integration test passes integration test backup * Tool Design Tools Desing tools design * NavigationTool ready NavigationTool ready NavigationTool ready NaivgationTool ready backup * MessageManager MessageManager backup * Fixed integration test * Agent design new Updated agent design and added bunch of /NTN commands agent new design * Delete old agent design * MessageManagerReadOnly class * PlannerTool ready PlannerTool almost ready * ToolManager and DoneTool * Integration of BrowserAgent * BrowserAgent implementation v0.1 * BrowserAgent small fix v0.2 * Tool calling design too call design tool design claude * Update agent tool design with // NTN * add zod-to-json npm install * BrowserAGent v0.3 * BrowserAgent v0.4 * BrowserAgent v0.5 * fixes * Build error fixes in my NEWLY added code build errors fix * Build error fixes in old code (integration work) backup * Comment StreamEventProcessor for now, it is not used * Small build error fix * Small rename * Added integration test to check structuredLLM and changed to 4o-mini change default to nxtscape integration test * Small docstring * Simplified BrowserAgent code and added integration test Simplified BrowserAgent code BrowserAGent integrationt est * Update CLAUDE.md with project memory and instructions on how to write code Update CLAUDE.md with project memory and instructions on how to write code Project Memory * Just a mova.. Moved ToolManager outside. Build works. 
* TabOperations tool TabOperations Tool and fixing some test tab operations * Update CLAUDE.md * Added ClassificationTool classifiction tool classification prommpt * Refactored and simplified PlannerTool unit test and integration test * Updated Plnnaer tool * Update CLAUDE.md * BrowserAgent modified to do classification BrowserAgent with classification * minor fix to ToolManager * Instead of ToolCall and ToolResult -- just updating message manager once * minor fix to BrowserAgent integration test * Changed done to "done_tool" * Updated CLAUDE.md to reflect understanding of claude * Uncommented stream event processor * Renamed EventBus to StreamEventBus * Commented StreamEventProcessor * Event Processor * Integrated EventProcessor with BrowserAgent Added EventProcessor to BrowserAgetn * Renamed StreamEventBus to EventBus * Made EventBus required parameter in ExecutionContext * PlanGenerator rewrite PlanGenerator rewrite backup * For simple task, explicitly tell it to call done tool * Max attempts for simple task * backup * Revert "backup" This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f. * Consolidating where EventBus and EventProcessor are created and initialized backup * Update CLAUDE.md Update CLAUDE.md * Improving agent loop code Cleaned up processTooCall classification task * Create test-writer subAgent test-agent-prompt test agent prompt test-agent-prompt Update test-writer.md * BrowserAgent test Browseragent test BrowserAgent test * BrowserAgent refactor backup backup * Minor fixes * Minor fix * minor change -- NEW AGENT LOOP IS WORKING WELL * Update cursor rules * Small change * Improved BrowserAgent integration test Improved BrowserAgent integration test * Small change * Update CLAUDE.md * Different tools * FindElementTool is ready Find element update backup find element backup * Updated to test strings to say "tests..." 
* ScrollTool is ready * RefreshStateTool is updated as well * MessageManager updated * SearchTool is ready backup * Interaction Element is also ready * Add debugMessage emitter * ValidatorTool ready and tests are passing Validation Tool validator tool backup backup * GroupTabs tool ready * Registered all the tools * Planning changed to 5 steps * BrowserAgent integration test fix * Minor string changes * backup * Removed too many confusing events in EventProcessor -- there is only event.info right now * Abort control implemented backup Abort * Formatter for toolResult Formatter for toolResult backup * Always render using Markdown * Minor fix --------- Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
143 lines
5.6 KiB
TypeScript
143 lines
5.6 KiB
TypeScript
import { z } from 'zod';
|
|
import { BaseAgent, AgentOptions, AgentInput } from './BaseAgent';
|
|
import { ToolRegistry } from '@/lib/tools/base/ToolRegistry';
|
|
import { LangChainProviderFactory } from '@/lib/llm/LangChainProviderFactory';
|
|
import { HumanMessage, SystemMessage } from '@langchain/core/messages';
|
|
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
|
import { RunnableConfig } from '@langchain/core/runnables';
|
|
import { withFlexibleStructuredOutput } from '@/lib/llm/utils/structuredOutput';
|
|
|
|
/**
 * Workflow paths a user request can be routed to.
 */
const TASK_TYPES = ['productivity', 'browse', 'answer'] as const;

/**
 * Zod schema for the classifier's routing decision: a single `task_type`
 * field restricted to one of the known workflow paths.
 */
export const ClassificationOutputSchema = z.object({
  task_type: z.enum(TASK_TYPES)  // Which agent path to take
});

/** Static type of a routing decision, inferred from ClassificationOutputSchema. */
export type ClassificationOutput = z.infer<typeof ClassificationOutputSchema>;
|
|
|
|
/**
 * Agent specialized for classifying user intents and routing to appropriate workflows.
 * Labels each request as one of three task types: 'productivity', 'browse', or 'answer'.
 * It makes a single structured-output LLM call and uses no tools.
 */
export class ClassificationAgent extends BaseAgent {
  /**
   * Creates a new instance of ClassificationAgent
   * @param options - Configuration options for the classification agent
   */
  constructor(options: AgentOptions) {
    super(options);
  }

  /**
   * Override: Get the agent name for logging
   * @returns The fixed name 'ClassificationAgent'
   */
  protected getAgentName(): string {
    return 'ClassificationAgent';
  }

  /**
   * Override: Create empty tool registry (classification doesn't use tools)
   * @returns A freshly constructed ToolRegistry with nothing registered on it here
   */
  protected createToolRegistry(): ToolRegistry {
    return new ToolRegistry();  // Empty registry
  }

  /**
   * Override: Get the default system prompt for classification
   * @returns Prompt text listing the rules and examples for each task type
   */
  protected generateSystemPrompt(): string {
    return `You are a task classification specialist for the Nxtscape browser assistant. Your job is to analyze user requests and determine the appropriate workflow.

CLASSIFICATION RULES:

**ANSWER TASKS** (Question answering about web content):
- Content questions: "what is this page about?", "explain this article", "what does this site say about X?"
- Multi-tab analysis: "summarize these tabs", "compare content across tabs", "find information about Y in open tabs"
- Specific queries: "what are the main points?", "extract key information", "answer based on page content"
- Research questions: "what can you tell me about X from these pages?", "analyze the content"
- Active data extraction: "get product prices", "scrape company info"

**PRODUCTIVITY TASKS** (Direct browser management - no content analysis):
- Tab management: "close tabs", "group tabs", "switch to Gmail", "list open tabs"
- Browser organization: "save session", "bookmark page", "organize bookmarks"
- Status queries: "what tabs are open?", "show history", "tab count"
- Browser efficiency: "close duplicate tabs", "group shopping tabs"

**BROWSE TASKS** (Require multi-step planning and web automation):
- Website navigation: "go to Amazon and search", "navigate to login page"
- Form interactions: "fill out form", "submit contact form", "sign up for account"
- Complex workflows: "complete checkout", "compare prices across sites"
- Web automation: "click buttons", "scroll and find", "interact with elements"

DECISION CRITERIA:
- If task is about understanding/analyzing current page content → answer
- If task involves direct browser management → productivity
- If task involves web page interaction/automation → browse
- If unsure and task is about content → answer
- If unsure and task involves actions → browse

Analyze the user request and classify it appropriately with high confidence.`;
  }

  /**
   * Execute classification task - builds the prompt, asks the LLM for a
   * structured answer, and validates it before returning.
   *
   * Never rejects: any failure (LLM error, malformed or out-of-enum output)
   * is logged and answered with the 'productivity' fallback.
   *
   * @param input - Agent input containing the instruction to classify
   * @param config - Optional runnable configuration forwarded to the LLM call
   * @returns Promise resolving to classification output
   */
  protected async executeAgent(
    input: AgentInput,
    config?: RunnableConfig
  ): Promise<ClassificationOutput> {
    try {
      this.log(`🎯 Classifying user request: ${input.instruction}`);

      // Send progress update via EventBus
      // this.currentEventBus?.emitSystemMessage('🎯 Analyzing and planning your task...', 'info', this.getAgentName());

      // Derive the LLM-facing schema from the exported one so the allowed
      // task types are declared in exactly one place; only the description
      // shown to the model is added here.
      const classificationSchema = z.object({
        task_type: ClassificationOutputSchema.shape.task_type.describe('Whether this is a productivity task, browse task, or answer task')
      });

      // Get LLM and create structured output
      const llm = await this.getLLM();
      const structuredLLM = await withFlexibleStructuredOutput(llm, classificationSchema);

      // Build classification prompt
      const userPrompt = `Classify this user request:

USER REQUEST: "${input.instruction}"

Determine if this is:
- An answer task (question about web content)
- A productivity task (browser/tab management)
- A browse task (web automation)

You must return a JSON object with a single field "task_type" that has value either "answer", "productivity", or "browse".`;

      // Get classification from LLM
      const result = await structuredLLM.invoke([
        new SystemMessage(this.systemPrompt),
        new HumanMessage(userPrompt)
      ], config);

      // Validate instead of casting: a malformed result throws here and is
      // handled by the fallback below rather than reaching callers.
      const classification = ClassificationOutputSchema.parse(result);

      this.log(`✅ Classification result: ${classification.task_type}`);

      return classification;

    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      this.log(`❌ Classification failed: ${errorMessage}`, 'error');

      // Default to productivity path on error
      return {
        task_type: 'productivity'
      };
    }
  }
}
|