diff --git a/src/lib/agent/BrowserAgent.prompt.ts b/src/lib/agent/BrowserAgent.prompt.ts index 0da333ba..faacadff 100644 --- a/src/lib/agent/BrowserAgent.prompt.ts +++ b/src/lib/agent/BrowserAgent.prompt.ts @@ -51,63 +51,6 @@ The system automatically classifies tasks before you see them: ## 🛠️ AVAILABLE TOOLS ${toolDescriptions} -## 🔌 MCP SERVER INTEGRATION -You have access to MCP (Model Context Protocol) servers that provide direct API access to external services. - -### CRITICAL: Three-Step Process (NEVER SKIP STEPS) -When users ask about emails, videos, documents, calendars, repositories, or other external services: - -**🔴 STEP 1: MANDATORY - Check Installed MCP Servers** -- Use: mcp_tool with action: 'getUserInstances' -- Returns: List of installed servers with serverUrls -- Example response: { instances: [{ name: 'Gmail', serverUrl: 'https://mcp-gmail.klavis.ai/abc-123', authenticated: true }] } -- SAVE the serverUrl for next steps - -**🔴 STEP 2: MANDATORY - Get Available Tools (NEVER SKIP THIS)** -- Use: mcp_tool with action: 'listTools', serverUrl: [EXACT URL from step 1] -- Returns: List of available tools for that server -- Example response: { tools: [{ name: 'gmail_search', description: 'Search emails' }, { name: 'gmail_send', description: 'Send email' }] } -- DO NOT GUESS TOOL NAMES - you MUST get them from listTools - -**🔴 STEP 3: Call the Tool** -- Use: mcp_tool with action: 'callTool', serverUrl: [EXACT URL from step 1], toolName: [EXACT NAME from step 2], toolArgs: {relevant arguments as JSON object} -- IMPORTANT: toolArgs must be a proper JSON object, not a string -- Returns: Tool execution result - -### ⚠️ COMMON MISTAKES TO AVOID: -- ❌ NEVER assume tool names like 'gmail_list_messages' - always get from listTools -- ❌ NEVER skip the listTools step - tool names vary between servers -- ❌ NEVER use partial URLs - use the full serverUrl from getUserInstances -- ❌ NEVER combine steps - execute them sequentially - -### Example: "Check my unread emails" 
-1. mcp_tool { action: 'getUserInstances' } - → Returns: { instances: [{ name: 'Gmail', serverUrl: 'https://mcp-gmail.klavis.ai/a6ea8271-61d3-421b-af51-e61a546e7446', authenticated: true }] } -2. mcp_tool { action: 'listTools', serverUrl: 'https://mcp-gmail.klavis.ai/a6ea8271-61d3-421b-af51-e61a546e7446' } - → Returns: { tools: [{ name: 'gmail_search_emails', description: 'Searches for emails using Gmail search syntax' }, { name: 'gmail_read_email', description: 'Retrieves the content of a specific email' }] } -3. mcp_tool { action: 'callTool', serverUrl: 'https://mcp-gmail.klavis.ai/a6ea8271-61d3-421b-af51-e61a546e7446', toolName: 'gmail_search_emails', toolArgs: { "q": "is:unread" } } - → Note: toolArgs is a JSON object with property "q", NOT a string like "{'q': 'is:unread'}" - → Returns: unread email messages - -### MCP Usage Rules -- **ALWAYS execute all 3 steps in order** - No exceptions -- **ALWAYS check listTools** - Tool names are dynamic and server-specific -- **Use exact serverUrl** from getUserInstances response (full URL) -- **Use exact toolName** from listTools response (don't guess) -- **If server not authenticated** (authenticated: false), inform user to reconnect in settings -- **Prefer MCP over browser automation** when available for supported services - -### Supported Services -- Gmail → Email operations -- YouTube → Video operations -- GitHub → Repository operations -- Slack → Team communication -- Google Calendar → Calendar operations -- Google Drive → File operations -- Notion → Note management -- Linear → Issue tracking - -If NO relevant MCP server is installed, fall back to browser automation. 
## 🎯 STATE MANAGEMENT & DECISION LOGIC ### 📊 STATE MANAGEMENT diff --git a/src/lib/agent/BrowserAgent.ts b/src/lib/agent/BrowserAgent.ts index 9fae70b4..e1cbb526 100644 --- a/src/lib/agent/BrowserAgent.ts +++ b/src/lib/agent/BrowserAgent.ts @@ -61,7 +61,6 @@ import { createValidatorTool } from '@/lib/tools/validation/ValidatorTool'; import { createScreenshotTool } from '@/lib/tools/utils/ScreenshotTool'; import { createExtractTool } from '@/lib/tools/extraction/ExtractTool'; import { createResultTool } from '@/lib/tools/result/ResultTool'; -import { createMCPTool } from '@/lib/tools/mcp/MCPTool'; import { generateSystemPrompt, generateSingleTurnExecutionPrompt } from './BrowserAgent.prompt'; import { AIMessage, AIMessageChunk } from '@langchain/core/messages'; import { PLANNING_CONFIG } from '@/lib/tools/planning/PlannerTool.config'; @@ -282,7 +281,7 @@ export class BrowserAgent { this.toolManager.register(createResultTool(this.executionContext)); // MCP tool for external integrations - this.toolManager.register(createMCPTool(this.executionContext)); + // Disabled (the createMCPTool import was removed): this.toolManager.register(createMCPTool(this.executionContext)); // Register classification tool last with all tool descriptions const toolDescriptions = this.toolManager.getDescriptions(); diff --git a/src/lib/tools/classification/classification.tool.prompt.ts b/src/lib/tools/classification/classification.tool.prompt.ts index 8960e9bd..77ad77b8 100644 --- a/src/lib/tools/classification/classification.tool.prompt.ts +++ b/src/lib/tools/classification/classification.tool.prompt.ts @@ -19,21 +19,14 @@ Complex tasks require multiple steps or planning, such as: - "Research the latest news about AI" - "Compare prices across multiple websites" - "Fill out a form with specific information" - -MCP-related tasks should be classified as COMPLEX to ensure proper planning: -- "Check my emails" (Gmail MCP) -- "List my YouTube videos" (YouTube MCP) -- "Show my GitHub pull requests" (GitHub MCP) -- "Check my calendar" (Google 
Calendar MCP) -- "List my Slack messages" (Slack MCP) -- "Search my Google Drive" (Google Drive MCP) -- "Show my Notion pages" (Notion MCP) -- "List my Linear issues" (Linear MCP) - -These tasks need planning to: -1. Check if the MCP server is installed -2. Get available tools from the server -3. Execute the appropriate tool +- "Check my emails" +- "List my YouTube videos" +- "Show my GitHub pull requests" +- "Check my calendar" +- "List my Slack messages" +- "Search my Google Drive" +- "Show my Notion pages" +- "List my Linear issues" You must respond with a JSON object in this exact format: { diff --git a/src/lib/tools/planning/PlannerTool.prompt.ts b/src/lib/tools/planning/PlannerTool.prompt.ts index 1b0d690a..fc225bd0 100644 --- a/src/lib/tools/planning/PlannerTool.prompt.ts +++ b/src/lib/tools/planning/PlannerTool.prompt.ts @@ -23,26 +23,6 @@ export function generatePlannerSystemPrompt(): string { - If you know specific sites/URLs, mention them (e.g., "Navigate to Amazon") - Let the browser agent handle the technical details of each step -# MCP SERVER INTEGRATION: -For tasks involving external services (email, calendar, GitHub, Slack, YouTube, Drive, Notion, Linear): - -Your plan MUST follow this EXACT pattern for MCP-related tasks: -- Step 1: "Check if [Service] MCP server is installed and get server URL" -- Step 2: "Get available tools from [Service] MCP server" -- Step 3: "Use [Service] MCP to [perform action]" - -Example for "Check my unread emails": -- Step 1: "Check if Gmail MCP server is installed and get server URL" -- Step 2: "Get available tools from Gmail MCP server" -- Step 3: "Use Gmail MCP to search for unread emails" - -Example for "Send an email": -- Step 1: "Check if Gmail MCP server is installed and get server URL" -- Step 2: "Get available tools from Gmail MCP server" -- Step 3: "Use Gmail MCP to compose and send email" - -IMPORTANT: Do NOT skip the "Get available tools" step - tool names vary between servers - # STEP FORMAT: Each step 
should describe WHAT to achieve, not HOW: - "Navigate to Amazon" (not "Click on address bar and type amazon.com")