Files
BrowserOS/reference-code/old-lib/tools/answer/ExtractTool.ts
Felarof 8245dfe0ff Rewrite Agent Loop (#7)
* clean-up bunch of files for re-write

* more clean-up and adding basic agent

* Minor fix moved types into respective files.

* Deleted bunch of old files

backup

Update gitignore

Deleted a bunch of files

Remove message manager

Deleted old docs

Update rules

rename Profiler to profiler

* Temporarily adding old code

* Adding two small things back

* backup

* Implemented LangChainProvider and updated cursor rules

backup

LangChainProvider

cursor rules

* Implement tests for LangChainProvider -- unit test and integration test

integration test passes

integration test backup

* Tool Design

Tools Design

tools design

* NavigationTool ready

NavigationTool ready

NavigationTool ready

NavigationTool ready

backup

* MessageManager

MessageManager

backup

* Fixed integration test

* Agent design new

Updated agent design and added bunch of /NTN commands

agent new design

* Delete old agent design

* MessageManagerReadOnly class

* PlannerTool ready

PlannerTool almost ready

* ToolManager and DoneTool

* Integration of BrowserAgent

* BrowserAgent implementation v0.1

* BrowserAgent small fix v0.2

* Tool calling design

tool call design

tool design claude

* Update agent tool design with // NTN

* add zod-to-json npm install

* BrowserAGent v0.3

* BrowserAgent v0.4

* BrowserAgent v0.5

* fixes

* Build error fixes in my NEWLY added code

build errors fix

* Build error fixes in old code (integration work)

backup

* Comment StreamEventProcessor for now, it is not used

* Small build error fix

* Small rename

* Added integration test to check structuredLLM and changed to 4o-mini

change default to nxtscape

integration test

* Small docstring

* Simplified BrowserAgent code and added integration test

Simplified BrowserAgent code

BrowserAgent integration test

* Update CLAUDE.md with project memory and instructions on how to write code

Update CLAUDE.md with project memory and instructions on how to write code

Project Memory

* Just a move: moved ToolManager outside. Build works.

* TabOperations tool

TabOperations Tool and fixing some test

tab operations

* Update CLAUDE.md

* Added ClassificationTool

classification tool

classification prompt

* Refactored and simplified PlannerTool unit test and integration test

* Updated Planner tool

* Update CLAUDE.md

* BrowserAgent modified to do classification

BrowserAgent with classification

* minor fix to ToolManager

* Instead of ToolCall and ToolResult -- just updating message manager once

* minor fix to BrowserAgent integration test

* Changed done to "done_tool"

* Updated CLAUDE.md to reflect understanding of claude

* Uncommented stream event processor

* Renamed EventBus to StreamEventBus

* Commented StreamEventProcessor

* Event Processor

* Integrated EventProcessor with BrowserAgent

Added EventProcessor to BrowserAgent

* Renamed StreamEventBus to EventBus

* Made EventBus required parameter in ExecutionContext

* PlanGenerator rewrite

PlanGenerator rewrite

backup

* For simple task, explicitly tell it to call done tool

* Max attempts for simple task

* backup

* Revert "backup"

This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f.

* Consolidating where EventBus and EventProcessor are created and initialized

backup

* Update CLAUDE.md

Update CLAUDE.md

* Improving agent loop code

Cleaned up processTooCall

classification task

* Create test-writer subAgent

test-agent-prompt

test agent prompt

test-agent-prompt

Update test-writer.md

* BrowserAgent test

Browseragent test

BrowserAgent test

* BrowserAgent refactor

backup

backup

* Minor fixes

* Minor fix

* minor change -- NEW AGENT LOOP IS WORKING WELL

* Update cursor rules

* Small change

* Improved BrowserAgent integration test

Improved BrowserAgent integration test

* Small change

* Update CLAUDE.md

* Different tools

* FindElementTool is ready

Find element update

backup

find element backup

* Updated to test strings to say "tests..."

* ScrollTool is ready

* RefreshStateTool is updated as well

* MessageManager updated

* SearchTool is ready

backup

* Interaction Element is also ready

* Add debugMessage emitter

* ValidatorTool ready and tests are passing

Validation Tool

validator tool

backup

backup

* GroupTabs tool ready

* Registered all the tools

* Planning changed to 5 steps

* BrowserAgent integration test fix

* Minor string changes

* backup

* Removed too many confusing events in EventProcessor -- there is only event.info right now

* Abort control implemented

backup

Abort

* Formatter for toolResult

Formatter for toolResult

backup

* Always render using Markdown

* Minor fix

---------

Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
2025-07-29 08:14:45 -07:00

391 lines
12 KiB
TypeScript

import { z } from 'zod';
import { NxtscapeTool } from '../base/NxtscapeTool';
import { ToolConfig } from '../base/ToolConfig';
import { ExecutionContext } from '@/lib/runtime/ExecutionContext';
import { Logging } from '@/lib/utils/Logging';
import BrowserPage from '@/lib/browser/BrowserPage';
import type { SnapshotContext, SectionType, Snapshot, LinkInfo } from '@/lib/browser/BrowserOSAdapter';
/**
 * Kinds of content the extract tool can return:
 * - `text`: the text content of the page
 * - `links`: all links on the page
 */
export const ExtractTypeEnum = z.enum(['text', 'links']);

/** Union of the literal values accepted by `ExtractTypeEnum`. */
export type ExtractType = z.infer<typeof ExtractTypeEnum>;
/**
 * Shape of a single extracted link.
 */
export const LinkSchema = z.object({
  // Link text
  text: z.string(),
  // Link URL
  url: z.string(),
});

/** A link as described by `LinkSchema`. */
export type Link = z.infer<typeof LinkSchema>;
/**
 * Allowed snapshot contexts: `visible` or `full`.
 */
export const SnapshotContextSchema = z.enum([
  'visible',
  'full',
]);
/**
 * Page section types, based on ARIA landmarks, that a caller may name
 * in an extraction request.
 */
export const SectionTypeSchema = z.enum([
  'main', 'navigation', 'footer', 'header',
  'article', 'aside', 'complementary', 'contentinfo',
  'form', 'search', 'region', 'other',
]);
/**
 * Result of extracting from one tab. `content` and `links` are both
 * optional; which one is present depends on what was extracted.
 */
export const ExtractionResultSchema = z.object({
  // Tab ID
  tab_id: z.number(),
  // Page URL
  url: z.string(),
  // Page title
  title: z.string(),
  // Extracted text content
  content: z.string().optional(),
  // Extracted links
  links: LinkSchema.array().optional(),
});

/** One per-tab extraction result, as described by `ExtractionResultSchema`. */
export type ExtractionResult = z.infer<typeof ExtractionResultSchema>;
/**
 * Schema for extract tool input.
 *
 * Note: `extractFromPage` in this file currently forces a full-page snapshot
 * and combines all sections, so `context` and `sections` are accepted but not
 * yet applied to the snapshot itself.
 */
export const ExtractInputSchema = z.object({
  // Array of tab IDs to extract from
  tab_ids: z.array(z.number()),
  // What to extract (text or links)
  extract_type: ExtractTypeEnum,
  // Context: visible or full page (default: visible).
  // `.default()` has to be the outermost wrapper: with `.default(...).optional()`
  // Zod 3 lets the optional wrapper accept a missing key as `undefined` before
  // the default is ever consulted, so the default would never be applied.
  context: SnapshotContextSchema.default('visible'),
  // Which sections to include (default: all)
  sections: z.array(SectionTypeSchema).optional(),
  // Include URL and title (default: true)
  include_metadata: z.boolean().default(true),
  // Maximum content length per tab (applies to text extraction only)
  max_length: z.number().optional(),
});

/**
 * Caller-facing input type. Uses `z.input` so that fields with defaults
 * (`context`, `include_metadata`) remain optional for callers.
 */
export type ExtractInput = z.input<typeof ExtractInputSchema>;
/**
 * Envelope returned by the extract tool.
 */
export const ExtractOutputSchema = z.object({
  // Whether the operation succeeded
  success: z.boolean(),
  // Array of extraction results
  extractions: ExtractionResultSchema.array(),
  // Human-readable status message
  message: z.string(),
});

/** Output type described by `ExtractOutputSchema`. */
export type ExtractOutput = z.infer<typeof ExtractOutputSchema>;
/**
 * Unified tool for extracting content (text or links) from one or multiple tabs.
 *
 * Limitations of the current implementation:
 * - `context` and `sections` are accepted as input but are not yet applied;
 *   every snapshot is taken with the full-page context and all returned
 *   sections are combined.
 * - `include_metadata` is not consulted; URL and title are always returned.
 */
export class ExtractTool extends NxtscapeTool<ExtractInput, ExtractOutput> {
  /**
   * Builds the tool configuration (name, schemas, examples, streaming
   * display settings) and hands it to the base class.
   *
   * @param executionContext - Execution context forwarded to the base tool.
   */
  constructor(executionContext: ExecutionContext) {
    const config: ToolConfig<ExtractInput, ExtractOutput> = {
      name: 'extract',
      description: 'Extract content from one or multiple browser tabs. Supports extracting text or links from specific page sections. Always pass tab_ids as an array of tab IDs.',
      category: 'observation',
      version: '2.0.0',
      inputSchema: ExtractInputSchema,
      outputSchema: ExtractOutputSchema,
      examples: [
        {
          description: 'Extract text from a single tab',
          input: {
            tab_ids: [12345],
            extract_type: 'text',
            context: 'visible'
          },
          output: {
            success: true,
            extractions: [{
              tab_id: 12345,
              url: 'https://example.com',
              title: 'Example Page',
              content: 'Welcome to our website. We offer the best products and services...'
            }],
            message: 'Successfully extracted content from 1 tab'
          }
        },
        {
          description: 'Extract links from navigation sections',
          input: {
            tab_ids: [12345],
            extract_type: 'links',
            sections: ['navigation', 'header']
          },
          output: {
            success: true,
            extractions: [{
              tab_id: 12345,
              url: 'https://example.com',
              title: 'Example Page',
              links: [
                { text: 'Home', url: 'https://example.com/' },
                { text: 'About', url: 'https://example.com/about' },
                { text: 'Contact', url: 'https://example.com/contact' }
              ]
            }],
            message: 'Successfully extracted content from 1 tab'
          }
        },
        {
          description: 'Extract main content from full page',
          input: {
            tab_ids: [12345],
            extract_type: 'text',
            context: 'full',
            sections: ['main', 'article'],
            max_length: 1000
          },
          output: {
            success: true,
            extractions: [{
              tab_id: 12345,
              url: 'https://example.com',
              title: 'Example Page',
              content: 'Welcome to our website. We offer the best products and services...'
            }],
            message: 'Successfully extracted content from 1 tab'
          }
        }
      ],
      streamingConfig: {
        displayName: 'Extract Content',
        icon: '📄',
        progressMessage: 'Extracting content...'
      }
    };
    super(config, executionContext);
  }

  /**
   * Generates a contextual progress message such as
   * `Extracting links from navigation sections from tab 12345...`.
   *
   * @param args - Tool input; expected to be already parsed, but every field
   *   is read defensively so partially-populated args still yield a message.
   * @returns The progress message, or a generic fallback if building it throws.
   */
  getProgressMessage(args: ExtractInput): string {
    try {
      // Note: args should already be parsed by StreamEventProcessor
      const tabCount = args?.tab_ids?.length || 0;
      const extractType = args?.extract_type || 'content';
      const context = args?.context || 'visible';
      const sections = args?.sections;

      let message = `Extracting ${extractType}`;
      if (sections && sections.length > 0) {
        message += ` from ${sections.join(', ')} sections`;
      }
      if (context === 'full') {
        message += ' (full page)';
      }
      if (tabCount === 1) {
        message += ` from tab ${args.tab_ids[0]}`;
      } else if (tabCount > 1) {
        message += ` from ${tabCount} tabs`;
      }
      return message + '...';
    } catch {
      return 'Extracting content...';
    }
  }

  /**
   * Formats an extraction result as Markdown for display.
   *
   * For each tab it shows the title and URL, a 100-character preview and word
   * count when text was extracted, and the link count when links were
   * extracted (individual link texts are listed only for 1–3 links).
   *
   * @param output - The tool output to render.
   * @returns The status message for failed outputs, otherwise the summary.
   */
  FormatResultForUI(output: ExtractOutput): string {
    if (!output.success) {
      return output.message;
    }

    const extractionCount = output.extractions.length;
    if (extractionCount === 0) {
      return '📄 No content extracted';
    }

    let result = `📄 **Extracted from ${extractionCount} tab${extractionCount > 1 ? 's' : ''}**\n\n`;
    output.extractions.forEach((extraction, index) => {
      result += `**${index + 1}. ${extraction.title}**\n`;
      result += `🔗 ${extraction.url}\n`;

      if (extraction.content) {
        const wordCount = extraction.content.split(/\s+/).filter(word => word.length > 0).length;
        const preview = extraction.content.slice(0, 100).trim();
        const hasMore = extraction.content.length > 100;
        result += `📝 "${preview}${hasMore ? '...' : ''}"\n`;
        result += `📊 ${wordCount} words\n`;
      }

      if (extraction.links) {
        const linkCount = extraction.links.length;
        result += `🔗 ${linkCount} link${linkCount !== 1 ? 's' : ''} found\n`;
        // Only short link lists are itemised, to keep the summary compact.
        if (linkCount > 0 && linkCount <= 3) {
          extraction.links.forEach(link => {
            const text = link.text.length > 30 ? link.text.substring(0, 30) + '...' : link.text;
            result += ` - ${text}\n`;
          });
        }
      }

      // Blank line between tabs, but not after the last one.
      if (index < extractionCount - 1) {
        result += '\n';
      }
    });
    return result.trim();
  }

  /**
   * Runs the extraction for every requested tab.
   *
   * Pages are processed sequentially. A page whose extraction fails is
   * skipped (the failure is logged by `extractFromPage`); the call is reported
   * as unsuccessful only when no page could be extracted at all, and partial
   * failures are surfaced in the status message.
   *
   * @param input - Tab IDs plus extraction options.
   * @returns The extraction envelope; errors are reported through
   *   `success: false` and `message` rather than thrown.
   */
  protected async execute(input: ExtractInput): Promise<ExtractOutput> {
    try {
      // Validate input
      if (!input.tab_ids || input.tab_ids.length === 0) {
        return {
          success: false,
          extractions: [],
          message: 'No tab IDs provided'
        };
      }

      // Get pages for the specified tab IDs directly
      const pages = await this.browserContext.getPages(input.tab_ids);
      if (!pages || pages.length === 0) {
        return {
          success: false,
          extractions: [],
          message: `No tabs found with IDs: ${input.tab_ids.join(', ')}`
        };
      }

      // Extract content from each page
      const extractions: ExtractionResult[] = [];
      for (const page of pages) {
        // Get the tab ID from the page state
        const pageState = await page.getBrowserState();
        const tabId = pageState.tabId;
        const extraction = await this.extractFromPage(page, tabId, input);
        if (extraction) {
          extractions.push(extraction);
        }
      }

      const tabs = (count: number): string => `${count} tab${count !== 1 ? 's' : ''}`;
      const failedCount = pages.length - extractions.length;

      // Every page failed: do not claim success with an empty result set.
      if (extractions.length === 0) {
        return {
          success: false,
          extractions: [],
          message: `Failed to extract content from ${tabs(failedCount)}`
        };
      }

      const successMessage = `Successfully extracted content from ${tabs(extractions.length)}`;
      return {
        success: true,
        extractions,
        message: failedCount > 0 ? `${successMessage} (${failedCount} failed)` : successMessage
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      return {
        success: false,
        extractions: [],
        message: `Extraction failed: ${errorMessage}`
      };
    }
  }

  /**
   * Extracts text or links from a single page.
   *
   * @param page - Page to snapshot.
   * @param tabId - Tab ID recorded in the result and in failure logs.
   * @param input - Tool input; `extract_type` and `max_length` are honoured.
   * @returns The extraction result, or `null` if anything threw (the failure
   *   is logged as a warning). `content` / `links` are left unset when the
   *   snapshot yields nothing.
   */
  private async extractFromPage(
    page: BrowserPage,
    tabId: number,
    input: ExtractInput
  ): Promise<ExtractionResult | null> {
    try {
      // TODO: pass `input.context` and `input.sections` through to the snapshot.
      // For now the request is overridden: the full page is captured and all
      // sections are included.
      const snapshotOptions = {
        context: 'full' as SnapshotContext
      };

      // Get URL and title first
      const url = page.url();
      const title = await page.title();

      // Build the result
      const result: ExtractionResult = {
        tab_id: tabId,
        url: url,
        title: title || 'Untitled'
      };

      // Extract based on type
      if (input.extract_type === 'text') {
        const snapshot = await page.getTextSnapshot(snapshotOptions);

        // Combine text from all sections
        let allText = '';
        for (const section of snapshot.sections) {
          if (section.textResult) {
            allText += section.textResult.text + '\n\n';
          }
        }

        // Apply max_length only for a positive limit; a zero or negative value
        // is treated as "no limit" rather than collapsing the content to '...'.
        let finalText = allText.trim();
        const maxLength = input.max_length;
        if (maxLength !== undefined && maxLength > 0 && finalText.length > maxLength) {
          finalText = finalText.substring(0, maxLength) + '...';
        }
        if (finalText) {
          result.content = finalText;
        }
      } else if (input.extract_type === 'links') {
        const snapshot = await page.getLinksSnapshot(snapshotOptions);

        // Combine links from all sections, keeping only text and URL
        const allLinks: Link[] = [];
        for (const section of snapshot.sections) {
          if (section.linksResult) {
            for (const linkInfo of section.linksResult.links) {
              allLinks.push({
                text: linkInfo.text,
                url: linkInfo.url
              });
            }
          }
        }
        if (allLinks.length > 0) {
          result.links = allLinks;
        }
      }
      return result;
    } catch (error) {
      Logging.log('ExtractTool', `Failed to extract from tab ${tabId}: ${error}`, 'warning');
      return null;
    }
  }
  // Link extraction is now handled by the new snapshot APIs
}