Files
BrowserOS/reference-code/old-lib/tools/browser-navigation/ScrollTool.ts
Felarof 8245dfe0ff Rewrite Agent Loop (#7)
* clean-up bunch of files for re-write

* more clean-up and adding basic agent

* Minor fix moved types into respective files.

* Deleted bunch of old files

backup

Update gitignore

Deleted a bunch of files

Remove message manager

Deleted old docs

Update rules

rename Profiler to profiler

* Temporarily adding old code

* Adding two small things back

* backup

* Implemented LangChainProvider and updated cursor rules

backup

LangChainProvider

cursor rules

* Implement tests for LangChainProvider -- unit test and integration test

integration test passes

integration test backup

* Tool Design

Tools Design

tools design

* NavigationTool ready

NavigationTool ready

NavigationTool ready

NavigationTool ready

backup

* MessageManager

MessageManager

backup

* Fixed integration test

* Agent design new

Updated agent design and added bunch of /NTN commands

agent new design

* Delete old agent design

* MessageManagerReadOnly class

* PlannerTool ready

PlannerTool almost ready

* ToolManager and DoneTool

* Integration of BrowserAgent

* BrowserAgent implementation v0.1

* BrowserAgent small fix v0.2

* Tool calling design

tool call design

tool design claude

* Update agent tool design with // NTN

* add zod-to-json npm install

* BrowserAgent v0.3

* BrowserAgent v0.4

* BrowserAgent v0.5

* fixes

* Build error fixes in my NEWLY added code

build errors fix

* Build error fixes in old code (integration work)

backup

* Comment StreamEventProcessor for now, it is not used

* Small build error fix

* Small rename

* Added integration test to check structuredLLM and changed to 4o-mini

change default to nxtscape

integration test

* Small docstring

* Simplified BrowserAgent code and added integration test

Simplified BrowserAgent code

BrowserAgent integration test

* Update CLAUDE.md with project memory and instructions on how to write code

Update CLAUDE.md with project memory and instructions on how to write code

Project Memory

* Just a move. Moved ToolManager outside. Build works.

* TabOperations tool

TabOperations Tool and fixing some test

tab operations

* Update CLAUDE.md

* Added ClassificationTool

classification tool

classification prompt

* Refactored and simplified PlannerTool unit test and integration test

* Updated Planner tool

* Update CLAUDE.md

* BrowserAgent modified to do classification

BrowserAgent with classification

* minor fix to ToolManager

* Instead of ToolCall and ToolResult -- just updating message manager once

* minor fix to BrowserAgent integration test

* Changed done to "done_tool"

* Updated CLAUDE.md to reflect understanding of claude

* Uncommented stream event processor

* Renamed EventBus to StreamEventBus

* Commented StreamEventProcessor

* Event Processor

* Integrated EventProcessor with BrowserAgent

Added EventProcessor to BrowserAgent

* Renamed StreamEventBus to EventBus

* Made EventBus required parameter in ExecutionContext

* PlanGenerator rewrite

PlanGenerator rewrite

backup

* For simple task, explicitly tell it to call done tool

* Max attempts for simple task

* backup

* Revert "backup"

This reverts commit 7d79a3d4d5774bfef79ec9827878b74edad3593f.

* Consolidating where EventBus and EventProcessor are created and initialized

backup

* Update CLAUDE.md

Update CLAUDE.md

* Improving agent loop code

Cleaned up processTooCall

classification task

* Create test-writer subAgent

test-agent-prompt

test agent prompt

test-agent-prompt

Update test-writer.md

* BrowserAgent test

BrowserAgent test

BrowserAgent test

* BrowserAgent refactor

backup

backup

* Minor fixes

* Minor fix

* minor change -- NEW AGENT LOOP IS WORKING WELL

* Update cursor rules

* Small change

* Improved BrowserAgent integration test

Improved BrowserAgent integration test

* Small change

* Update CLAUDE.md

* Different tools

* FindElementTool is ready

Find element update

backup

find element backup

* Updated to test strings to say "tests..."

* ScrollTool is ready

* RefreshStateTool is updated as well

* MessageManager updated

* SearchTool is ready

backup

* Interaction Element is also ready

* Add debugMessage emitter

* ValidatorTool ready and tests are passing

Validation Tool

validator tool

backup

backup

* GroupTabs tool ready

* Registered all the tools

* Planning changed to 5 steps

* BrowserAgent integration test fix

* Minor string changes

* backup

* Removed too many confusing events in EventProcessor -- there is only event.info right now

* Abort control implemented

backup

Abort

* Formatter for toolResult

Formatter for toolResult

backup

* Always render using Markdown

* Minor fix

---------

Co-authored-by: Nikhil Sonti <nikhilsv92@gmail.com>
2025-07-29 08:14:45 -07:00

320 lines
9.4 KiB
TypeScript

import { z } from 'zod';
import { NxtscapeTool } from '../base/NxtscapeTool';
import { ToolConfig } from '../base/ToolConfig';
import { ExecutionContext } from '@/lib/runtime/ExecutionContext';
import { BrowserPage } from '@/lib/browser/BrowserPage';
/**
* Zod enum listing every operation the scroll tool accepts.
* The same enum is referenced by both the input and the output schema, so a
* result always names one of these operations.
*/
export const ScrollOperationTypeEnum = z.enum([
'scroll_down', // Scroll down by viewports
'scroll_up', // Scroll up by viewports
'scroll_to_element' // Scroll to element by index
]);
/** String-literal union inferred from ScrollOperationTypeEnum. */
export type ScrollOperationType = z.infer<typeof ScrollOperationTypeEnum>;
/**
* Zod schema describing the arguments accepted by the scroll tool.
* Only operationType is required. The schema itself does not tie the optional
* fields to a particular operation; the per-field notes below describe which
* operation each one is meant for.
*/
export const ScrollInputSchema = z.object({
operationType: ScrollOperationTypeEnum, // The operation to perform (required)
amount: z.number().optional(), // Number of viewports to scroll (for scroll_down/up)
index: z.number().optional(), // Element index (for scroll_to_element)
intent: z.string().optional() // Optional description of why this scroll is being performed
});
/** Static type of the scroll tool input, inferred from ScrollInputSchema. */
export type ScrollInput = z.infer<typeof ScrollInputSchema>;
/**
* Zod schema describing the result object returned by the scroll tool.
* success, operationType and message are always present; elementFound is
* optional and intended for scroll_to_element results.
*/
export const ScrollOutputSchema = z.object({
success: z.boolean(), // Whether the operation succeeded
operationType: ScrollOperationTypeEnum, // Operation that was performed
message: z.string(), // Human-readable result message
elementFound: z.boolean().optional() // Whether target element was found (for scroll_to_element)
});
/** Static type of the scroll tool result, inferred from ScrollOutputSchema. */
export type ScrollOutput = z.infer<typeof ScrollOutputSchema>;
/**
 * Tool that scrolls the current browser page, either by whole viewports
 * (up or down) or to a specific element addressed by its index.
 *
 * Every operation reports its outcome as a ScrollOutput object; failures are
 * returned as `success: false` results rather than thrown.
 */
export class ScrollTool extends NxtscapeTool<ScrollInput, ScrollOutput> {
  /**
   * Assembles the static tool configuration (name, schemas, usage examples and
   * streaming metadata) and hands it to the base class.
   *
   * @param executionContext - Runtime context, forwarded unchanged to the base tool.
   */
  constructor(executionContext: ExecutionContext) {
    const config: ToolConfig<ScrollInput, ScrollOutput> = {
      name: 'scroll',
      description: 'Perform scrolling operations. Operations: "scroll_down" (scroll down by viewports), "scroll_up" (scroll up by viewports), "scroll_to_element" (scroll to element by index). Always pass operationType. Pass amount for number of viewports to scroll (default 1), or index for element scrolling.',
      category: 'interaction',
      version: '1.0.0',
      inputSchema: ScrollInputSchema,
      outputSchema: ScrollOutputSchema,
      // One example per supported call shape.
      examples: [
        {
          description: 'Scroll down one viewport',
          input: {
            operationType: 'scroll_down',
            intent: 'Scrolling to see more content'
          },
          output: {
            success: true,
            operationType: 'scroll_down',
            message: 'Scrolled down 1 viewport'
          }
        },
        {
          description: 'Scroll down multiple viewports',
          input: {
            operationType: 'scroll_down',
            amount: 2,
            intent: 'Scrolling down 2 viewports'
          },
          output: {
            success: true,
            operationType: 'scroll_down',
            message: 'Scrolled down 2 viewports'
          }
        },
        {
          description: 'Scroll up one viewport',
          input: {
            operationType: 'scroll_up',
            intent: 'Scrolling back up'
          },
          output: {
            success: true,
            operationType: 'scroll_up',
            message: 'Scrolled up 1 viewport'
          }
        },
        {
          description: 'Scroll to element by index',
          input: {
            operationType: 'scroll_to_element',
            index: 42,
            intent: 'Scrolling to button with index 42'
          },
          output: {
            success: true,
            operationType: 'scroll_to_element',
            message: 'Scrolled to element with index 42',
            elementFound: true
          }
        }
      ],
      streamingConfig: {
        displayName: 'Scroll',
        icon: '📜',
        progressMessage: 'Scrolling page...'
      }
    };
    super(config, executionContext);
  }

  /**
   * Converts a caught value into printable text: the `message` of an Error,
   * or the string form of anything else.
   */
  private static describeScrollError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    return String(error);
  }

  /**
   * Override: builds the progress text for a scroll call.
   *
   * A non-empty `intent` is returned verbatim. Otherwise the text is derived
   * from the operation and its arguments, falling back to a generic message
   * for unknown operations or if anything throws.
   *
   * @param args - Tool input; the original note says it should already be
   *   parsed by StreamEventProcessor. Nullish values are tolerated.
   * @returns Text describing the scroll in progress.
   */
  getProgressMessage(args: ScrollInput): string {
    const genericMessage = 'Scrolling page...';
    try {
      const operationType = args?.operationType;
      const intent = args?.intent;

      // A caller-supplied intent wins over any generated text.
      if (intent) {
        return intent;
      }

      if (operationType === 'scroll_down') {
        const count = args?.amount;
        if (!count) {
          return 'Scrolling down';
        }
        return `Scrolling down ${count} viewport${count > 1 ? 's' : ''}`;
      }

      if (operationType === 'scroll_up') {
        const count = args?.amount;
        if (!count) {
          return 'Scrolling up';
        }
        return `Scrolling up ${count} viewport${count > 1 ? 's' : ''}`;
      }

      if (operationType === 'scroll_to_element') {
        const target = args?.index;
        // Index 0 is a valid target, so compare against undefined rather than truthiness.
        if (target === undefined) {
          return 'Scrolling to element';
        }
        return `Scrolling to element ${target}`;
      }

      return genericMessage;
    } catch {
      return genericMessage;
    }
  }

  /**
   * Override: turns a result into the short text shown in the UI.
   *
   * Failed results, and successful results with an unrecognised operation,
   * show the result's own message; recognised successes get a fixed label.
   *
   * NOTE(review): the method name is PascalCase, unlike its siblings — confirm
   * it matches the hook name expected by the base class.
   *
   * @param output - Result produced by this tool.
   * @returns Display text for the result.
   */
  FormatResultForUI(output: ScrollOutput): string {
    if (output.success) {
      if (output.operationType === 'scroll_down') {
        return `⬇️ Scrolled down`;
      }
      if (output.operationType === 'scroll_up') {
        return `⬆️ Scrolled up`;
      }
      if (output.operationType === 'scroll_to_element') {
        return output.elementFound ? `🎯 Scrolled to element` : `❓ Element not found`;
      }
    }
    return `${output.message}`;
  }

  /**
   * Runs the requested scroll operation against the current page.
   *
   * The missing-index check happens before the page is fetched, so that
   * failure is reported even if the page cannot be obtained. Errors raised
   * while fetching the page or dispatching are converted into a failed result.
   *
   * @param input - Validated tool input.
   * @returns The outcome of the operation; never rejects for errors raised inside the try block.
   */
  protected async execute(input: ScrollInput): Promise<ScrollOutput> {
    // Guard clause: scroll_to_element is the only operation with a mandatory argument.
    if (input.operationType === 'scroll_to_element' && input.index === undefined) {
      return {
        success: false,
        operationType: input.operationType,
        message: 'scroll_to_element operation requires index parameter'
      };
    }

    try {
      const page = await this.browserContext.getCurrentPage();

      if (input.operationType === 'scroll_down') {
        return await this.scrollDown(page, input.amount);
      }
      if (input.operationType === 'scroll_up') {
        return await this.scrollUp(page, input.amount);
      }
      if (input.operationType === 'scroll_to_element') {
        // The guard clause above already rejected an undefined index.
        return await this.scrollToElement(page, input.index!);
      }

      // Reached only when operationType is outside the enum.
      // NOTE(review): the result is labelled 'scroll_down' regardless of what
      // was requested — presumably to keep it valid against the output schema; confirm.
      return {
        success: false,
        operationType: 'scroll_down',
        message: 'Invalid operation type specified'
      };
    } catch (error) {
      const reason = ScrollTool.describeScrollError(error);
      return {
        success: false,
        operationType: input.operationType,
        message: `Scroll operation failed: ${reason}`
      };
    }
  }

  /**
   * Scrolls the page down by a number of viewports.
   *
   * @param page - Page to scroll.
   * @param amount - Viewports to scroll; any falsy value (undefined, 0, NaN) means 1.
   * @returns A success result, or a failed result carrying the error text if the page call throws.
   */
  private async scrollDown(page: BrowserPage, amount?: number): Promise<ScrollOutput> {
    const viewports = amount || 1;

    try {
      await page.scrollDown(viewports);
    } catch (error) {
      const reason = ScrollTool.describeScrollError(error);
      return {
        success: false,
        operationType: 'scroll_down',
        message: `Failed to scroll down: ${reason}`
      };
    }

    const plural = viewports > 1 ? 's' : '';
    return {
      success: true,
      operationType: 'scroll_down',
      message: `Scrolled down ${viewports} viewport${plural}`
    };
  }

  /**
   * Scrolls the page up by a number of viewports.
   *
   * @param page - Page to scroll.
   * @param amount - Viewports to scroll; any falsy value (undefined, 0, NaN) means 1.
   * @returns A success result, or a failed result carrying the error text if the page call throws.
   */
  private async scrollUp(page: BrowserPage, amount?: number): Promise<ScrollOutput> {
    const viewports = amount || 1;

    try {
      await page.scrollUp(viewports);
    } catch (error) {
      const reason = ScrollTool.describeScrollError(error);
      return {
        success: false,
        operationType: 'scroll_up',
        message: `Failed to scroll up: ${reason}`
      };
    }

    const plural = viewports > 1 ? 's' : '';
    return {
      success: true,
      operationType: 'scroll_up',
      message: `Scrolled up ${viewports} viewport${plural}`
    };
  }

  /**
   * Looks up an element by index and scrolls the page to it.
   *
   * @param page - Page to scroll.
   * @param index - Index of the target element.
   * @returns A success result with `elementFound: true`, or a failed result
   *   when the lookup yields nothing, the scroll call reports failure, or
   *   either call throws.
   */
  private async scrollToElement(page: BrowserPage, index: number): Promise<ScrollOutput> {
    // Shared shape for every failure path. All of them report elementFound: false,
    // including the case where the lookup succeeded but the scroll did not —
    // NOTE(review): confirm that is intended.
    const notScrolled = (message: string) => ({
      success: false,
      operationType: 'scroll_to_element' as const,
      message,
      elementFound: false
    });

    try {
      const element = await page.getElementByIndex(index);
      if (!element) {
        return notScrolled(`Element with index ${index} not found`);
      }

      // The page-level scroll is addressed by the element's nodeId, not by index.
      const scrolled = await page.scrollToElement(element.nodeId);
      if (!scrolled) {
        return notScrolled(`Could not scroll to element with index ${index}`);
      }

      return {
        success: true,
        operationType: 'scroll_to_element',
        message: `Scrolled to element with index ${index}`,
        elementFound: true
      };
    } catch (error) {
      return notScrolled(`Failed to scroll to element: ${ScrollTool.describeScrollError(error)}`);
    }
  }
}