mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-13 23:53:25 +00:00
git-subtree-dir: packages/browseros-agent git-subtree-mainline:8f148d0918git-subtree-split:90bd4be300
72 lines
2.1 KiB
TypeScript
72 lines
2.1 KiB
TypeScript
/**
|
|
* @license
|
|
* Copyright 2025 BrowserOS
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
import { z } from 'zod'
|
|
import type {
|
|
InteractiveSnapshot,
|
|
InteractiveSnapshotOptions,
|
|
} from '@/adapters/BrowserOSAdapter'
|
|
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
|
|
import { SnapshotCache } from '@/utils/SnapshotCache'
|
|
import { ActionHandler } from '../ActionHandler'
|
|
|
|
// Input schema
|
|
const GetInteractiveSnapshotInputSchema = z.object({
|
|
tabId: z.number().describe('The tab ID to get snapshot from'),
|
|
options: z
|
|
.object({
|
|
includeHidden: z
|
|
.boolean()
|
|
.optional()
|
|
.default(false)
|
|
.describe('Include hidden elements (default: false)'),
|
|
})
|
|
.optional()
|
|
.describe('Optional snapshot options'),
|
|
})
|
|
|
|
type GetInteractiveSnapshotInput = z.infer<
|
|
typeof GetInteractiveSnapshotInputSchema
|
|
>
|
|
|
|
/**
|
|
* GetInteractiveSnapshotAction - Get interactive elements from the page
|
|
*
|
|
* This is THE MOST CRITICAL action - it returns all interactive elements
|
|
* with their nodeIds, which are needed by click, inputText, clear, and scrollToNode actions.
|
|
*
|
|
* Returns:
|
|
* - elements: Array of interactive nodes with nodeIds
|
|
* - hierarchicalStructure: String representation of page structure
|
|
*
|
|
* Each element contains:
|
|
* - nodeId: Sequential integer ID (1, 2, 3...)
|
|
* - type: 'clickable' | 'typeable' | 'selectable'
|
|
* - name: Element text/label
|
|
* - attributes: Element properties (html-tag, role, etc.)
|
|
* - rect: Bounding box coordinates
|
|
*/
|
|
export class GetInteractiveSnapshotAction extends ActionHandler<
|
|
GetInteractiveSnapshotInput,
|
|
InteractiveSnapshot
|
|
> {
|
|
readonly inputSchema = GetInteractiveSnapshotInputSchema
|
|
private browserOSAdapter = BrowserOSAdapter.getInstance()
|
|
|
|
async execute(
|
|
input: GetInteractiveSnapshotInput,
|
|
): Promise<InteractiveSnapshot> {
|
|
const snapshot = await this.browserOSAdapter.getInteractiveSnapshot(
|
|
input.tabId,
|
|
input.options as InteractiveSnapshotOptions | undefined,
|
|
)
|
|
|
|
// Cache snapshot for pointer overlay lookup
|
|
SnapshotCache.set(input.tabId, snapshot)
|
|
|
|
return snapshot
|
|
}
|
|
}
|