Files
BrowserOS/packages/browseros-agent/apps/controller-ext/src/actions/browser/GetInteractiveSnapshotAction.ts
Dani Akash 290ee91a8b Add 'packages/browseros-agent/' from commit '90bd4be3008285bf3825aad3702aff98f872671a'
git-subtree-dir: packages/browseros-agent
git-subtree-mainline: 8f148d0918
git-subtree-split: 90bd4be300
2026-03-13 21:22:09 +05:30

72 lines
2.1 KiB
TypeScript

/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import type {
InteractiveSnapshot,
InteractiveSnapshotOptions,
} from '@/adapters/BrowserOSAdapter'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Input schema
const GetInteractiveSnapshotInputSchema = z.object({
tabId: z.number().describe('The tab ID to get snapshot from'),
options: z
.object({
includeHidden: z
.boolean()
.optional()
.default(false)
.describe('Include hidden elements (default: false)'),
})
.optional()
.describe('Optional snapshot options'),
})
type GetInteractiveSnapshotInput = z.infer<
typeof GetInteractiveSnapshotInputSchema
>
/**
* GetInteractiveSnapshotAction - Get interactive elements from the page
*
* This is THE MOST CRITICAL action - it returns all interactive elements
* with their nodeIds, which are needed by click, inputText, clear, and scrollToNode actions.
*
* Returns:
* - elements: Array of interactive nodes with nodeIds
* - hierarchicalStructure: String representation of page structure
*
* Each element contains:
* - nodeId: Sequential integer ID (1, 2, 3...)
* - type: 'clickable' | 'typeable' | 'selectable'
* - name: Element text/label
* - attributes: Element properties (html-tag, role, etc.)
* - rect: Bounding box coordinates
*/
export class GetInteractiveSnapshotAction extends ActionHandler<
GetInteractiveSnapshotInput,
InteractiveSnapshot
> {
readonly inputSchema = GetInteractiveSnapshotInputSchema
private browserOSAdapter = BrowserOSAdapter.getInstance()
async execute(
input: GetInteractiveSnapshotInput,
): Promise<InteractiveSnapshot> {
const snapshot = await this.browserOSAdapter.getInteractiveSnapshot(
input.tabId,
input.options as InteractiveSnapshotOptions | undefined,
)
// Cache snapshot for pointer overlay lookup
SnapshotCache.set(input.tabId, snapshot)
return snapshot
}
}