feat(dom-snapshot): add DOM snapshot utility with serialization and search capabilities

This commit is contained in:
jk4235
2026-01-07 16:42:16 +08:00
committed by ropzislaw
parent 99ba4da9a9
commit 447a2d0611
14 changed files with 2896 additions and 2 deletions

View File

@@ -40,6 +40,13 @@
} }
], ],
"files": { "files": {
"includes": ["**", "!**/dist", "!**/coverage", "!**/build", "!**/assets"] "includes": [
"**",
"!**/dist",
"!**/coverage",
"!**/build",
"!**/assets",
"!.history"
]
} }
} }

View File

@@ -0,0 +1,225 @@
# @aipexstudio/dom-snapshot
A lightweight library for capturing DOM snapshots without relying on Chrome DevTools Protocol (CDP) Accessibility Tree (AXTree). This library provides a pure JavaScript/TypeScript solution for creating structured page snapshots that can be used for web automation, testing, and AI-powered browser agents.
## Why Not CDP AXTree?
Traditional approaches to capturing page structure often rely on CDP's Accessibility Tree, which has several limitations:
- **Browser dependency**: Requires Chrome/Chromium with DevTools Protocol
- **Performance overhead**: CDP communication adds latency
- **Complex setup**: Needs browser debugging port configuration
- **Limited portability**: Doesn't work in all browser contexts
This library takes a different approach by directly traversing the DOM and building a semantic snapshot that mimics accessibility tree structure, but works in any browser environment with just JavaScript.
## Features
- **Pure DOM-based**: No CDP or browser extensions required
- **Accessibility-aware**: Captures semantic roles, names, and states following ARIA patterns
- **Interactive element focus**: Prioritizes buttons, links, inputs, and other actionable elements
- **Hidden element filtering**: Automatically skips `aria-hidden`, `display:none`, `visibility:hidden`, and `inert` elements
- **Stable node IDs**: Assigns persistent `data-aipex-nodeid` attributes for reliable element targeting
- **Text content extraction**: Captures static text nodes for full page context
- **Configurable options**: Control text length limits, hidden element inclusion, and text node capture
- **Search functionality**: Built-in glob pattern search across snapshot text
## Installation
```bash
npm install @aipexstudio/dom-snapshot
# or
pnpm add @aipexstudio/dom-snapshot
```
## Usage
### Basic Snapshot Collection
```typescript
import { collectDomSnapshot, collectDomSnapshotInPage } from '@aipexstudio/dom-snapshot';
// Collect snapshot from current page
const snapshot = collectDomSnapshotInPage();
// Or specify a custom document and options
const customSnapshot = collectDomSnapshot(document, {
maxTextLength: 160, // Max characters for element text (default: 160, does not affect StaticText)
includeHidden: false, // Include hidden elements (default: false)
captureTextNodes: true, // Capture StaticText nodes (default: true)
});
console.log(snapshot.totalNodes); // Total nodes captured
console.log(snapshot.root); // Root node of the tree
console.log(snapshot.idToNode); // Flat map of id -> node
console.log(snapshot.metadata.url); // Page URL
```
### Converting to Text Format
```typescript
import { collectDomSnapshot, DomSnapshotManager } from '@aipexstudio/dom-snapshot';
const manager = new DomSnapshotManager();
// Collect raw snapshot
const serialized = collectDomSnapshot(document);
// Convert to TextSnapshot format
const textSnapshot = manager.buildTextSnapshot(serialized, { tabId: 1 });
// Format as readable text representation
const formatted = manager.formatSnapshot(textSnapshot);
console.log(formatted);
```
Output example:
```
→uid=dom_abc123 RootWebArea "My Page" <body>
uid=dom_def456 button "Submit" <button>
uid=dom_ghi789 textbox "Email" <input> desc="Enter your email"
StaticText "Welcome to our site"
*uid=dom_jkl012 link "Learn More" <a>
```
Markers:
- `*` - Currently focused element
- `→` - Ancestor of focused element
- ` ` (space) - Regular element
### Searching Snapshots
```typescript
import { searchSnapshotText } from '@aipexstudio/dom-snapshot';
const formatted = manager.formatSnapshot(textSnapshot);
// Simple text search
const simpleResult = searchSnapshotText(formatted, 'Submit');
// Multiple terms with | separator
const multiTermResult = searchSnapshotText(formatted, '登录 | Login | Sign In');
// Glob pattern search
const result = searchSnapshotText(formatted, 'button* | *submit*', {
useGlob: true,
contextLevels: 2, // Lines of context around matches
caseSensitive: false,
});
console.log(result.matchedLines); // Line numbers of matches
console.log(result.contextLines); // All lines to display (with context)
console.log(result.totalMatches); // Total match count
```
## API Reference
### `collectDomSnapshot(document, options?)`
Collects a DOM snapshot from the specified document.
**Parameters:**
- `document` - The Document to snapshot
- `options` - Optional configuration:
- `maxTextLength` (number, default: 160) - Maximum text length for element nodes (does not affect StaticText nodes which preserve full content)
- `includeHidden` (boolean, default: false) - Include hidden elements
- `captureTextNodes` (boolean, default: true) - Capture text nodes as StaticText
**Returns:** `SerializedDomSnapshot`
### `collectDomSnapshotInPage(options?)`
Convenience function that calls `collectDomSnapshot` with the current `document`.
### `DomSnapshotManager`
Manager class for converting and formatting snapshots.
**Methods:**
- `buildTextSnapshot(source, options?)` - Convert serialized snapshot to TextSnapshot
- `formatSnapshot(snapshot)` - Format TextSnapshot as readable text
### `searchSnapshotText(text, query, options?)`
Search snapshot text with optional glob patterns.
**Parameters:**
- `text` - The formatted snapshot text
- `query` - Search query (use `|` to separate multiple terms)
- `options`:
- `contextLevels` (number, default: 1) - Lines of context around matches
- `caseSensitive` (boolean, default: false) - Case-sensitive search
- `useGlob` (boolean, auto-detect) - Enable glob pattern matching
## Node Structure
Each captured node includes:
```typescript
interface DomSnapshotNode {
id: string; // Unique node identifier
role: string; // Semantic role (button, link, textbox, etc.)
name?: string; // Accessible name
value?: string; // Current value (for inputs)
description?: string; // Additional description
children: DomSnapshotNode[]; // Child nodes
tagName?: string; // HTML tag name
// State properties
checked?: boolean | 'mixed'; // Checkbox/radio state
pressed?: boolean | 'mixed'; // Toggle button state
disabled?: boolean; // Disabled state
focused?: boolean; // Focus state
selected?: boolean; // Selection state
expanded?: boolean; // Expanded state
// Additional properties
placeholder?: string; // Input placeholder
href?: string; // Link URL
title?: string; // Element title
textContent?: string; // Text content
inputType?: string; // Input type attribute
}
```
## Role Mapping
The library maps HTML elements to semantic roles:
| HTML Element | Role |
|-------------|------|
| `<button>` | button |
| `<a href="...">` | link |
| `<input type="text">` | textbox |
| `<input type="checkbox">` | checkbox |
| `<input type="radio">` | radio |
| `<input type="range">` | slider |
| `<select>` | combobox |
| `<textarea>` | textbox |
| `<img>` | image |
| Elements with `contenteditable` | textbox |
Explicit `role` attributes are respected and take precedence.
## Skipped Elements
The following are automatically excluded from snapshots:
- `<script>`, `<style>`, `<noscript>`, `<template>`, `<svg>`, `<head>`, `<meta>`, `<link>`
- Elements with `aria-hidden="true"`
- Elements with `hidden` attribute
- Elements with `inert` attribute
- Elements with `display: none`
- Elements with `visibility: hidden`
## Use Cases
- **Web Automation**: Provide page context to AI agents for browser automation
- **Testing**: Capture page state for snapshot testing
- **Accessibility Auditing**: Analyze semantic structure of pages
- **Content Extraction**: Extract meaningful content from web pages
- **Browser Extensions**: Build tools that need page structure without CDP
## License
MIT

View File

@@ -0,0 +1,42 @@
{
"name": "@aipexstudio/dom-snapshot",
"version": "0.0.6",
"description": "DOM snapshot utility for capturing and serializing web page state",
"main": "./dist/src/index.js",
"types": "./dist/src/index.d.ts",
"repository": {
"type": "git",
"url": "git+https://github.com/AIPexStudio/AIPex.git"
},
"exports": {
".": {
"types": "./dist/src/index.d.ts",
"import": "./dist/src/index.js"
}
},
"files": [
"dist/src/**/*.js",
"dist/src/**/*.d.ts",
"!dist/src/**/*.test.js",
"!dist/src/**/*.test.d.ts"
],
"scripts": {
"build": "tsc",
"test": "vitest run",
"typecheck": "tsc --project tsconfig.json",
"prepublishOnly": "npm run build"
},
"keywords": [
"dom",
"snapshot",
"serialize",
"web",
"browser"
],
"author": "AIPex Studio",
"license": "MIT",
"type": "module",
"devDependencies": {
"@types/node": "^24.10.1"
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,613 @@
import type {
CollectorOptions,
DomSnapshotFlatMap,
DomSnapshotNode,
SerializedDomSnapshot,
} from "./types";
// DOM attribute in which ensureElementUid() stores (and later re-reads) the
// generated node id, so the same element keeps its id across snapshots.
const NODE_ID_ATTR = "data-aipex-nodeid";
// Role assigned to snapshot nodes created from DOM text nodes.
const STATIC_TEXT_ROLE = "StaticText";
// Role assigned to the single root node of every snapshot.
const ROOT_ROLE = "RootWebArea";
// Tags that should be completely skipped (no traversal, no text extraction)
const SKIP_TAGS = new Set([
"script",
"style",
"noscript",
"template",
"svg", // SVG internals are usually not useful for automation
"head",
"meta",
"link",
]);
// Baseline collector configuration; caller-supplied options are spread over
// these values in collectDomSnapshot().
const DEFAULT_OPTIONS: CollectorOptions = {
maxTextLength: 160,
includeHidden: false,
captureTextNodes: true,
};
// Lower-case tag names that shouldIncludeElement() always keeps in the
// snapshot, regardless of their resolved role or text content.
const INTERACTIVE_TAGS = new Set([
"a",
"button",
"summary",
"details",
"select",
"textarea",
"input",
"label",
"video",
"audio",
]);
// Maps an <input> `type` to the role reported by resolveRole(). Types that are
// not listed here fall back to "textbox" in resolveRole().
const INPUT_TYPES_AS_ROLE: Record<string, string> = {
button: "button",
submit: "button",
reset: "button",
image: "button",
checkbox: "checkbox",
radio: "radio",
range: "slider",
email: "textbox",
search: "searchbox",
url: "textbox",
number: "spinbutton",
password: "textbox",
text: "textbox",
};
// Structural roles: shouldIncludeElement() does not keep an element with one
// of these roles merely because it has a name or text content.
const LAYOUT_ROLES = new Set([
"generic",
"article",
"section",
"region",
"group",
"main",
"complementary",
"navigation",
"banner",
"contentinfo",
]);
// Roles that shouldIncludeElement() always keeps in the snapshot.
const INTERACTIVE_ROLES = new Set([
"button",
"checkbox",
"combobox",
"link",
"menuitem",
"radio",
"searchbox",
"slider",
"spinbutton",
"switch",
"tab",
"textbox",
]);
/**
 * Walk a document and produce a serializable snapshot of its meaningful nodes.
 *
 * The snapshot root is a synthetic `RootWebArea` node whose id is the uid
 * stamped on `<body>` (or on `documentElement` when there is no body, or on a
 * detached `<div>` when there is neither).
 *
 * NOTE(review): traversal starts at that same element, so a node built for it
 * reuses the root's id and the root overwrites it in `idToNode` — confirm this
 * is intended.
 *
 * @param rootDocument - Document to snapshot; defaults to the global `document`.
 * @param options - Overrides merged on top of `DEFAULT_OPTIONS`.
 * @returns The node tree, a flat id-to-node map, the map's size and metadata.
 */
export function collectDomSnapshot(
  rootDocument: Document = document,
  options?: Partial<CollectorOptions>,
): SerializedDomSnapshot {
  const config: CollectorOptions = { ...DEFAULT_OPTIONS, ...options };
  const idToNode: DomSnapshotFlatMap = Object.create(null);

  // Element that both names the root and anchors the traversal.
  const anchor: Element | null =
    rootDocument.body || rootDocument.documentElement;

  const rootNode: DomSnapshotNode = {
    id: ensureElementUid(anchor ?? rootDocument.createElement("div")),
    role: ROOT_ROLE,
    name: rootDocument.title || rootDocument.URL || "document",
    children: [],
    tagName: anchor?.tagName.toLowerCase(),
  };

  if (anchor) {
    const collected = traverseElement(anchor, config, idToNode, rootDocument);
    for (const child of collected) {
      rootNode.children.push(child);
    }
  }

  // Registered last so the root owns its id slot in the flat map.
  idToNode[rootNode.id] = rootNode;

  const totalNodes = Object.keys(idToNode).length;
  const timestamp = Date.now();
  return {
    root: rootNode,
    idToNode,
    totalNodes,
    timestamp,
    metadata: {
      title: rootDocument.title || "",
      url: rootDocument.URL || "",
      collectedAt: new Date().toISOString(),
      options: config,
    },
  };
}
/**
 * Convenience wrapper: snapshot the global `document`.
 *
 * @param options - Optional collector overrides, forwarded unchanged to
 *   collectDomSnapshot().
 * @returns The snapshot produced by collectDomSnapshot() for `document`.
 */
export function collectDomSnapshotInPage(
options?: Partial<CollectorOptions>,
): SerializedDomSnapshot {
return collectDomSnapshot(document, options);
}
/**
 * Recursively convert an element and its subtree into snapshot nodes.
 *
 * Returns an empty array for skipped tags and (unless `includeHidden` is set)
 * hidden subtrees. An element that is not worth keeping itself is collapsed:
 * with zero or one collected descendant the descendants are returned as-is;
 * with several, a synthetic wrapper node is created to hold them.
 *
 * Every node created for an element is registered in `idToNode`.
 */
function traverseElement(
  element: Element,
  options: CollectorOptions,
  idToNode: DomSnapshotFlatMap,
  rootDocument: Document,
): DomSnapshotNode[] {
  if (SKIP_TAGS.has(element.tagName.toLowerCase())) {
    return [];
  }
  // A hidden element takes its entire subtree with it.
  if (!options.includeHidden && isElementHidden(element, rootDocument)) {
    return [];
  }

  const keepSelf = shouldIncludeElement(element, options, rootDocument);

  // Element children first, then the element's own text nodes.
  const descendants: DomSnapshotNode[] = Array.from(element.children).flatMap(
    (child) => traverseElement(child, options, idToNode, rootDocument),
  );
  if (options.captureTextNodes) {
    descendants.push(...extractTextNodes(element, options, idToNode));
  }

  // Nothing to wrap: hand back the (possibly empty) descendant list directly.
  if (!keepSelf && descendants.length <= 1) {
    return descendants;
  }

  // Either the element itself is kept, or it becomes a synthetic wrapper.
  const node = createNodeFromElement(
    element,
    options,
    idToNode,
    rootDocument,
    !keepSelf,
  );
  node.children = descendants;
  idToNode[node.id] = node;
  return [node];
}
/**
 * Build a snapshot node describing a single element (children are left empty
 * for the caller to fill in).
 *
 * @param element - Element to describe; it is stamped with a uid if it has none.
 * @param options - Supplies `maxTextLength`, used to truncate stored text.
 * @param _idToNode - Not used by this function.
 * @param rootDocument - Used for accessible-name lookup and focus detection.
 * @param isSynthetic - When true and no name was resolved, the truncated text
 *   content is used as the node name.
 * @returns The populated node; optional fields are only set when applicable.
 */
function createNodeFromElement(
element: Element,
options: CollectorOptions,
_idToNode: DomSnapshotFlatMap,
rootDocument: Document,
isSynthetic: boolean,
): DomSnapshotNode {
const nodeId = ensureElementUid(element);
const role = resolveRole(element);
const name = resolveAccessibleName(element, rootDocument);
const textContent = normalizeTextContent(element.textContent || "");
const value = resolveElementValue(element);
const node: DomSnapshotNode = {
id: nodeId,
role: role || "generic",
name: name || undefined,
children: [],
tagName: element.tagName.toLowerCase(),
};
if (value) {
node.value = value;
}
// Store the text only when it adds something beyond the name.
if (textContent && textContent !== node.name) {
node.textContent = textContent.slice(0, options.maxTextLength);
}
// <input>: type, placeholder and checked state.
if (element instanceof HTMLInputElement) {
node.inputType = element.type;
if (element.placeholder) {
node.placeholder = element.placeholder;
}
if (element.type === "checkbox" || element.type === "radio") {
// An indeterminate control is reported as "mixed" rather than true/false.
node.checked = element.indeterminate ? "mixed" : element.checked;
}
if (element.type === "submit" && !node.name) {
node.name = element.value || "Submit";
}
}
// <textarea>: reported with a pseudo input type of "textarea".
if (element instanceof HTMLTextAreaElement) {
node.inputType = "textarea";
if (!node.value && element.value) {
node.value = element.value;
}
if (element.placeholder) {
node.placeholder = element.placeholder;
}
}
// <select>: value and name are both overwritten from the selected options.
if (element instanceof HTMLSelectElement) {
node.inputType = "select";
const selectedOptions = Array.from(element.selectedOptions);
if (selectedOptions.length > 0) {
// value should be the actual HTML value attribute (for form submission)
node.value = selectedOptions.map((opt) => opt.value).join(", ");
// name should be the selected option's display text (what user sees), not all options' text
const selectedText = selectedOptions
.map((opt) => opt.label || opt.textContent?.trim() || "")
.filter(Boolean)
.join(", ");
if (selectedText) {
node.name = selectedText;
}
}
}
if (element instanceof HTMLAnchorElement) {
node.href = element.href;
}
// Images expose their alt text as the description.
if (element instanceof HTMLImageElement) {
node.description = element.alt || undefined;
}
if (element instanceof HTMLElement) {
if (element.title) {
node.title = element.title;
}
// aria-disabled, when present, wins over the native `disabled` property.
if (element.hasAttribute("aria-disabled")) {
node.disabled = element.getAttribute("aria-disabled") === "true";
} else if ("disabled" in element) {
node.disabled = Boolean(
(element as HTMLButtonElement | HTMLInputElement).disabled,
);
}
// ARIA states are only recorded when the attribute is present at all.
if (element.hasAttribute("aria-pressed")) {
const pressed = element.getAttribute("aria-pressed");
node.pressed = pressed === "mixed" ? "mixed" : pressed === "true";
}
if (element.hasAttribute("aria-expanded")) {
node.expanded = element.getAttribute("aria-expanded") === "true";
}
if (element.hasAttribute("aria-selected")) {
node.selected = element.getAttribute("aria-selected") === "true";
}
// Capture focused state
if (rootDocument.activeElement === element) {
node.focused = true;
}
}
// Synthetic wrappers with no name fall back to their (truncated) text.
if (isSynthetic && !node.name && textContent) {
node.name = textContent.slice(0, options.maxTextLength);
}
return node;
}
/**
 * Decide whether an element deserves its own node in the snapshot.
 *
 * Kept: interactive roles/tags, content-editable elements, images with
 * non-blank alt text, and any non-layout element that has a name or text of
 * at least two characters. Invisible elements are rejected unless
 * `includeHidden` is set.
 */
function shouldIncludeElement(
  element: Element,
  options: CollectorOptions,
  rootDocument: Document,
): boolean {
  if (!options.includeHidden && !isElementVisible(element, rootDocument)) {
    return false;
  }

  const role = resolveRole(element);
  const isInteractive =
    INTERACTIVE_ROLES.has(role) ||
    INTERACTIVE_TAGS.has(element.tagName.toLowerCase()) ||
    (element instanceof HTMLElement && element.isContentEditable);
  if (isInteractive) {
    return true;
  }

  // Images are kept only when they carry alt text.
  if (role === "image") {
    const altText = (element as HTMLImageElement).alt;
    return Boolean(altText && altText.trim().length > 0);
  }

  // Layout containers never qualify on name/text alone.
  if (LAYOUT_ROLES.has(role)) {
    return false;
  }

  const accessibleName = resolveAccessibleName(element, rootDocument);
  if (accessibleName && accessibleName.trim().length > 1) {
    return true;
  }
  return normalizeTextContent(element.textContent || "").length >= 2;
}
/**
 * Resolve the semantic role reported for an element.
 *
 * An explicit `role` attribute wins. The attribute is a space-separated token
 * list, so it is trimmed and only its first token is used; a blank attribute
 * is treated as absent instead of producing a whitespace "role".
 * Otherwise the role is derived from the tag name (and `type` for inputs),
 * with content-editable elements reported as "textbox" and everything else
 * as "generic".
 *
 * @param element - Element to classify.
 * @returns The role name; never empty.
 */
function resolveRole(element: Element): string {
  // First token only: the remaining tokens are author-supplied fallbacks.
  const explicitRole = element.getAttribute("role")?.trim().split(/\s+/)[0];
  if (explicitRole) {
    return explicitRole;
  }

  switch (element.tagName.toLowerCase()) {
    case "a":
      // An anchor without href is not a link.
      return (element as HTMLAnchorElement).href ? "link" : "generic";
    case "button":
      return "button";
    case "img":
      return "image";
    case "textarea":
      return "textbox";
    case "select":
      return "combobox";
    case "input": {
      const type = ((element as HTMLInputElement).type || "text").toLowerCase();
      // Input types missing from the table behave like text fields.
      return INPUT_TYPES_AS_ROLE[type] || "textbox";
    }
    default:
      break;
  }

  if (element instanceof HTMLElement && element.isContentEditable) {
    return "textbox";
  }
  return "generic";
}
/**
 * Compute an approximate accessible name for an element.
 *
 * Sources are tried in this order, and the first non-empty one wins:
 *  1. `aria-labelledby` — text of every referenced element, joined by spaces.
 *     It is checked before `aria-label`, matching the precedence of the
 *     accessible-name computation.
 *  2. `aria-label`.
 *  3. `alt` text of an image.
 *  4. For inputs: the placeholder; then, for button-like inputs, the `value`,
 *     with "Submit"/"Reset" as defaults for submit/reset buttons only (a plain
 *     `type="button"` has no default label).
 *  5. The element's whitespace-normalised text content.
 *
 * @param element - Element to name.
 * @param rootDocument - Document used to resolve `aria-labelledby` ids.
 * @returns The name, or `null` when no source produced any text.
 */
function resolveAccessibleName(
  element: Element,
  rootDocument: Document,
): string | null {
  const labelledBy = element.getAttribute("aria-labelledby");
  if (labelledBy) {
    const referencedTexts = labelledBy
      .split(/\s+/)
      .filter(Boolean)
      .map((id) =>
        normalizeTextContent(
          rootDocument.getElementById(id)?.textContent || "",
        ),
      )
      .filter(Boolean);
    // Dangling or empty references fall through to the next source.
    if (referencedTexts.length > 0) {
      return referencedTexts.join(" ");
    }
  }

  const ariaLabel = element.getAttribute("aria-label")?.trim();
  if (ariaLabel) {
    return ariaLabel;
  }

  if (element instanceof HTMLImageElement) {
    const alt = element.alt ? element.alt.trim() : "";
    if (alt) {
      return alt;
    }
  }

  if (element instanceof HTMLInputElement) {
    if (element.placeholder) {
      return element.placeholder;
    }
    if (element.type === "submit") {
      return element.value || "Submit";
    }
    if (element.type === "reset") {
      return element.value || "Reset";
    }
    if (element.type === "button" && element.value) {
      return element.value;
    }
  }

  // Buttons, links and everything else are named by their visible text.
  return normalizeTextContent(element.textContent || "") || null;
}
/**
 * Read the current value of a form control or editable region.
 *
 * Password inputs are masked with one `*` per character. Selects report the
 * `value` of their first selected option. Content-editable elements report
 * their whitespace-normalised text. Empty values become `undefined`, except
 * for an empty password field, which yields an empty string.
 */
function resolveElementValue(element: Element): string | undefined {
  const orUndefined = (raw: string): string | undefined => raw || undefined;

  if (element instanceof HTMLInputElement) {
    return element.type === "password"
      ? "*".repeat(element.value.length)
      : orUndefined(element.value);
  }
  if (element instanceof HTMLTextAreaElement) {
    return orUndefined(element.value);
  }
  if (element instanceof HTMLSelectElement) {
    // The submitted value, not the option's display text.
    const firstSelected = element.selectedOptions[0];
    return firstSelected ? orUndefined(firstSelected.value) : undefined;
  }
  if (element instanceof HTMLElement && element.isContentEditable) {
    return orUndefined(normalizeTextContent(element.textContent || ""));
  }
  return undefined;
}
/**
 * Turn the direct text-node children of an element into `StaticText` nodes.
 *
 * Whitespace-only text is ignored. Each node id is the parent element's uid
 * plus `::text-<index>`, where the index is the text node's position among
 * all of the parent's child nodes. Text is stored in full, not truncated.
 * Every created node is also registered in `idToNode`.
 *
 * @param _options - Not used by this function.
 */
function extractTextNodes(
  element: Element,
  _options: CollectorOptions,
  idToNode: DomSnapshotFlatMap,
): DomSnapshotNode[] {
  const collected: DomSnapshotNode[] = [];
  const children = Array.from(element.childNodes);

  for (let position = 0; position < children.length; position++) {
    const child = children[position];
    if (child === undefined || child.nodeType !== Node.TEXT_NODE) {
      continue;
    }
    const content = normalizeTextContent(child.textContent || "");
    if (content.length === 0) {
      continue;
    }
    const textId = `${ensureElementUid(element)}::text-${position}`;
    const staticText: DomSnapshotNode = {
      id: textId,
      role: STATIC_TEXT_ROLE,
      name: content,
      children: [],
      textContent: content,
    };
    idToNode[textId] = staticText;
    collected.push(staticText);
  }
  return collected;
}
/**
 * Return the element's snapshot uid, creating and storing one if needed.
 *
 * The uid lives in the `NODE_ID_ATTR` attribute, so an element that already
 * has one keeps it. A new uid has the form `dom_<short id>`.
 */
function ensureElementUid(element: Element): string {
  let uid = element.getAttribute(NODE_ID_ATTR);
  if (!uid) {
    uid = `dom_${generateShortId()}`;
    element.setAttribute(NODE_ID_ATTR, uid);
  }
  return uid;
}
/**
 * Generate a short base-36 identifier: 4 characters taken from the current
 * timestamp followed by 6 random characters.
 *
 * `Math.random().toString(36)` can produce fewer than six fractional digits
 * (0.5 yields just "i", 0 yields none), so both parts are padded to a fixed
 * width. The result is therefore always exactly 10 characters long.
 *
 * @returns A 10-character string of `[0-9a-z]`.
 */
function generateShortId(): string {
  const time = Date.now().toString(36).slice(-4).padStart(4, "0");
  const random = Math.random().toString(36).slice(2, 8).padEnd(6, "0");
  return `${time}${random}`;
}
/**
 * Collapse every run of whitespace into a single space and drop leading and
 * trailing whitespace.
 */
function normalizeTextContent(text: string): string {
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
/**
 * Decide whether an element hides its entire subtree from the snapshot.
 *
 * True when the element has `aria-hidden="true"`, a `hidden` attribute or an
 * `inert` attribute, or when it is an HTMLElement whose computed style is
 * `display: none` or `visibility: hidden`.
 *
 * Note that a `visibility: hidden` element is treated as hiding everything
 * below it; descendants that re-enable visibility are not inspected.
 * When no computed style is available, only the attribute checks apply.
 */
function isElementHidden(element: Element, rootDocument: Document): boolean {
  const hiddenByAttribute =
    element.getAttribute("aria-hidden") === "true" ||
    element.hasAttribute("hidden") ||
    element.hasAttribute("inert");
  if (hiddenByAttribute) {
    return true;
  }

  // Computed styles are only consulted for HTML elements.
  if (!(element instanceof HTMLElement)) {
    return false;
  }
  const computed = rootDocument.defaultView?.getComputedStyle(element);
  if (!computed) {
    return false;
  }
  return computed.display === "none" || computed.visibility === "hidden";
}
/**
 * Decide whether an element itself counts as visible for inclusion purposes.
 *
 * Unlike the subtree-level hidden check, a negative answer here only excludes
 * the element's own node. Non-HTML elements, and elements for which no
 * computed style is available, are assumed visible.
 *
 * An element is invisible when its computed style is `display: none`,
 * `visibility: hidden` or `opacity: 0`, or when it has no `offsetParent`
 * while not being `position: fixed` and not being the document body.
 */
function isElementVisible(element: Element, rootDocument: Document): boolean {
  if (!(element instanceof HTMLElement)) {
    return true;
  }
  const view = rootDocument.defaultView;
  const computed = view?.getComputedStyle(element);
  if (!computed) {
    return true;
  }

  const styledAway =
    computed.display === "none" ||
    computed.visibility === "hidden" ||
    computed.opacity === "0";
  if (styledAway) {
    return false;
  }

  // offsetParent is always null in JSDOM/happy-dom, so the layout heuristic
  // below is bypassed when the environment looks like one of those.
  const looksLikeTestDom = view
    ? view.navigator?.userAgent?.includes("jsdom") || view.innerWidth === 0
    : false;
  if (looksLikeTestDom) {
    return true;
  }

  const hasNoLayoutParent =
    element.offsetParent === null && computed.position !== "fixed";
  // <body> legitimately has no offsetParent.
  return hasNoLayoutParent ? element === element.ownerDocument?.body : true;
}

View File

@@ -0,0 +1,4 @@
export { collectDomSnapshot, collectDomSnapshotInPage } from "./collector";
export { DomSnapshotManager } from "./manager";
export { searchSnapshotText } from "./query";
export * from "./types";

View File

@@ -0,0 +1,66 @@
import { buildFocusAncestorSet, formatNode } from "./snapshot-formatter";
import type {
DomSnapshotNode,
SerializedDomSnapshot,
TextSnapshot,
TextSnapshotNode,
} from "./types";
/**
 * Options accepted by DomSnapshotManager.buildTextSnapshot().
 */
export interface BuildSnapshotOptions {
// Copied onto the resulting TextSnapshot as `tabId`; 0 is used when omitted.
tabId?: number;
}
/**
 * Converts serialized DOM snapshots into text snapshots and renders them.
 */
export class DomSnapshotManager {
  /**
   * Deep-copy a serialized snapshot into a `TextSnapshot`.
   *
   * @param source - Snapshot whose `root` tree is cloned.
   * @param options - Optional `tabId` to record; defaults to 0.
   * @returns The cloned tree plus a Map from node id to cloned node.
   */
  buildTextSnapshot(
    source: SerializedDomSnapshot,
    options?: BuildSnapshotOptions,
  ): TextSnapshot {
    const idToNode = new Map<string, TextSnapshotNode>();
    const root = this.cloneNode(source.root, idToNode);
    const tabId = options?.tabId ?? 0;
    return { root, idToNode, tabId };
  }

  /**
   * Render a text snapshot as a string, starting from its root at depth 0.
   */
  formatSnapshot(snapshot: TextSnapshot): string {
    const { root, idToNode } = snapshot;
    return formatNode(root, 0, buildFocusAncestorSet(root, idToNode));
  }

  /**
   * Clone one node and, recursively, its children. Children are registered
   * in `idToNode` before their parent. A placeholder is promoted to the
   * description when the node has no description of its own.
   */
  private cloneNode(
    node: DomSnapshotNode,
    idToNode: Map<string, TextSnapshotNode>,
  ): TextSnapshotNode {
    const children: TextSnapshotNode[] = [];
    for (const child of node.children ?? []) {
      children.push(this.cloneNode(child, idToNode));
    }

    const description =
      !node.description && node.placeholder
        ? node.placeholder
        : node.description;

    const copy: TextSnapshotNode = {
      id: node.id,
      role: node.role,
      name: node.name,
      value: node.value,
      description,
      children,
      tagName: node.tagName,
      checked: node.checked,
      pressed: node.pressed,
      disabled: node.disabled,
      focused: node.focused,
      selected: node.selected,
      expanded: node.expanded,
    };
    idToNode.set(copy.id, copy);
    return copy;
  }
}

/** Shared, ready-to-use manager instance. */
export const domSnapshotManager = new DomSnapshotManager();

View File

@@ -0,0 +1,266 @@
/**
* Snapshot Query and Search System
*
* Provides search functionality for snapshot text with glob pattern support
*/
/**
 * Role names treated as structural noise.
 *
 * In this module, shouldSkipLine() uses the list to leave out context lines
 * whose trimmed text starts with one of these names.
 */
export const SKIP_ROLES = [
"generic",
"none",
"group",
"main",
"navigation",
"contentinfo",
"search",
"banner",
"complementary",
"region",
"article",
"section",
"InlineTextBox", // These are usually redundant with StaticText
"presentation", // ARIA presentation role (no semantic meaning)
"LineBreak", // Line break elements
];
/**
 * Report whether a string contains any glob metacharacter:
 * `*`, `?`, `[`, `]`, `{` or `}`.
 */
function hasGlobPattern(str: string): boolean {
  const globChars = "*?[]{}";
  for (const char of str) {
    if (globChars.includes(char)) {
      return true;
    }
  }
  return false;
}
/**
 * Translate a brace-free glob pattern into regular-expression source.
 *
 * `*` becomes `.*`, `?` becomes `.`, and `[...]` becomes a character class
 * whose members are taken literally except for `-`, which forms ranges.
 * A `[` without a closing `]` (or an empty `[]`) is matched literally.
 * Every other character is escaped so it matches itself.
 */
function globToRegexSource(pattern: string): string {
  let source = "";
  let index = 0;
  while (index < pattern.length) {
    const char = pattern.charAt(index);
    if (char === "*") {
      source += ".*";
    } else if (char === "?") {
      source += ".";
    } else if (char === "[") {
      const close = pattern.indexOf("]", index + 1);
      if (close > index + 1) {
        // Neutralise the characters that are special inside a class.
        const members = pattern
          .slice(index + 1, close)
          .replace(/[\\^[\]]/g, "\\$&");
        source += `[${members}]`;
        index = close + 1;
        continue;
      }
      source += "\\[";
    } else {
      source += char.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    }
    index += 1;
  }
  return source;
}

/**
 * Simple glob pattern matcher supporting basic patterns:
 * - * matches any characters
 * - ? matches single character
 * - [abc] matches a, b, or c
 * - [a-z] matches character range
 * - {pattern1,pattern2} matches either pattern
 *
 * The pattern is not anchored: it matches when it is found anywhere in
 * `text`, which is what line-oriented snapshot search needs.
 *
 * @param pattern - Glob pattern to look for.
 * @param text - Text to search.
 * @param caseSensitive - When false (the default) case is ignored.
 * @returns True on a match; false otherwise, including when the pattern
 *   cannot be compiled (a warning is logged in that case).
 */
function matchGlob(
  pattern: string,
  text: string,
  caseSensitive: boolean = false,
): boolean {
  const glob = caseSensitive ? pattern : pattern.toLowerCase();
  const haystack = caseSensitive ? text : text.toLowerCase();

  // Handle brace expansion {pattern1,pattern2}: expand the first group and
  // recurse, so later groups are expanded by the nested calls.
  const braceStart = glob.indexOf("{");
  const braceEnd = braceStart === -1 ? -1 : glob.indexOf("}", braceStart + 1);
  if (braceEnd !== -1) {
    const prefix = glob.substring(0, braceStart);
    const suffix = glob.substring(braceEnd + 1);
    return glob
      .substring(braceStart + 1, braceEnd)
      .split(",")
      .some((alternative) =>
        matchGlob(prefix + alternative.trim() + suffix, haystack, caseSensitive),
      );
  }

  try {
    const regex = new RegExp(globToRegexSource(glob), caseSensitive ? "" : "i");
    return regex.test(haystack);
  } catch (error) {
    // Reachable for e.g. a reversed range such as [z-a].
    console.warn(`Invalid glob pattern: ${glob}`, error);
    return false;
  }
}
/**
 * Search options for snapshot text queries.
 * Every field is optional; searchSnapshotText() applies the defaults below.
 */
export interface SearchOptions {
contextLevels?: number; // Default: 1. Number of non-skipped lines kept before and after each match
caseSensitive?: boolean; // Default: false
useGlob?: boolean; // Default: auto-detect (true if any search term contains glob chars)
}
/**
 * Search result containing matched lines and context.
 * Line numbers are zero-based indexes into the snapshot text split on "\n".
 */
export interface SearchResult {
matchedLines: number[]; // Line numbers of matched lines
contextLines: number[]; // Line numbers of all lines to display (matched + context), sorted ascending
totalMatches: number; // Number of matched lines (same as matchedLines.length)
}
/**
 * Search formatted snapshot text, line by line.
 *
 * The query is split on `|` into alternative terms; a line matches when any
 * term matches it. Glob matching is used when `useGlob` is true or, if it is
 * left undefined, when any term contains a glob character. The result also
 * lists surrounding context lines. Progress is logged to the console.
 *
 * @param snapshotText - Text to search, one node per line.
 * @param query - One or more terms separated by `|`.
 * @param options - See SearchOptions.
 * @returns Zero-based matched line numbers, the context line numbers, and the
 *   match count. An empty query yields an empty result.
 */
export function searchSnapshotText(
  snapshotText: string,
  query: string,
  options: SearchOptions = {},
): SearchResult {
  const searchTerms = parseSearchQuery(query);
  if (searchTerms.length === 0) {
    return { matchedLines: [], contextLines: [], totalMatches: 0 };
  }

  const { contextLevels = 1, caseSensitive = false, useGlob } = options;
  // An explicit useGlob wins; otherwise sniff the terms for glob characters.
  const globEnabled =
    useGlob === undefined
      ? searchTerms.some((term) => hasGlobPattern(term))
      : useGlob;

  const lines = snapshotText.split("\n");
  const matchedLines: number[] = [];
  lines.forEach((line, lineNumber) => {
    if (matchLine(line, searchTerms, caseSensitive, globEnabled)) {
      matchedLines.push(lineNumber);
    }
  });
  console.log(
    `🔍 [SEARCH] Found ${matchedLines.length} matched lines for terms:`,
    searchTerms,
  );

  const contextLines = expandLineContext(matchedLines, lines, contextLevels);
  console.log(
    `📦 [SEARCH] Expanded to ${contextLines.length} total lines (context level: ${contextLevels})`,
  );

  return { matchedLines, contextLines, totalMatches: matchedLines.length };
}
/**
 * Report whether a line matches at least one search term.
 *
 * With `useGlob` each term is matched as a glob pattern; otherwise each term
 * is a plain substring, compared case-insensitively unless `caseSensitive`.
 */
function matchLine(
  line: string,
  searchTerms: string[],
  caseSensitive: boolean,
  useGlob: boolean,
): boolean {
  if (useGlob) {
    return searchTerms.some((term) => matchGlob(term, line, caseSensitive));
  }
  const haystack = caseSensitive ? line : line.toLowerCase();
  return searchTerms.some((term) =>
    haystack.includes(caseSensitive ? term : term.toLowerCase()),
  );
}
/**
 * Grow a set of matched line numbers to include surrounding context.
 *
 * For every matched line, up to `levels` lines before it and up to `levels`
 * lines after it are added. Lines rejected by shouldSkipLine() are passed
 * over without counting towards that limit.
 *
 * @returns The matched and context line numbers, de-duplicated and sorted
 *   in ascending order.
 */
function expandLineContext(
  matchedLines: number[],
  lines: string[],
  levels: number,
): number[] {
  const selected = new Set<number>();

  // Walk away from `origin` in one direction, keeping up to `levels` lines.
  const collectNeighbours = (origin: number, step: 1 | -1): void => {
    let kept = 0;
    for (
      let cursor = origin + step;
      (step === -1 ? cursor >= 0 : cursor < lines.length) && kept < levels;
      cursor += step
    ) {
      const candidate = lines[cursor];
      if (candidate === undefined || shouldSkipLine(candidate)) {
        continue;
      }
      selected.add(cursor);
      kept += 1;
    }
  };

  for (const lineNum of matchedLines) {
    selected.add(lineNum);
    collectNeighbours(lineNum, -1);
    collectNeighbours(lineNum, 1);
  }
  return [...selected].sort((a, b) => a - b);
}
/**
 * Report whether a line, once trimmed, begins with any name in SKIP_ROLES.
 */
function shouldSkipLine(line: string): boolean {
  const content = line.trim();
  for (const role of SKIP_ROLES) {
    if (content.startsWith(role)) {
      return true;
    }
  }
  return false;
}
/**
 * Split a search query on "|" into trimmed, non-empty terms.
 * Example: "登录 | Login | Sign In" -> ["登录", "Login", "Sign In"]
 * Example: "button* | login | submit?" -> ["button*", "login", "submit?"]
 */
export function parseSearchQuery(query: string): string[] {
  const terms: string[] = [];
  for (const segment of query.split("|")) {
    const term = segment.trim();
    if (term.length > 0) {
      terms.push(term);
    }
  }
  return terms;
}
/**
 * Report whether at least one of the search terms contains a glob pattern.
 */
export function hasGlobPatterns(searchTerms: string[]): boolean {
  for (const term of searchTerms) {
    if (hasGlobPattern(term)) {
      return true;
    }
  }
  return false;
}

View File

@@ -0,0 +1,218 @@
/**
* Shared snapshot formatting utilities
*
* Used by both CDP-based and DOM-based snapshot implementations
*/
import { SKIP_ROLES } from "./query";
import type { TextSnapshotNode } from "./types";
/**
 * Interactive roles that should always be included.
 * shouldIncludeInOutput() returns true for any node whose role is listed here.
 */
const INTERACTIVE_ROLES = [
"button",
"link",
"textbox",
"combobox",
"checkbox",
"radio",
"menuitem",
"tab",
"slider",
"spinbutton",
"searchbox",
"switch",
];
/**
 * Decide whether a node is rendered with its full attributes.
 *
 * Always included: the root web area, interactive roles, and images.
 * `StaticText` is included when its trimmed name has at least two characters.
 * Any other node is included only when its role is not in `skipRoles` and its
 * trimmed name has at least two characters.
 *
 * @param node - Node to evaluate.
 * @param skipRoles - Roles to suppress; defaults to `SKIP_ROLES`.
 */
export function shouldIncludeInOutput(
  node: TextSnapshotNode,
  skipRoles: string[] = SKIP_ROLES,
): boolean {
  const role = node.role || "";
  const hasMeaningfulName = (node.name || "").trim().length >= 2;

  const alwaysShown =
    role === "RootWebArea" ||
    INTERACTIVE_ROLES.includes(role) ||
    role === "image" ||
    role === "img";
  if (alwaysShown) {
    return true;
  }

  // Checked ahead of skipRoles, so listing StaticText there does not hide it.
  if (role === "StaticText" && hasMeaningfulName) {
    return true;
  }
  return !skipRoles.includes(role) && hasMeaningfulName;
}
/**
 * Build the ordered attribute tokens used to render one snapshot line.
 *
 * Token order: `uid=<id>` (omitted for StaticText), role, quoted name,
 * `<tagName>`, value-like properties, `desc="…"`, boolean state tokens, and
 * finally the tri-state `pressed` / `checked` properties.
 *
 * Boolean states come in two flavours:
 * - disabled / expanded / focused / selected emit a capability word
 *   ("disableable", "expandable", …) whenever the property is defined, followed
 *   by the property name itself when the value is truthy.
 * - modal / readonly / required have no separate capability word, so they are
 *   emitted once and only when truthy. Emitting them for a defined-but-false
 *   value would label a non-required field as `required`, and emitting both a
 *   capability and a state would print the same word twice.
 *
 * @param node - Snapshot node to describe.
 * @returns Attribute tokens, ready to be joined with a single space.
 */
export function getNodeAttributes(node: TextSnapshotNode): string[] {
  const attributes: string[] = [];

  // StaticText nodes don't need a uid - they can't be operated on directly.
  if (node.role !== "StaticText") {
    attributes.push(`uid=${node.id}`);
  }
  attributes.push(node.role, `"${node.name || ""}"`);

  if (node.tagName) {
    attributes.push(`<${node.tagName}>`);
  }

  // Value-like properties are printed as property="value" whenever they are
  // set; falsy-but-meaningful values such as 0 or "" are kept.
  const valueProperties = [
    "value",
    "valuetext",
    "valuemin",
    "valuemax",
    "level",
    "autocomplete",
  ] as const;
  for (const property of valueProperties) {
    const value = node[property];
    if (value !== undefined && value !== null) {
      attributes.push(`${property}="${value}"`);
    }
  }

  if (node.description) {
    attributes.push(`desc="${node.description}"`);
  }

  // States that have a distinct capability word.
  const capabilityStates = [
    ["disabled", "disableable"],
    ["expanded", "expandable"],
    ["focused", "focusable"],
    ["selected", "selectable"],
  ] as const;
  for (const [property, capability] of capabilityStates) {
    const value = node[property];
    if (value === undefined) continue;
    attributes.push(capability);
    if (value) {
      attributes.push(property);
    }
  }

  // Plain flags: printed once, only when actually set.
  const plainFlags = ["modal", "readonly", "required"] as const;
  for (const property of plainFlags) {
    if (node[property]) {
      attributes.push(property);
    }
  }

  // Tri-state properties keep their literal value (true / false / "mixed").
  const triStateProperties = ["pressed", "checked"] as const;
  for (const property of triStateProperties) {
    const value = node[property];
    if (value !== undefined) {
      attributes.push(`${property}="${value}"`);
    }
  }

  // Runtime guard: drops an undefined token should a node arrive without a role.
  return attributes.filter((attr): attr is string => attr !== undefined);
}
/**
 * Render a node and its descendants as snapshot text, one line per printed node.
 *
 * Nodes rejected by shouldIncludeInOutput() produce no line of their own:
 * a rejected StaticText yields "" outright, while any other rejected node is
 * replaced by its children rendered at the same depth. A printed node is
 * written as `<indent><marker><attributes>\n` and its children follow one
 * level deeper.
 *
 * @param node - Subtree root to render.
 * @param depth - Current nesting level; controls the indent width.
 * @param focusAncestorSet - Ids lying on the path to a focused node.
 * @param skipRoles - Forwarded to shouldIncludeInOutput(). The default here is
 *   an empty list, so that function's own SKIP_ROLES default is not applied
 *   unless the caller passes the roles explicitly.
 * @returns The rendered text (possibly empty).
 */
export function formatNode(
  node: TextSnapshotNode,
  depth: number,
  focusAncestorSet: Set<string>,
  skipRoles: string[] = [],
): string {
  const renderChildren = (childDepth: number): string =>
    node.children
      .map((child) => formatNode(child, childDepth, focusAncestorSet, skipRoles))
      .join("");

  if (!shouldIncludeInOutput(node, skipRoles)) {
    // Rejected StaticText is dropped without looking at children; other
    // rejected nodes are transparent and hoist their children to this depth.
    if (node.role === "StaticText") {
      return "";
    }
    return renderChildren(depth);
  }

  const attributes = getNodeAttributes(node);

  // marker: '*' = exact focused node; '→' = ancestor in focus path
  let marker = " ";
  if (node.focused) {
    marker = "*";
  } else if (focusAncestorSet.has(node.id)) {
    marker = "→";
  }

  const ownLine = `${" ".repeat(depth) + marker + attributes.join(" ")}\n`;
  return ownLine + renderChildren(depth + 1);
}
/**
 * Collect the ids that lie on the path from the root to every focused node.
 *
 * Focused nodes are found by scanning `idToNode`. For each one, the tree is
 * searched depth-first from `root` (children are followed by id through
 * `idToNode`); every id on the resulting root-to-target path is added to the
 * set. If a focused node cannot be reached from the root, only its own id is
 * added.
 *
 * @param root - Root of the snapshot tree.
 * @param idToNode - Lookup from node id to node.
 * @returns Ids of focused nodes and their ancestors.
 */
export function buildFocusAncestorSet(
  root: TextSnapshotNode,
  idToNode: Map<string, TextSnapshotNode>,
): Set<string> {
  const focusedIds: string[] = [];
  idToNode.forEach((node, id) => {
    if (node.focused) {
      focusedIds.push(id);
    }
  });

  const rootId = root.id;
  const result = new Set<string>();

  for (const targetId of focusedIds) {
    // `trail` always holds the ids from the root down to the node being
    // explored, so it equals the full path once the target is reached.
    const trail: string[] = [];
    const seen = new Set<string>();

    const descend = (currentId: string): boolean => {
      if (currentId === targetId) {
        trail.push(currentId);
        return true;
      }
      if (seen.has(currentId)) {
        return false;
      }
      seen.add(currentId);

      const current = idToNode.get(currentId);
      if (!current) {
        return false;
      }

      trail.push(currentId);
      for (const child of current.children) {
        if (descend(child.id)) {
          return true;
        }
      }
      // Dead end: this node is not on the path.
      trail.pop();
      return false;
    };

    if (descend(rootId)) {
      for (const id of trail) {
        result.add(id);
      }
    } else {
      result.add(targetId);
    }
  }

  return result;
}

View File

@@ -0,0 +1,96 @@
/**
* One node of the DOM-derived snapshot tree.
*
* Only `id`, `role` and `children` are required; every other field is optional.
*/
export interface DomSnapshotNode {
// Node identifier. Presumably the value of the element's `data-aipex-nodeid`
// attribute mentioned in the README — TODO confirm against the collector.
id: string;
// Semantic role of the node.
role: string;
// Name, value and description of the node, when present.
name?: string;
value?: string;
description?: string;
// Child nodes; this is what makes the structure a tree.
children: DomSnapshotNode[];
tagName?: string;
// Tri-state: true, false, or "mixed".
checked?: boolean | "mixed";
// Tri-state: true, false, or "mixed".
pressed?: boolean | "mixed";
// Optional boolean state flags.
disabled?: boolean;
focused?: boolean;
selected?: boolean;
expanded?: boolean;
// Optional element details. How each one is populated is decided by the
// collector, which is not part of this file.
placeholder?: string;
href?: string;
title?: string;
textContent?: string;
inputType?: string;
}
/**
* Plain-object lookup table from a node uid to its DomSnapshotNode.
*/
export interface DomSnapshotFlatMap {
[uid: string]: DomSnapshotNode;
}
/**
* Result of one snapshot collection: the tree plus a flat index over it.
*/
export interface DomSnapshotResult {
// Root of the snapshot tree.
root: DomSnapshotNode;
// Flat uid -> node lookup.
idToNode: DomSnapshotFlatMap;
// Node count. NOTE(review): presumably the number of entries in idToNode — confirm.
totalNodes: number;
// NOTE(review): presumably epoch milliseconds (e.g. Date.now()) — confirm.
timestamp: number;
}
/**
* Options controlling snapshot collection.
*
* Every field is required on this type, so the defaults quoted below have to
* be applied by whatever builds the options object (not visible here).
* SerializedDomSnapshot stores these as Partial<CollectorOptions>.
*/
export interface CollectorOptions {
/**
* Maximum text length stored for StaticText nodes. Defaults to 160.
*/
maxTextLength: number;
/**
* Should we include invisible elements (display:none / hidden). Defaults to false.
*/
includeHidden: boolean;
/**
* Whether to capture raw text nodes as StaticText entries. Defaults to true.
*/
captureTextNodes: boolean;
}
/**
* A DomSnapshotResult extended with a `metadata` block.
*/
export interface SerializedDomSnapshot extends DomSnapshotResult {
/**
* Additional metadata to help debug or visualize the snapshot.
*/
metadata: {
// Title and URL. NOTE(review): presumably those of the captured page — confirm.
title: string;
url: string;
// NOTE(review): string timestamp, presumably ISO-8601 — confirm.
collectedAt: string;
// Collector options recorded with the snapshot; any subset may be present.
options: Partial<CollectorOptions>;
};
}
/**
* Node shape consumed by the text formatter (shouldIncludeInOutput,
* getNodeAttributes, formatNode, buildFocusAncestorSet).
*
* Only `id`, `role` and `children` are required.
*/
export interface TextSnapshotNode {
// Printed as `uid=<id>` by getNodeAttributes for every role except StaticText.
id: string;
role: string;
name?: string;
value?: string;
description?: string;
children: TextSnapshotNode[];
// NOTE(review): presumably the CDP backend node id when the snapshot comes
// from the CDP-based implementation — confirm.
backendDOMNodeId?: number;
tagName?: string;
// optional properties
// `focused` also selects the '*' marker in formatNode.
focused?: boolean;
modal?: boolean;
// Not emitted by getNodeAttributes.
keyshortcuts?: string;
// Not emitted by getNodeAttributes.
roledescription?: string;
valuetext?: string;
disabled?: boolean;
expanded?: boolean;
selected?: boolean;
// Tri-state: true, false, or "mixed".
checked?: boolean | "mixed";
// Tri-state: true, false, or "mixed".
pressed?: boolean | "mixed";
level?: number;
valuemin?: number;
valuemax?: number;
autocomplete?: string;
// Not emitted by getNodeAttributes.
haspopup?: string;
// Not emitted by getNodeAttributes.
invalid?: string;
// Not emitted by getNodeAttributes.
orientation?: string;
readonly?: boolean;
required?: boolean;
// Optional async accessor that resolves to a DOM Element.
elementHandle?: () => Promise<Element>;
}
/**
* A text snapshot: the node tree, an id -> node Map, and a tab id.
*/
export interface TextSnapshot {
root: TextSnapshotNode;
// Same Map shape that buildFocusAncestorSet() takes as its second argument.
idToNode: Map<string, TextSnapshotNode>;
// NOTE(review): presumably the browser tab the snapshot was taken from — confirm.
tabId: number;
}

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "./dist",
"lib": ["esnext", "dom", "dom.iterable"],
"emitDeclarationOnly": false
}
}

View File

@@ -0,0 +1,9 @@
import { defineConfig } from "vitest/config";
// Vitest configuration exported as this module's default.
export default defineConfig({
test: {
// Run tests in worker threads (Vitest `pool` option).
pool: "threads",
// Suppress console output produced by the tests.
silent: true,
// Give tests a jsdom-backed DOM.
// NOTE(review): the lockfile entry for packages/dom-snapshot lists only
// @types/node as a devDependency — confirm that vitest and jsdom are
// resolvable from the workspace root.
environment: "jsdom",
},
});

6
pnpm-lock.yaml generated
View File

@@ -409,6 +409,12 @@ importers:
specifier: ^4.21.0 specifier: ^4.21.0
version: 4.21.0 version: 4.21.0
packages/dom-snapshot:
devDependencies:
'@types/node':
specifier: ^24.10.1
version: 24.10.1
packages: packages:
'@acemir/cssom@0.9.24': '@acemir/cssom@0.9.24':

View File

@@ -11,7 +11,9 @@
"@aipexstudio/aipex-react": ["./packages/aipex-react/src"], "@aipexstudio/aipex-react": ["./packages/aipex-react/src"],
"@aipexstudio/aipex-react/*": ["./packages/aipex-react/src/*"], "@aipexstudio/aipex-react/*": ["./packages/aipex-react/src/*"],
"@aipexstudio/browser-runtime": ["./packages/browser-runtime/src"], "@aipexstudio/browser-runtime": ["./packages/browser-runtime/src"],
"@aipexstudio/browser-runtime/*": ["./packages/browser-runtime/src/*"] "@aipexstudio/browser-runtime/*": ["./packages/browser-runtime/src/*"],
"@aipexstudio/dom-snapshot": ["./packages/dom-snapshot/src"],
"@aipexstudio/dom-snapshot/*": ["./packages/dom-snapshot/src/*"]
}, },
"downlevelIteration": true, "downlevelIteration": true,
"esModuleInterop": true, "esModuleInterop": true,