fix: detect custom clickable elements in take_snapshot

take_snapshot only used the accessibility (AX) tree, which misses custom
components (cursor:pointer divs, elements with onclick handlers, etc.)
that lack ARIA roles. These elements appeared as role="generic" and were
invisible to the agent.

Changes:
- Merge the results of findCursorInteractiveElements into snapshot() so
  take_snapshot catches cursor:pointer, onclick, and tabindex elements
- Add DisclosureTriangle to INTERACTIVE_ROLES for <summary> elements
- Use aria-label as text fallback in cursor detection for icon-only buttons
- Fix dedup bug in enhancedSnapshot that silently dropped all
  cursor-detected elements: it deduplicated against every AX node ID
  instead of only the IDs already included in the output
This commit is contained in:
shivammittal274
2026-03-16 13:31:46 +05:30
parent ecd31efcb0
commit d4e0a30e7c
2 changed files with 31 additions and 6 deletions

View File

@@ -389,7 +389,30 @@ export class Browser {
const session = await this.resolveSession(page)
const nodes = await this.fetchAXTree(session)
if (nodes.length === 0) return ''
return snapshot.buildInteractiveTree(nodes).join('\n')
const lines = snapshot.buildInteractiveTree(nodes)
try {
const cursorElements =
await snapshot.findCursorInteractiveElements(session)
if (cursorElements.length > 0) {
const includedIds = new Set<number>()
for (const line of lines) {
const match = line.match(/^\[(\d+)\]/)
if (match) includedIds.add(Number(match[1]))
}
for (const el of cursorElements) {
if (includedIds.has(el.backendNodeId)) continue
lines.push(`[${el.backendNodeId}] clickable "${el.text}"`)
}
}
} catch {
// cursor detection is best-effort; AX tree results are still returned
}
return lines.join('\n')
}
async getPageLinks(
@@ -441,15 +464,15 @@ export class Browser {
await snapshot.findCursorInteractiveElements(session)
if (cursorElements.length > 0) {
const existingIds = new Set<number>()
for (const node of nodes) {
if (node.backendDOMNodeId !== undefined)
existingIds.add(node.backendDOMNodeId)
const includedIds = new Set<number>()
for (const line of treeLines) {
const match = line.match(/\[(\d+)\]/)
if (match) includedIds.add(Number(match[1]))
}
const extras: string[] = []
for (const el of cursorElements) {
if (existingIds.has(el.backendNodeId)) continue
if (includedIds.has(el.backendNodeId)) continue
extras.push(
`[${el.backendNodeId}] clickable "${el.text}" (${el.reasons.join(', ')})`,
)

View File

@@ -41,6 +41,7 @@ const INTERACTIVE_ROLES = new Set([
'option',
'treeitem',
'listbox',
'DisclosureTriangle',
])
const NAMED_CONTENT_ROLES = new Set([
@@ -196,6 +197,7 @@ const CURSOR_INTERACTIVE_JS = `(function() {
if (parent && getComputedStyle(parent).cursor === 'pointer') continue;
}
var text = (el.textContent || '').trim().slice(0, 100);
if (!text) text = (el.getAttribute('aria-label') || '').trim();
if (!text) continue;
var rect = el.getBoundingClientRect();
if (rect.width === 0 || rect.height === 0) continue;