mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-13 15:46:22 +00:00
fix: enable agent interaction with elements inside iframes (#667)
* fix: enable agent interaction with elements inside iframes

  Fetch accessibility trees from all frames via Page.getFrameTree() + per-frame Accessibility.getFullAXTree(frameId), so iframe elements appear in snapshots with valid backendNodeIds. Pages without iframes take the original single-call path with zero overhead.

  Update snapshot tree builders to walk multiple RootWebArea roots from merged multi-frame trees. Extract same-origin iframe content in the markdown walker; show [iframe: url] placeholder for cross-origin.

* fix: namespace AX nodeIds by frameId to prevent cross-frame collisions

  CDP AXNodeId values are frame-scoped — each frame's accessibility tree starts its own counter from 1. Prefix nodeId and childIds with frameId before merging so the nodeMap in snapshot builders never overwrites nodes from a different frame.
This commit is contained in:
@@ -392,9 +392,48 @@ export class Browser {
|
||||
|
||||
// --- Observation ---
|
||||
|
||||
/**
 * Lists the ids of every frame reported by `Page.getFrameTree()`, in
 * pre-order: a frame's own id first, then each child subtree in order.
 *
 * This is best-effort: if the frame tree cannot be fetched or walked, an
 * empty array is returned instead of throwing.
 *
 * @param session - Protocol session used to query the frame tree.
 * @returns Frame ids in pre-order, or `[]` on any failure.
 */
private async getFrameIds(session: ProtocolApi): Promise<string[]> {
  // Minimal structural view of the frame tree; only these fields are read.
  type FrameTreeNode = { frame: { id: string }; childFrames?: FrameTreeNode[] }

  // Pre-order flatten: the node's own id, followed by its descendants' ids.
  const flatten = (node: FrameTreeNode): string[] => [
    node.frame.id,
    ...(node.childFrames ?? []).flatMap((child) => flatten(child)),
  ]

  try {
    const { frameTree } = await session.Page.getFrameTree()
    return flatten(frameTree as FrameTreeNode)
  } catch {
    // Deliberate fallback: callers treat "no ids" as "use the single-call path".
    return []
  }
}
|
||||
|
||||
/**
 * Fetches the accessibility (AX) nodes for the page behind `session`,
 * including nodes that live inside child frames.
 *
 * - When at most one frame id is known (no child frames, or the frame tree
 *   lookup failed and yielded no ids), a single unscoped
 *   `Accessibility.getFullAXTree()` call is made and its nodes are returned
 *   unchanged.
 * - Otherwise each frame is queried separately and the results are merged
 *   into one flat list. AX node ids are scoped per frame, so `nodeId` and
 *   `childIds` are prefixed with `<frameId>:` to keep nodes from different
 *   frames from colliding once merged.
 *
 * Frames whose tree cannot be fetched are skipped rather than failing the
 * whole snapshot.
 *
 * @param session - Protocol session used for the Page/Accessibility calls.
 * @returns All AX nodes; ids are frame-prefixed only on the multi-frame path.
 */
private async fetchAXTree(session: ProtocolApi): Promise<AXNode[]> {
  const frameIds = await this.getFrameIds(session)

  // Single-frame (or unknown-frame) path: one call, ids left untouched.
  if (frameIds.length <= 1) {
    const result = await session.Accessibility.getFullAXTree()
    return (result.nodes as AXNode[]) ?? []
  }

  const allNodes: AXNode[] = []
  for (const frameId of frameIds) {
    try {
      const result = await session.Accessibility.getFullAXTree({ frameId })
      const nodes = (result.nodes as AXNode[]) ?? []
      for (const node of nodes) {
        // NOTE(review): only nodeId and childIds are namespaced here. If
        // AXNode carries other id references (e.g. a parent id) that
        // consumers match against nodeId, they would need the same prefix.
        allNodes.push({
          ...node,
          nodeId: `${frameId}:${node.nodeId}`,
          childIds: node.childIds?.map((id) => `${frameId}:${id}`),
        })
      }
    } catch {
      // Cross-origin or detached frames may fail — skip
    }
  }
  return allNodes
}
|
||||
|
||||
async snapshot(page: number): Promise<string> {
|
||||
|
||||
@@ -20,7 +20,7 @@ export function buildContentMarkdownExpression(
|
||||
// Uses var + ES5 style for consistency with other injected scripts.
|
||||
// Context object: { pre: bool, ld: listDepth, lt: listType, td: tableDepth }
|
||||
const DOM_WALKER_SCRIPT = `(function(o) {
|
||||
var SKIP = {SCRIPT:1,STYLE:1,NOSCRIPT:1,SVG:1,TEMPLATE:1,IFRAME:1,CANVAS:1,VIDEO:1,AUDIO:1,OBJECT:1,EMBED:1};
|
||||
var SKIP = {SCRIPT:1,STYLE:1,NOSCRIPT:1,SVG:1,TEMPLATE:1,CANVAS:1,VIDEO:1,AUDIO:1,OBJECT:1,EMBED:1};
|
||||
var FORM = {INPUT:1,SELECT:1,TEXTAREA:1,BUTTON:1};
|
||||
var vh = window.innerHeight, vw = window.innerWidth;
|
||||
var root = o.selector ? document.querySelector(o.selector) : document.body;
|
||||
@@ -219,6 +219,15 @@ function walk(node, ctx) {
|
||||
t = kids(el, ctx).trim();
|
||||
return t ? '\\n*' + t + '*\\n' : '';
|
||||
|
||||
case 'IFRAME':
|
||||
try {
|
||||
var idoc = el.contentDocument;
|
||||
if (idoc && idoc.body) return walk(idoc.body, ctx);
|
||||
} catch(e) {}
|
||||
var isrc = el.src || el.getAttribute('src');
|
||||
if (isrc) return '\\n\\n[iframe: ' + isrc + ']\\n\\n';
|
||||
return '';
|
||||
|
||||
default:
|
||||
return kids(el, ctx);
|
||||
}
|
||||
|
||||
@@ -100,11 +100,16 @@ export function buildInteractiveTree(nodes: AXNode[]): string[] {
|
||||
if (node.childIds) for (const childId of node.childIds) walk(childId)
|
||||
}
|
||||
|
||||
const root =
|
||||
nodes.find(
|
||||
(n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
|
||||
) ?? nodes[0]
|
||||
if (root?.childIds) for (const childId of root.childIds) walk(childId)
|
||||
const roots = nodes.filter(
|
||||
(n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
|
||||
)
|
||||
if (roots.length === 0 && nodes[0]?.childIds) {
|
||||
for (const childId of nodes[0].childIds) walk(childId)
|
||||
} else {
|
||||
for (const root of roots) {
|
||||
if (root.childIds) for (const childId of root.childIds) walk(childId)
|
||||
}
|
||||
}
|
||||
|
||||
return lines
|
||||
}
|
||||
@@ -160,11 +165,16 @@ export function buildEnhancedTree(nodes: AXNode[]): string[] {
|
||||
for (const childId of node.childIds) walk(childId, depth + 1)
|
||||
}
|
||||
|
||||
const root =
|
||||
nodes.find(
|
||||
(n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
|
||||
) ?? nodes[0]
|
||||
if (root?.childIds) for (const childId of root.childIds) walk(childId, 0)
|
||||
const roots = nodes.filter(
|
||||
(n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
|
||||
)
|
||||
if (roots.length === 0 && nodes[0]?.childIds) {
|
||||
for (const childId of nodes[0].childIds) walk(childId, 0)
|
||||
} else {
|
||||
for (const root of roots) {
|
||||
if (root.childIds) for (const childId of root.childIds) walk(childId, 0)
|
||||
}
|
||||
}
|
||||
|
||||
return lines
|
||||
}
|
||||
@@ -292,11 +302,16 @@ export function extractLinkNodes(nodes: AXNode[]): LinkNode[] {
|
||||
if (node.childIds) for (const childId of node.childIds) walk(childId)
|
||||
}
|
||||
|
||||
const root =
|
||||
nodes.find(
|
||||
(n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
|
||||
) ?? nodes[0]
|
||||
if (root?.childIds) for (const childId of root.childIds) walk(childId)
|
||||
const roots = nodes.filter(
|
||||
(n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
|
||||
)
|
||||
if (roots.length === 0 && nodes[0]?.childIds) {
|
||||
for (const childId of nodes[0].childIds) walk(childId)
|
||||
} else {
|
||||
for (const root of roots) {
|
||||
if (root.childIds) for (const childId of root.childIds) walk(childId)
|
||||
}
|
||||
}
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user