diff --git a/packages/browseros-agent/apps/server/src/browser/browser.ts b/packages/browseros-agent/apps/server/src/browser/browser.ts
index a4e34148f..446e262d4 100644
--- a/packages/browseros-agent/apps/server/src/browser/browser.ts
+++ b/packages/browseros-agent/apps/server/src/browser/browser.ts
@@ -392,9 +392,71 @@ export class Browser {
 
   // --- Observation ---
 
+  /**
+   * Collects the id of every frame in the page's frame tree, parents before
+   * their children, by walking the result of `Page.getFrameTree`.
+   *
+   * Best-effort: any failure yields an empty list, which makes
+   * `fetchAXTree` fall back to a single un-scoped snapshot.
+   */
+  private async getFrameIds(session: ProtocolApi): Promise<string[]> {
+    try {
+      const result = await session.Page.getFrameTree()
+      const ids: string[] = []
+      type Tree = { frame: { id: string }; childFrames?: Tree[] }
+      function collect(tree: Tree) {
+        ids.push(tree.frame.id)
+        if (tree.childFrames)
+          for (const child of tree.childFrames) collect(child)
+      }
+      collect(result.frameTree as Tree)
+      return ids
+    } catch {
+      return []
+    }
+  }
+
+  /**
+   * Returns the page's accessibility nodes, covering every frame.
+   *
+   * With at most one known frame this is the plain `getFullAXTree()` result.
+   * With several frames each frame is queried separately and its node ids
+   * are rewritten to `<frameId>:<nodeId>` so ids from different frames cannot
+   * collide in the merged list. Frames whose tree cannot be fetched are
+   * skipped.
+   */
   private async fetchAXTree(session: ProtocolApi): Promise<AXNode[]> {
-    const result = await session.Accessibility.getFullAXTree()
-    return (result.nodes as AXNode[]) ?? []
+    const frameIds = await this.getFrameIds(session)
+
+    if (frameIds.length <= 1) {
+      const result = await session.Accessibility.getFullAXTree()
+      return (result.nodes as AXNode[]) ?? []
+    }
+
+    const allNodes: AXNode[] = []
+    for (const frameId of frameIds) {
+      const scoped = (id: string): string => `${frameId}:${id}`
+      try {
+        const result = await session.Accessibility.getFullAXTree({ frameId })
+        const nodes = (result.nodes as AXNode[]) ?? []
+        for (const node of nodes) {
+          // parentId is part of the CDP AXNode payload; read it through a
+          // widened type in case the local AXNode type omits it.
+          const { parentId } = node as AXNode & { parentId?: string }
+          allNodes.push({
+            ...node,
+            nodeId: scoped(node.nodeId),
+            childIds: node.childIds?.map((id) => scoped(id)),
+            // Rewrite the upward link as well so it never points at an
+            // un-scoped id that no longer exists in the merged list.
+            ...(parentId === undefined ? {} : { parentId: scoped(parentId) }),
+          })
+        }
+      } catch {
+        // Cross-origin or detached frames may fail — skip
+      }
+    }
+    return allNodes
   }
 
   async snapshot(page: number): Promise {
diff --git a/packages/browseros-agent/apps/server/src/browser/content-markdown.ts b/packages/browseros-agent/apps/server/src/browser/content-markdown.ts
index 5ff9b1d56..52e9b3c4c 100644
--- a/packages/browseros-agent/apps/server/src/browser/content-markdown.ts
+++ b/packages/browseros-agent/apps/server/src/browser/content-markdown.ts
@@ -20,7 +20,7 @@ export function buildContentMarkdownExpression(
 // Uses var + ES5 style for consistency with other injected scripts.
 // Context object: { pre: bool, ld: listDepth, lt: listType, td: tableDepth }
 const DOM_WALKER_SCRIPT = `(function(o) {
-var SKIP = {SCRIPT:1,STYLE:1,NOSCRIPT:1,SVG:1,TEMPLATE:1,IFRAME:1,CANVAS:1,VIDEO:1,AUDIO:1,OBJECT:1,EMBED:1};
+var SKIP = {SCRIPT:1,STYLE:1,NOSCRIPT:1,SVG:1,TEMPLATE:1,CANVAS:1,VIDEO:1,AUDIO:1,OBJECT:1,EMBED:1};
 var FORM = {INPUT:1,SELECT:1,TEXTAREA:1,BUTTON:1};
 var vh = window.innerHeight, vw = window.innerWidth;
 var root = o.selector ? document.querySelector(o.selector) : document.body;
@@ -219,6 +219,15 @@ function walk(node, ctx) {
       t = kids(el, ctx).trim();
       return t ? '\\n*' + t + '*\\n' : '';
 
+    case 'IFRAME':
+      try {
+        var idoc = el.contentDocument;
+        if (idoc && idoc.body) return walk(idoc.body, ctx);
+      } catch(e) {}
+      var isrc = el.src || el.getAttribute('src');
+      if (isrc) return '\\n\\n[iframe: ' + isrc + ']\\n\\n';
+      return '';
+
     default:
       return kids(el, ctx);
   }
diff --git a/packages/browseros-agent/apps/server/src/browser/snapshot.ts b/packages/browseros-agent/apps/server/src/browser/snapshot.ts
index 4b6893af5..fab04972e 100644
--- a/packages/browseros-agent/apps/server/src/browser/snapshot.ts
+++ b/packages/browseros-agent/apps/server/src/browser/snapshot.ts
@@ -100,11 +100,16 @@ export function buildInteractiveTree(nodes: AXNode[]): string[] {
     if (node.childIds) for (const childId of node.childIds) walk(childId)
   }
 
-  const root =
-    nodes.find(
-      (n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
-    ) ?? nodes[0]
-  if (root?.childIds) for (const childId of root.childIds) walk(childId)
+  const roots = nodes.filter(
+    (n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
+  )
+  if (roots.length === 0 && nodes[0]?.childIds) {
+    for (const childId of nodes[0].childIds) walk(childId)
+  } else {
+    for (const root of roots) {
+      if (root.childIds) for (const childId of root.childIds) walk(childId)
+    }
+  }
 
   return lines
 }
@@ -160,11 +165,16 @@ export function buildEnhancedTree(nodes: AXNode[]): string[] {
       for (const childId of node.childIds) walk(childId, depth + 1)
   }
 
-  const root =
-    nodes.find(
-      (n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
-    ) ?? nodes[0]
-  if (root?.childIds) for (const childId of root.childIds) walk(childId, 0)
+  const roots = nodes.filter(
+    (n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
+  )
+  if (roots.length === 0 && nodes[0]?.childIds) {
+    for (const childId of nodes[0].childIds) walk(childId, 0)
+  } else {
+    for (const root of roots) {
+      if (root.childIds) for (const childId of root.childIds) walk(childId, 0)
+    }
+  }
 
   return lines
 }
@@ -292,11 +302,16 @@ export function extractLinkNodes(nodes: AXNode[]): LinkNode[] {
     if (node.childIds) for (const childId of node.childIds) walk(childId)
   }
 
-  const root =
-    nodes.find(
-      (n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
-    ) ?? nodes[0]
-  if (root?.childIds) for (const childId of root.childIds) walk(childId)
+  const roots = nodes.filter(
+    (n) => n.role?.value === 'RootWebArea' || n.role?.value === 'WebArea',
+  )
+  if (roots.length === 0 && nodes[0]?.childIds) {
+    for (const childId of nodes[0].childIds) walk(childId)
+  } else {
+    for (const root of roots) {
+      if (root.childIds) for (const childId of root.childIds) walk(childId)
+    }
+  }
 
   return links
 }