# Patch summary (3 files under packages/browseros-agent/apps/server/src/browser/):
#
# browser.ts  - Focus the target by clicking its centre first
#               (elements.getElementCenter + mouse.dispatchClick) and fall back
#               to elements.focusElement only if that throws. When `clear` is
#               set, run keyboard.clearField; then, if click coordinates are
#               known and elements.getInputValue still returns text, send a
#               triple-click so the following keyboard.typeText replaces the
#               selection.
# elements.ts - Add getInputValue(): evaluates
#               `this.value ?? this.textContent ?? ""` on the element and
#               returns '' if the call throws.
# keyboard.ts - clearField() now uses modifier 4 on darwin and 2 elsewhere for
#               the select-all chord, and presses Backspace instead of Delete.
#
# NOTE(review): PLATFORM_MODIFIER is derived from os.platform() of the server
#   process; this presumably assumes the browser runs on the same OS - confirm.
# NOTE(review): on macOS a synthetic Meta+A may not trigger select-all unless
#   Input.dispatchKeyEvent is also given `commands: ['selectAll']` - verify.
# NOTE(review): this patch was recovered from a whitespace-collapsed copy.
#   Line structure was rebuilt to match every hunk's line counts, but
#   indentation and the position of blank context lines are inferred, and the
#   generic arguments on Promise/Record were restored by inference. Run
#   `git apply --check --ignore-whitespace` before applying.
diff --git a/packages/browseros-agent/apps/server/src/browser/browser.ts b/packages/browseros-agent/apps/server/src/browser/browser.ts
index a81a990d..87b62c07 100644
--- a/packages/browseros-agent/apps/server/src/browser/browser.ts
+++ b/packages/browseros-agent/apps/server/src/browser/browser.ts
@@ -798,25 +798,43 @@ export class Browser {
 
     await elements.scrollIntoView(session, element)
 
+    // Always click to guarantee real keyboard focus.
+    // DOM.focus() is unreliable for shadow DOM, iframes, and custom components.
     let coords: { x: number; y: number } | undefined
     try {
-      await elements.focusElement(session, element)
-      try {
-        coords = await elements.getElementCenter(session, element)
-      } catch {
-        // coordinates are best-effort
-      }
+      const { x, y } = await elements.getElementCenter(session, element)
+      await mouse.dispatchClick(session, x, y, 'left', 1, 0)
+      coords = { x, y }
     } catch {
+      // Fallback to DOM.focus() if we can't get coordinates
       try {
-        const { x, y } = await elements.getElementCenter(session, element)
-        await mouse.dispatchClick(session, x, y, 'left', 1, 0)
-        coords = { x, y }
+        await elements.focusElement(session, element)
       } catch {
-        logger.warn('Could not focus element via click either')
+        logger.warn('Could not focus element via click or DOM.focus()')
+      }
+    }
+
+    if (clear) {
+      // Primary: keyboard select-all + backspace
+      await keyboard.clearField(session)
+
+      // Fallback: if field still has content, triple-click to select all
+      // then typeText will overwrite the selection
+      if (coords) {
+        const value = await elements.getInputValue(session, element)
+        if (value) {
+          await mouse.dispatchClick(
+            session,
+            coords.x,
+            coords.y,
+            'left',
+            3,
+            0,
+          )
+        }
       }
     }
 
-    if (clear) await keyboard.clearField(session)
     await keyboard.typeText(session, text)
     return coords
   }
diff --git a/packages/browseros-agent/apps/server/src/browser/elements.ts b/packages/browseros-agent/apps/server/src/browser/elements.ts
index 2111875e..98d8f797 100644
--- a/packages/browseros-agent/apps/server/src/browser/elements.ts
+++ b/packages/browseros-agent/apps/server/src/browser/elements.ts
@@ -94,6 +94,23 @@ export async function resolveObjectId(
   return objectId
 }
 
+/** Read the current value/textContent of an input, textarea, or contenteditable element. */
+export async function getInputValue(
+  session: ProtocolApi,
+  backendNodeId: number,
+): Promise<string> {
+  try {
+    const value = await callOnElement(
+      session,
+      backendNodeId,
+      'function(){return this.value??this.textContent??""}',
+    )
+    return (value as string) ?? ''
+  } catch {
+    return ''
+  }
+}
+
 export async function callOnElement(
   session: ProtocolApi,
   backendNodeId: number,
diff --git a/packages/browseros-agent/apps/server/src/browser/keyboard.ts b/packages/browseros-agent/apps/server/src/browser/keyboard.ts
index 4dc489b9..70961936 100644
--- a/packages/browseros-agent/apps/server/src/browser/keyboard.ts
+++ b/packages/browseros-agent/apps/server/src/browser/keyboard.ts
@@ -1,5 +1,9 @@
+import { platform } from 'node:os'
 import type { ProtocolApi } from '@browseros/cdp-protocol/protocol-api'
 
+// Meta (Cmd) on macOS, Control on everything else
+const PLATFORM_MODIFIER = platform() === 'darwin' ? 4 : 2
+
 type KeyInfo = { code: string; keyCode: number | undefined }
 
 const KEY_MAP: Record<string, KeyInfo> = {
@@ -180,31 +184,33 @@ export async function typeText(
 }
 
 export async function clearField(session: ProtocolApi): Promise<void> {
+  // Select all: Cmd+A on macOS, Ctrl+A on others
   await session.Input.dispatchKeyEvent({
     type: 'keyDown',
     key: 'a',
     code: 'KeyA',
-    modifiers: 2,
+    modifiers: PLATFORM_MODIFIER,
     windowsVirtualKeyCode: 65,
   })
   await session.Input.dispatchKeyEvent({
     type: 'keyUp',
     key: 'a',
     code: 'KeyA',
-    modifiers: 2,
+    modifiers: PLATFORM_MODIFIER,
     windowsVirtualKeyCode: 65,
   })
 
+  // Backspace to delete selection (more reliable cross-platform than Delete)
   await session.Input.dispatchKeyEvent({
     type: 'keyDown',
-    key: 'Delete',
-    code: 'Delete',
-    windowsVirtualKeyCode: 46,
+    key: 'Backspace',
+    code: 'Backspace',
+    windowsVirtualKeyCode: 8,
   })
   await session.Input.dispatchKeyEvent({
     type: 'keyUp',
-    key: 'Delete',
-    code: 'Delete',
-    windowsVirtualKeyCode: 46,
+    key: 'Backspace',
+    code: 'Backspace',
+    windowsVirtualKeyCode: 8,
   })
 }