diff --git a/packages/browseros-agent/apps/server/src/browser/browser.ts b/packages/browseros-agent/apps/server/src/browser/browser.ts
index a81a990d1..389536b04 100644
--- a/packages/browseros-agent/apps/server/src/browser/browser.ts
+++ b/packages/browseros-agent/apps/server/src/browser/browser.ts
@@ -821,6 +821,22 @@ export class Browser {
     return coords
   }
 
+  async clear(page: number, element: number): Promise<void> {
+    const session = await this.resolveSession(page)
+    await elements.scrollIntoView(session, element)
+    try {
+      await elements.focusElement(session, element)
+    } catch {
+      try {
+        const { x, y } = await elements.getElementCenter(session, element)
+        await mouse.dispatchClick(session, x, y, 'left', 1, 0)
+      } catch {
+        logger.warn('Could not focus element for clear')
+      }
+    }
+    await keyboard.clearField(session)
+  }
+
   async pressKey(page: number, key: string): Promise<void> {
     const session = await this.resolveSession(page)
     await keyboard.pressCombo(session, key)
diff --git a/packages/browseros-agent/apps/server/src/browser/keyboard.ts b/packages/browseros-agent/apps/server/src/browser/keyboard.ts
index 4dc489b95..6eb98a67d 100644
--- a/packages/browseros-agent/apps/server/src/browser/keyboard.ts
+++ b/packages/browseros-agent/apps/server/src/browser/keyboard.ts
@@ -180,31 +180,31 @@ export async function typeText(
 }
 
 export async function clearField(session: ProtocolApi): Promise<void> {
+  // Use the CDP `commands` parameter to trigger the selectAll editing command
+  // directly, bypassing platform-specific keyboard shortcut mappings
+  // (Ctrl+A doesn't select all on macOS Chrome — it's the Emacs "beginning of paragraph" binding)
   await session.Input.dispatchKeyEvent({
-    type: 'keyDown',
+    type: 'rawKeyDown',
     key: 'a',
     code: 'KeyA',
-    modifiers: 2,
-    windowsVirtualKeyCode: 65,
+    commands: ['selectAll'],
   })
   await session.Input.dispatchKeyEvent({
     type: 'keyUp',
     key: 'a',
     code: 'KeyA',
-    modifiers: 2,
-    windowsVirtualKeyCode: 65,
   })
   await session.Input.dispatchKeyEvent({
-    type: 'keyDown',
-    key: 'Delete',
-    code: 'Delete',
-    windowsVirtualKeyCode: 46,
+    type: 'rawKeyDown',
+    key: 'Backspace',
+    code: 'Backspace',
+    windowsVirtualKeyCode: 8,
   })
   await session.Input.dispatchKeyEvent({
     type: 'keyUp',
-    key: 'Delete',
-    code: 'Delete',
-    windowsVirtualKeyCode: 46,
+    key: 'Backspace',
+    code: 'Backspace',
+    windowsVirtualKeyCode: 8,
   })
 }
 
diff --git a/packages/browseros-agent/apps/server/src/tools/input.ts b/packages/browseros-agent/apps/server/src/tools/input.ts
index 37e8901a6..bacc830e0 100644
--- a/packages/browseros-agent/apps/server/src/tools/input.ts
+++ b/packages/browseros-agent/apps/server/src/tools/input.ts
@@ -177,7 +177,7 @@ export const clear = defineTool({
     element: z.number(),
   }),
   handler: async (args, ctx, response) => {
-    await ctx.browser.fill(args.page, args.element, '', true)
+    await ctx.browser.clear(args.page, args.element)
     response.text(`Cleared [${args.element}]`)
     response.data({ action: 'clear', page: args.page, element: args.element })
     response.includeSnapshot(args.page)