feat: clean-up - remove obsolete controller extension (#610)

* refactor(server): remove obsolete controller extension backend

* fix: address review feedback for PR #610
This commit is contained in:
Nikhil
2026-03-27 17:01:04 -07:00
committed by GitHub
parent ace9307878
commit 9bdb2413ec
115 changed files with 385 additions and 9517 deletions

View File

@@ -32,7 +32,7 @@ Use **kebab-case** for all file and folder names:
| Multi-word files | kebab-case | `gemini-agent.ts`, `mcp-context.ts` |
| Single-word files | lowercase | `types.ts`, `browser.ts`, `index.ts` |
| Test files | `.test.ts` suffix | `mcp-context.test.ts` |
| Folders | kebab-case | `controller-server/`, `rate-limiter/` |
| Folders | kebab-case | `rate-limiter/`, `browser-tools/` |
Classes remain PascalCase in code, but live in kebab-case files:
```typescript
@@ -97,21 +97,16 @@ The main MCP server that exposes browser automation tools via HTTP/SSE.
**Key components:**
- `src/tools/` - MCP tool definitions, split into:
- `cdp-based/` - Tools using Chrome DevTools Protocol (network, console, emulation, input, etc.)
- `controller-based/` - Tools using the browser extension (navigation, clicks, screenshots, tabs, history, bookmarks)
- `src/controller-server/` - WebSocket server that bridges to the browser extension
- `ControllerBridge` handles WebSocket connections with extension clients
- `ControllerContext` wraps the bridge for tool handlers
- `cdp-based/` - Tools using Chrome DevTools Protocol (navigation, DOM interaction, network, console, emulation, input, etc.)
- `src/common/` - Shared utilities (McpContext, PageCollector, browser connection, identity, db)
- `src/agent/` - AI agent functionality (Gemini adapter, rate limiting, session management)
- `src/http/` - Hono HTTP server with MCP, health, and provider routes
**Tool types:**
- CDP tools require a direct CDP connection (`--cdp-port`)
- Controller tools work via the browser extension over WebSocket
### Shared (`packages/shared`)
Shared constants, types, and configuration used by both server and extension. Avoids magic numbers.
Shared constants, types, and configuration used across packages. Avoids magic numbers.
**Structure:**
- `src/constants/` - Configuration values (ports, timeouts, limits, urls, paths)
@@ -119,22 +114,12 @@ Shared constants, types, and configuration used by both server and extension. Av
**Exports:** `@browseros/shared/constants/*`, `@browseros/shared/types/*`
### Controller Extension (`apps/controller-ext`)
Chrome extension that receives commands from the server via WebSocket.
**Entry point:** `src/background/index.ts` → `BrowserOSController`
**Structure:**
- `src/actions/` - Action handlers organized by domain (browser/, tab/, bookmark/, history/)
- `src/adapters/` - Chrome API adapters (TabAdapter, BookmarkAdapter, HistoryAdapter)
- `src/websocket/` - WebSocket client that connects to the server
### Communication Flow
```
AI Agent/MCP Client → HTTP Server (Hono) → Tool Handler
CDP (direct) ←── or ──→ WebSocket → Extension → Chrome APIs
CDP → BrowserOS / Chrome APIs
```
## Creating Packages

View File

@@ -10,7 +10,6 @@ apps/
agent/ # Agent UI (Chrome extension)
cli/ # Go CLI for controlling BrowserOS from the terminal
eval/ # Evaluation framework for benchmarking agents
controller-ext/ # BrowserOS Controller (Chrome extension for chrome.* APIs)
packages/
agent-sdk/ # Node.js SDK (@browseros-ai/agent-sdk)
@@ -24,7 +23,6 @@ packages/
| `apps/agent` | Agent UI — Chrome extension for the chat interface |
| `apps/cli` | Go CLI — control BrowserOS from the terminal or AI coding agents |
| `apps/eval` | Benchmark framework — WebVoyager, Mind2Web evaluation |
| `apps/controller-ext` | BrowserOS Controller — bridges `chrome.*` APIs to the server via WebSocket |
| `packages/agent-sdk` | Node.js SDK for browser automation with natural language |
| `packages/cdp-protocol` | Auto-generated CDP type bindings used by the server |
| `packages/shared` | Shared constants used across packages |
@@ -33,7 +31,6 @@ packages/
- `apps/server`: Bun server which contains the agent loop and tools.
- `apps/agent`: Agent UI (Chrome extension).
- `apps/controller-ext`: BrowserOS Controller - a Chrome extension that bridges `chrome.*` APIs to the server. Controller tools within the server communicate with this extension via WebSocket.
```
┌──────────────────────────────────────────────────────────────────────────┐
@@ -51,19 +48,19 @@ packages/
│ /health ─── Health check │
│ │
│ Tools: │
── CDP Tools (console, network, input, screenshot, ...)
└── Controller Tools (tabs, navigation, clicks, bookmarks, history)
── CDP-backed browser tools (tabs, navigation, input, screenshots, │
bookmarks, history, console, DOM, tab groups, windows, ...)
└──────────────────────────────────────────────────────────────────────────┘
│ CDP (client)WebSocket (server)
┌─────────────────────┐ ┌─────────────────────────────────────┐
Chromium CDP BrowserOS Controller Extension
(cdpPort: 9000) │ (extensionPort: 9300)
│ Server connects Bridges chrome.tabs, chrome.history
│ TO this as client │ chrome.bookmarks to the server
└─────────────────────┘ └─────────────────────────────────────┘
CDP (client)
─────────────────────┐
Chromium CDP
(cdpPort: 9000) │
│ │
Server connects
│ TO this as client
─────────────────────┘
```
### Ports
@@ -72,7 +69,7 @@ packages/
|------|--------------|---------|
| 9100 | `BROWSEROS_SERVER_PORT` | HTTP server - MCP endpoints, agent chat, health |
| 9000 | `BROWSEROS_CDP_PORT` | Chromium CDP server (BrowserOS Server connects as client) |
| 9300 | `BROWSEROS_EXTENSION_PORT` | WebSocket server for controller extension |
| 9300 | `BROWSEROS_EXTENSION_PORT` | Legacy BrowserOS launch arg kept for compatibility; not used by the server |
## Development
@@ -96,9 +93,8 @@ process-compose up
The `process-compose up` command runs the following in order:
1. `bun install` — installs dependencies
2. `bun --cwd apps/controller-ext build` — builds the controller extension
3. `bun --cwd apps/agent codegen` — generates agent code
4. `bun --cwd apps/server start` and `bun --cwd apps/agent dev` — starts server and agent in parallel
2. `bun --cwd apps/agent codegen` — generates agent code
3. `bun --cwd apps/server start` and `bun --cwd apps/agent dev` — starts server and agent in parallel
### Environment Variables
@@ -114,7 +110,7 @@ Runtime uses `.env.development`, while production artifact builds use `.env.prod
|----------|---------|-------------|
| `BROWSEROS_SERVER_PORT` | 9100 | HTTP server port (MCP, chat, health) |
| `BROWSEROS_CDP_PORT` | 9000 | Chromium CDP port (server connects as client) |
| `BROWSEROS_EXTENSION_PORT` | 9300 | WebSocket port for controller extension |
| `BROWSEROS_EXTENSION_PORT` | 9300 | Legacy BrowserOS launch arg kept for compatibility |
| `BROWSEROS_CONFIG_URL` | - | Remote config endpoint for rate limits |
| `BROWSEROS_INSTALL_ID` | - | Unique installation identifier (analytics) |
| `BROWSEROS_CLIENT_ID` | - | Client identifier (analytics) |
@@ -146,7 +142,7 @@ Copy from `apps/server/.env.production.example` before running `build:server`.
|----------|---------|-------------|
| `BROWSEROS_SERVER_PORT` | 9100 | Passed to BrowserOS via CLI args |
| `BROWSEROS_CDP_PORT` | 9000 | Passed to BrowserOS via CLI args |
| `BROWSEROS_EXTENSION_PORT` | 9300 | Passed to BrowserOS via CLI args |
| `BROWSEROS_EXTENSION_PORT` | 9300 | Legacy BrowserOS CLI arg still passed for compatibility |
| `VITE_BROWSEROS_SERVER_PORT` | 9100 | Agent UI connects to server (must match `BROWSEROS_SERVER_PORT`) |
| `BROWSEROS_BINARY` | - | Path to BrowserOS binary |
| `USE_BROWSEROS_BINARY` | true | Use BrowserOS instead of default Chrome |
@@ -163,15 +159,13 @@ bun run start:server # Start the server
bun run start:agent # Start agent extension (dev mode)
# Build
bun run build # Build server, agent, and controller extension
bun run build # Build server and agent
bun run build:server # Build production server resource artifacts and upload zips to R2
bun run build:agent # Build agent extension
bun run build:ext # Build controller extension
# Test
bun run test # Run standard tests
bun run test:cdp # Run CDP-based tests
bun run test:controller # Run controller-based tests
bun run test:integration # Run integration tests
# Quality

View File

@@ -1,19 +1,13 @@
import { dirname, join } from 'node:path'
import { fileURLToPath } from 'node:url'
import { defineWebExtConfig } from 'wxt'
// biome-ignore lint/style/noProcessEnv: config file needs env access
const env = process.env
const MONOREPO_ROOT = join(dirname(fileURLToPath(import.meta.url)), '../..')
const CONTROLLER_EXT_DIR = join(MONOREPO_ROOT, 'apps/controller-ext/dist')
const chromiumArgs = [
'--use-mock-keychain',
'--show-component-extension-options',
'--disable-browseros-server',
'--disable-browseros-extensions',
`--load-extension=${CONTROLLER_EXT_DIR}`,
]
if (env.BROWSEROS_CDP_PORT) {

View File

@@ -49,7 +49,7 @@ func init() {
statusCmd := &cobra.Command{
Use: "status",
Annotations: map[string]string{"group": "Setup:"},
Short: "Check extension connection status",
Short: "Check BrowserOS runtime status",
Args: cobra.NoArgs,
Run: func(cmd *cobra.Command, args []string) {
c := newClient()
@@ -64,12 +64,12 @@ func init() {
green := color.New(color.FgGreen).SprintFunc()
red := color.New(color.FgRed).SprintFunc()
ext := data["extensionConnected"]
extStr := red("disconnected")
if b, ok := ext.(bool); ok && b {
extStr = green("connected")
cdp := data["cdpConnected"]
cdpStr := red("disconnected")
if b, ok := cdp.(bool); ok && b {
cdpStr = green("connected")
}
fmt.Printf("Extension: %s\n", extStr)
fmt.Printf("Browser: %s\n", cdpStr)
},
}

View File

@@ -1,32 +0,0 @@
# Dependencies
node_modules/
# Build output
dist/
# Build unpublished docs
docs/
# TypeScript
*.tsbuildinfo
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Logs
*.log
npm-debug.log*
# Environment
.env
.env.local
# Claude
.claude

View File

@@ -1,430 +0,0 @@
# BrowserOS Controller
WebSocket-based Chrome Extension that exposes browser automation APIs for remote control.
**⚠️ IMPORTANT:** This extension ONLY works in **BrowserOS Chrome**, not regular Chrome!
---
## 🚀 Quick Start
### 1. Build the Extension
```bash
npm install
npm run build
```
### 2. Load Extension in BrowserOS Chrome
1. Open BrowserOS Chrome
2. Go to `chrome://extensions/`
3. Enable **"Developer mode"** (top-right toggle)
4. Click **"Load unpacked"**
5. Select the `dist/` folder
6. Verify extension is loaded (you should see "BrowserOS Controller")
### 3. Test the Extension
```bash
npm test
```
This starts an interactive test client. You should see:
```
🚀 Starting BrowserOS Controller Test Client
──────────────────────────────────────────────────────────
WebSocket Server Started
Listening on: ws://localhost:9224/controller
Waiting for extension to connect...
✅ Extension connected!
Running Diagnostic Test
============================================================
📤 Sending: checkBrowserOS
Request ID: test-1729012345678
📨 Response: test-1729012345678
Status: ✅ SUCCESS
Data: {
"available": true,
"apis": [
"captureScreenshot",
"clear",
"click",
...
]
}
```
**If you see "available": true**, you're all set! 🎉
**If you see "available": false**, you're not using BrowserOS Chrome.
---
## ⚙️ Configuration
The extension can be configured using environment variables. This is optional - sensible defaults are provided.
### Environment Variables
Create a `.env` file in the project root to customize configuration:
```bash
# Copy the example file
cp .env.example .env
# Edit .env with your values
```
### Available Configuration Options
#### WebSocket Configuration
```bash
WEBSOCKET_PROTOCOL=ws # ws or wss (default: ws)
WEBSOCKET_HOST=localhost # Server host (default: localhost)
WEBSOCKET_PORT=9224 # Server port (default: 9224)
WEBSOCKET_PATH=/controller # Server path (default: /controller)
```
#### Connection Settings
```bash
WEBSOCKET_RECONNECT_DELAY=1000 # Initial reconnect delay in ms (default: 1000)
WEBSOCKET_MAX_RECONNECT_DELAY=30000 # Max reconnect delay in ms (default: 30000)
WEBSOCKET_RECONNECT_MULTIPLIER=1.5 # Exponential backoff multiplier (default: 1.5)
WEBSOCKET_MAX_RECONNECT_ATTEMPTS=0 # Max reconnect attempts, 0 = infinite (default: 0)
WEBSOCKET_HEARTBEAT_INTERVAL=30000 # Heartbeat interval in ms (default: 30000)
WEBSOCKET_HEARTBEAT_TIMEOUT=5000 # Heartbeat timeout in ms (default: 5000)
WEBSOCKET_CONNECTION_TIMEOUT=10000 # Connection timeout in ms (default: 10000)
WEBSOCKET_REQUEST_TIMEOUT=30000 # Request timeout in ms (default: 30000)
```
#### Concurrency Settings
```bash
CONCURRENCY_MAX_CONCURRENT=100 # Max concurrent requests (default: 100)
CONCURRENCY_MAX_QUEUE_SIZE=1000 # Max queued requests (default: 1000)
```
#### Logging Settings
```bash
LOGGING_ENABLED=true # Enable/disable logging (default: true)
LOGGING_LEVEL=info # Log level: debug, info, warn, error (default: info)
LOGGING_PREFIX=[BrowserOS Controller] # Log message prefix (default: [BrowserOS Controller])
```
### Example: Custom Port Configuration
If you want to use a different port (e.g., 8080):
```bash
# .env
WEBSOCKET_PORT=8080
```
Then rebuild the extension:
```bash
npm run build
```
The extension will now connect to `ws://localhost:8080/controller` instead of the default port 9224.
---
## 📖 Architecture
See [ARCHITECTURE.md](./ARCHITECTURE.md) for complete system documentation including:
- High-level architecture diagram
- Request flow (step-by-step)
- Component details
- All 14 registered actions
- WebSocket protocol specification
- Debugging guide
---
## 🧪 Testing
The test client (`npm test`) provides an interactive menu:
```
Available Commands:
Tab Actions:
1. getActiveTab - Get currently active tab
2. getTabs - Get all tabs
Browser Actions:
3. getInteractiveSnapshot - Get page elements (requires tabId)
4. click - Click element (requires tabId, nodeId)
5. inputText - Type text (requires tabId, nodeId, text)
6. captureScreenshot - Take screenshot (requires tabId)
Diagnostic:
d. checkBrowserOS - Check if chrome.browserOS is available
Other:
h. Show this menu
q. Quit
```
### Example Usage:
1. Type `1` → Get active tab
2. Type `d` → Run diagnostic
3. Type `q` → Quit
---
## 🔧 Development
### Build Commands
```bash
npm run build # Production build
npm run build:dev # Development build (with source maps)
npm run watch # Watch mode for development
```
### Debug Extension
1. Go to `chrome://extensions/`
2. Under "BrowserOS Controller", click the **service worker** link next to "Inspect views"
3. Service worker console shows all logs
**Check extension status:**
```javascript
__browserosController.getStats();
```
**Expected output:**
```javascript
{
connection: "connected",
requests: { inFlight: 0, avgDuration: 0, errorRate: 0, totalRequests: 0 },
concurrency: { inFlight: 0, queued: 0, utilization: 0 },
validator: { activeIds: 0 },
responseQueue: { size: 0 }
}
```
**Check registered actions:**
Look for this log on extension load:
```
Registered 14 action(s): checkBrowserOS, getActiveTab, getTabs, ...
```
---
## 📋 Available Actions
| Action | Input | Output | Description |
| ------------------------ | --------------------------------- | ------------------------------- | -------------------------------------- |
| `checkBrowserOS` | `{}` | `{available, apis}` | Check if chrome.browserOS is available |
| `getActiveTab` | `{}` | `{tabId, url, title, windowId}` | Get currently active tab |
| `getTabs` | `{}` | `{tabs[]}` | Get all open tabs |
| `getInteractiveSnapshot` | `{tabId, options?}` | `InteractiveSnapshot` | Get all interactive elements on page |
| `click` | `{tabId, nodeId}` | `{success}` | Click element by nodeId |
| `inputText` | `{tabId, nodeId, text}` | `{success}` | Type text into element |
| `clear` | `{tabId, nodeId}` | `{success}` | Clear text from element |
| `scrollToNode` | `{tabId, nodeId}` | `{scrolled}` | Scroll element into view |
| `captureScreenshot` | `{tabId, size?, showHighlights?}` | `{dataUrl}` | Take screenshot |
| `sendKeys` | `{tabId, keys}` | `{success}` | Send keyboard keys |
| `getPageLoadStatus` | `{tabId}` | `PageLoadStatus` | Get page load status |
| `getSnapshot` | `{tabId, type, options?}` | `Snapshot` | Get text/links snapshot |
| `clickCoordinates` | `{tabId, x, y}` | `{success}` | Click at coordinates |
| `typeAtCoordinates` | `{tabId, x, y, text}` | `{success}` | Type at coordinates |
---
## 🔌 WebSocket Protocol
**Endpoint:** `ws://localhost:9224/controller`
**Request Format:**
```json
{
"id": "unique-request-id",
"action": "click",
"payload": {
"tabId": 12345,
"nodeId": 42
}
}
```
**Response Format:**
```json
{
"id": "unique-request-id",
"ok": true,
"data": {
"success": true
}
}
```
**Error Response:**
```json
{
"id": "unique-request-id",
"ok": false,
"error": "Element not found: nodeId 42"
}
```
---
## ⚠️ Common Issues
### Issue 1: "chrome.browserOS is undefined"
**Symptoms:**
- Diagnostic shows `"available": false`
- All browser actions fail
**Cause:** Not using BrowserOS Chrome
**Solution:**
- Download and use BrowserOS Chrome (not regular Chrome)
- Verify at `chrome://version` - should show "BrowserOS" in the name
---
### Issue 2: "Port 9224 is already in use"
**Symptoms:**
```
❌ Fatal Error: Port 9224 is already in use!
```
**Solution:**
```bash
lsof -ti:9224 | xargs kill -9
npm test
```
---
### Issue 3: Extension Not Connecting
**Symptoms:**
- Test client shows "Waiting for extension to connect..." forever
- Service worker console shows "Connection timeout"
**Checklist:**
1. ✅ Test server running (`npm test`)
2. ✅ Extension loaded in BrowserOS Chrome
3. ✅ Extension enabled (chrome://extensions/)
4. ✅ Service worker active (not suspended)
**Solution:**
1. Reload extension: chrome://extensions/ → "Reload" button
2. Restart test server: Ctrl+C, then `npm test`
---
### Issue 4: "Unknown action"
**Symptoms:**
```
Error: Unknown action: "click". Available actions: getActiveTab, getTabs, ...
```
**Cause:** Action not registered (extension didn't reload properly)
**Solution:**
1. Toggle extension OFF and ON at chrome://extensions/
2. Check service worker console for: `Registered 14 action(s): ...`
---
## 📁 Project Structure
```
browseros-controller/
├── README.md # This file
├── ARCHITECTURE.md # Complete architecture documentation
├── .env.example # Environment variable template
├── manifest.json # Extension manifest
├── package.json # Node dependencies
├── webpack.config.js # Build configuration
├── src/ # Source code
│ ├── background/ # Service worker entry point
│ ├── actions/ # Action handlers
│ │ ├── bookmark/ # Bookmark management actions
│ │ ├── browser/ # Browser interaction actions
│ │ ├── diagnostics/ # Diagnostic actions
│ │ ├── history/ # History management actions
│ │ └── tab/ # Tab management actions
│ ├── adapters/ # Chrome API wrappers
│ ├── config/ # Configuration management
│ │ ├── constants.ts # Application constants
│ │ └── environment.ts # Environment variable handling
│ ├── websocket/ # WebSocket client
│ ├── utils/ # Utilities
│ ├── protocol/ # Protocol types
│ └── types/ # TypeScript definitions
├── tests/ # Test files
│ ├── test-simple.js # Interactive test client
│ └── test-auto.js # Automated test client
└── dist/ # Built extension (generated)
├── background.js
└── manifest.json
```
---
## 🔗 Related Projects
- **BrowserOS-agent**: AI agent that uses this controller for browser automation
- **BrowserOS Chrome**: Custom Chrome build with `chrome.browserOS` APIs
---
## 📄 License
MIT
---
## 🆘 Support
For issues or questions:
1. Check [ARCHITECTURE.md](./ARCHITECTURE.md) for detailed documentation
2. Review the "Common Issues" section above
3. Check service worker console for detailed error logs
4. Verify you're using BrowserOS Chrome (run diagnostic test)
---
**Happy automating! 🚀**

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 574 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

View File

@@ -1,38 +0,0 @@
{
"manifest_version": 3,
"name": "BrowserOS Controller",
"version": "1.0.0.8",
"description": "BrowserOS API bridge for BrowserOS Server",
"key": "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAhlh9i/c2A3f0PL86hXhGPzguLIOQ+sPf3/Y8RD11gmdvoU6XqnUqv7GgBvm7SW7316uPnS58AYZY13jGtF4rFrscdda5H2CjZrtOyOycmKp2KzibJLwibXNm/JwKhZ3QEfgsW/orh1SMY2kNj62JemkWLcLyn3E1T+KTcTVyFOxiJS3hyQ+Y0/Jp1HOqGh5lYS58YYzwhId5rrJjfL7wFYtALgt2dEA2r7p4qpe+SW0QLA+ayjRAjS+yt+qitR0eWg+XgqcIk1f1KblN8/yDISssSD4LWiPofe5CmJPnqlHIuI0CpgvAFv9dvgR/w8OFkXxK5h06i6saum1xExj+IwIDAQAB",
"permissions": [
"tabs",
"activeTab",
"bookmarks",
"history",
"scripting",
"storage",
"tabGroups",
"webNavigation",
"downloads",
"browserOS",
"alarms"
],
"update_url": "https://cdn.browseros.com/extensions/update-manifest.xml",
"host_permissions": ["<all_urls>"],
"background": {
"service_worker": "background.js",
"type": "module"
},
"action": {
"default_icon": {
"16": "assets/icon16.png",
"48": "assets/icon48.png",
"128": "assets/icon128.png"
}
},
"icons": {
"16": "assets/icon16.png",
"48": "assets/icon48.png",
"128": "assets/icon128.png"
}
}

View File

@@ -1,39 +0,0 @@
{
"name": "browseros-controller",
"version": "1.0.0",
"description": "Chrome Extension API bridge for BrowserOS Server",
"directories": {
"doc": "docs"
},
"scripts": {
"build": "webpack --mode production",
"build:dev": "webpack --mode development",
"watch": "webpack --mode development --watch",
"test": "node tests/test-simple.js",
"test:auto": "node tests/test-auto.js",
"typecheck": "tsc --noEmit"
},
"keywords": [
"browser-automation",
"chrome-extension",
"browseros"
],
"author": "BrowserOS Team",
"license": "MIT",
"type": "commonjs",
"dependencies": {
"@browseros/shared": "workspace:*",
"zod": "^4.1.12"
},
"devDependencies": {
"@types/chrome": "^0.1.24",
"@types/node": "^24.7.1",
"copy-webpack-plugin": "^12.0.2",
"terser-webpack-plugin": "^5.3.11",
"ts-loader": "^9.5.4",
"typescript": "^5.9.3",
"webpack": "^5.102.1",
"webpack-cli": "^6.0.1",
"ws": "^8.18.3"
}
}

View File

@@ -1,106 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import type { ActionResponse } from '@/protocol/types'
import { ActionResponseSchema } from '@/protocol/types'
import { logger } from '@/utils/logger'
// Re-export for convenience
export type { ActionResponse }
export { ActionResponseSchema }
/**
 * ActionHandler - base class that every action extends.
 *
 * A concrete action supplies two things:
 * - `inputSchema`: a Zod schema describing the payload it accepts
 * - `execute`: the action logic, which receives the parsed payload
 *
 * `handle` glues them together: it parses the raw payload, runs the action,
 * and wraps the outcome in an ActionResponse (`{ ok: true, data }` on
 * success, `{ ok: false, error }` on any thrown error).
 *
 * Usage:
 * class MyAction extends ActionHandler<InputType, OutputType> {
 *   inputSchema = z.object({ ... });
 *   async execute(input: InputType): Promise<OutputType> { ... }
 * }
 */
export abstract class ActionHandler<TInput = unknown, TOutput = unknown> {
  /** Zod schema the raw payload is parsed with before `execute` is called. */
  abstract readonly inputSchema: z.ZodSchema<TInput>

  /**
   * Action-specific logic, implemented by each concrete action.
   *
   * @param input - Payload already parsed by `inputSchema`
   * @returns Result that `handle` places in the `data` field of the response
   */
  abstract execute(input: TInput): Promise<TOutput>

  /**
   * Parse the payload, run the action, and report the outcome.
   *
   * Errors thrown by parsing or by `execute` are not propagated; they are
   * logged and converted into an `{ ok: false, error }` response.
   *
   * @param payload - Raw, unvalidated payload from the request
   * @returns Standardized action response
   */
  async handle(payload: unknown): Promise<ActionResponse> {
    // Log prefix uses the concrete subclass name, e.g. "[ClickAction]".
    const tag = `[${this.constructor.name}]`

    try {
      logger.debug(`${tag} Validating input`)
      const input = this.inputSchema.parse(payload)

      logger.debug(`${tag} Executing action`)
      const data = await this.execute(input)

      logger.debug(`${tag} Action completed successfully`)
      return { ok: true, data }
    } catch (error) {
      const message = this._formatError(error)
      logger.error(`${tag} Action failed: ${message}`)
      return { ok: false, error: message }
    }
  }

  /**
   * Turn a thrown value into a single human-readable line.
   *
   * - ZodError: "Validation error: " followed by every issue as
   *   "<dotted.path>: <message>" (path omitted when empty), comma-separated
   * - Error: its `message`
   * - anything else: `String(value)`
   *
   * @param error - Value thrown during validation or execution
   * @returns Formatted error message
   */
  protected _formatError(error: unknown): string {
    if (error instanceof z.ZodError) {
      const details = error.issues
        .map((issue: z.ZodIssue) =>
          issue.path.length > 0
            ? `${issue.path.join('.')}: ${issue.message}`
            : `${issue.message}`,
        )
        .join(', ')
      return `Validation error: ${details}`
    }

    return error instanceof Error ? error.message : String(error)
  }
}

View File

@@ -1,148 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import type { ActionHandler, ActionResponse } from './ActionHandler'
/**
 * ActionRegistry - name → handler lookup table and dispatcher.
 *
 * What it does:
 * - stores action handlers under a string name
 * - forwards a request to the handler registered for that name
 * - answers with an error response when the name is unknown
 * - exposes simple introspection (names, count, membership)
 *
 * Usage:
 * const registry = new ActionRegistry();
 * registry.register('getActiveTab', new GetActiveTabAction());
 * const response = await registry.dispatch('getActiveTab', {});
 */
export class ActionRegistry {
  private handlers = new Map<string, ActionHandler>()

  /**
   * Store a handler under the given name.
   * Re-registering an existing name logs a warning and replaces the handler.
   *
   * @param actionName - Unique action name (e.g., "getActiveTab")
   * @param handler - Action handler instance
   */
  register(actionName: string, handler: ActionHandler): void {
    const alreadyKnown = this.handlers.has(actionName)
    if (alreadyKnown) {
      logger.warn(
        `[ActionRegistry] Action "${actionName}" already registered, overwriting`,
      )
    }

    this.handlers.set(actionName, handler)
    logger.info(`[ActionRegistry] Registered action: ${actionName}`)
  }

  /**
   * Run the handler registered for `actionName`.
   *
   * Never rejects: an unknown name, or an error escaping the handler, is
   * logged and returned as an `{ ok: false, error }` response.
   *
   * @param actionName - Action to execute
   * @param payload - Action payload (unvalidated; the handler validates it)
   * @returns Action response
   */
  async dispatch(
    actionName: string,
    payload: unknown,
  ): Promise<ActionResponse> {
    logger.debug(`[ActionRegistry] Dispatching action: ${actionName}`)

    const target = this.handlers.get(actionName)

    // Unknown action: list what is registered to help the caller.
    if (!target) {
      const known = [...this.handlers.keys()].join(', ')
      const notFound = `Unknown action: "${actionName}". Available actions: ${known || 'none'}`
      logger.error(`[ActionRegistry] ${notFound}`)
      return { ok: false, error: notFound }
    }

    try {
      const response = await target.handle(payload)
      const outcome = response.ok ? 'succeeded' : 'failed'
      logger.debug(`[ActionRegistry] Action "${actionName}" ${outcome}`)
      return response
    } catch (error) {
      // Safety net for anything the handler let escape.
      const reason = error instanceof Error ? error.message : String(error)
      logger.error(
        `[ActionRegistry] Unexpected error in "${actionName}": ${reason}`,
      )
      return { ok: false, error: `Action execution failed: ${reason}` }
    }
  }

  /**
   * Names of all registered actions, in registration order.
   *
   * @returns Array of action names
   */
  getAvailableActions(): string[] {
    return [...this.handlers.keys()]
  }

  /**
   * Whether a handler is registered under `actionName`.
   *
   * @param actionName - Action name to check
   * @returns True if action exists
   */
  hasAction(actionName: string): boolean {
    return this.handlers.has(actionName)
  }

  /**
   * How many actions are currently registered.
   *
   * @returns Count of registered actions
   */
  getActionCount(): number {
    return this.handlers.size
  }

  /**
   * Remove a single action (useful for testing).
   *
   * @param actionName - Action to remove
   * @returns True if action was removed, false if it was not registered
   */
  unregister(actionName: string): boolean {
    if (!this.handlers.delete(actionName)) {
      return false
    }
    logger.info(`[ActionRegistry] Unregistered action: ${actionName}`)
    return true
  }

  /**
   * Remove every registered action (useful for testing).
   */
  clear(): void {
    const removedCount = this.handlers.size
    this.handlers.clear()
    logger.info(`[ActionRegistry] Cleared ${removedCount} registered actions`)
  }
}

View File

@@ -1,81 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const CreateBookmarkInputSchema = z.object({
  title: z.string().describe('Bookmark title'),
  url: z.string().url().describe('Bookmark URL'),
  parentId: z
    .string()
    .optional()
    .describe('Parent folder ID (optional, defaults to "Other Bookmarks")'),
})

// Shape of the result; used here only to derive the output type
const CreateBookmarkOutputSchema = z.object({
  id: z.string().describe('Created bookmark ID'),
  title: z.string().describe('Bookmark title'),
  url: z.string().describe('Bookmark URL'),
  dateAdded: z
    .number()
    .optional()
    .describe('Timestamp when bookmark was created'),
})

type CreateBookmarkInput = z.infer<typeof CreateBookmarkInputSchema>
type CreateBookmarkOutput = z.infer<typeof CreateBookmarkOutputSchema>

/**
 * CreateBookmarkAction - add a bookmark via BookmarkAdapter.
 *
 * Input:
 * - title: display title for the bookmark
 * - url: URL to bookmark (must pass the schema's URL check)
 * - parentId (optional): folder to create the bookmark in; when omitted the
 *   adapter picks the location (the schema describes it as "Other Bookmarks")
 *
 * Output:
 * - id: ID of the created bookmark
 * - title: title reported back by the adapter
 * - url: URL reported back by the adapter, or '' when it reports none
 * - dateAdded: creation timestamp, if the adapter provides one
 *
 * Example:
 * {
 *   "title": "Google",
 *   "url": "https://www.google.com"
 * }
 * // Returns: { id: "123", title: "Google", url: "https://www.google.com", dateAdded: 1729012345678 }
 */
export class CreateBookmarkAction extends ActionHandler<
  CreateBookmarkInput,
  CreateBookmarkOutput
> {
  readonly inputSchema = CreateBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: CreateBookmarkInput): Promise<CreateBookmarkOutput> {
    const { title, url, parentId } = input
    const bookmark = await this.bookmarkAdapter.createBookmark({
      title,
      url,
      parentId,
    })

    // The output schema requires a string URL, so a missing one becomes ''.
    const savedUrl = bookmark.url || ''
    return {
      id: bookmark.id,
      title: bookmark.title,
      url: savedUrl,
      dateAdded: bookmark.dateAdded,
    }
  }
}

View File

@@ -1,52 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const CreateBookmarkFolderInputSchema = z.object({
  title: z.string().describe('Folder name'),
  parentId: z
    .string()
    .optional()
    .describe('Parent folder ID (defaults to "1" = Bookmarks Bar)'),
})

// Shape of the result; used here only to derive the output type
const CreateBookmarkFolderOutputSchema = z.object({
  id: z.string().describe('Created folder ID'),
  title: z.string().describe('Folder name'),
  parentId: z.string().optional().describe('Parent folder ID'),
  dateAdded: z.number().optional().describe('Creation timestamp'),
})

type CreateBookmarkFolderInput = z.infer<typeof CreateBookmarkFolderInputSchema>
type CreateBookmarkFolderOutput = z.infer<
  typeof CreateBookmarkFolderOutputSchema
>

/**
 * CreateBookmarkFolderAction - create a bookmark folder via BookmarkAdapter.
 *
 * Passes `title` and the optional `parentId` to
 * `BookmarkAdapter.createBookmarkFolder` and returns the id, title, parentId
 * and dateAdded of the folder the adapter reports back.
 */
export class CreateBookmarkFolderAction extends ActionHandler<
  CreateBookmarkFolderInput,
  CreateBookmarkFolderOutput
> {
  readonly inputSchema = CreateBookmarkFolderInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(
    input: CreateBookmarkFolderInput,
  ): Promise<CreateBookmarkFolderOutput> {
    const { title, parentId } = input
    const folder = await this.bookmarkAdapter.createBookmarkFolder({
      title,
      parentId,
    })

    // Return only the fields that are part of the output contract.
    return {
      id: folder.id,
      title: folder.title,
      parentId: folder.parentId,
      dateAdded: folder.dateAdded,
    }
  }
}

View File

@@ -1,59 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Input accepted by GetBookmarkChildrenAction: the ID of the folder to list.
const GetBookmarkChildrenInputSchema = z.object({
folderId: z.string().describe('Folder ID to get children from'),
})
// Result shape: the child nodes plus their count. `isFolder` distinguishes
// folders from bookmarks; `url` is optional because not every node has one.
// In this file the schema is only used to derive the output type below.
const GetBookmarkChildrenOutputSchema = z.object({
children: z.array(
z.object({
id: z.string(),
title: z.string(),
url: z.string().optional(),
parentId: z.string().optional(),
dateAdded: z.number().optional(),
isFolder: z.boolean(),
}),
),
count: z.number(),
})
// Static types inferred from the schemas above.
type GetBookmarkChildrenInput = z.infer<typeof GetBookmarkChildrenInputSchema>
type GetBookmarkChildrenOutput = z.infer<typeof GetBookmarkChildrenOutputSchema>
/**
 * GetBookmarkChildrenAction - List the direct children of a bookmark folder
 *
 * Asks BookmarkAdapter.getBookmarkChildren for the nodes under `folderId` and
 * returns them with a derived `isFolder` flag plus the number of children.
 */
export class GetBookmarkChildrenAction extends ActionHandler<
  GetBookmarkChildrenInput,
  GetBookmarkChildrenOutput
> {
  readonly inputSchema = GetBookmarkChildrenInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(
    input: GetBookmarkChildrenInput,
  ): Promise<GetBookmarkChildrenOutput> {
    const { folderId } = input
    const nodes = await this.bookmarkAdapter.getBookmarkChildren(folderId)
    const children = nodes.map(({ id, title, url, parentId, dateAdded }) => ({
      id,
      title,
      url,
      parentId,
      dateAdded,
      // A node without a (truthy) url is reported as a folder.
      isFolder: !url,
    }))
    const count = children.length
    return { children, count }
  }
}

View File

@@ -1,111 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for GetBookmarksAction. `limit` and `recent` carry defaults
// (20 and false), so they are always present after parsing.
// NOTE(review): the `query` description says "returns all if not provided",
// but the handler in this file falls back to recent bookmarks (capped by
// `limit`) when no query is given.
const GetBookmarksInputSchema = z.object({
query: z
.string()
.optional()
.describe(
'Search query to filter bookmarks (optional, returns all if not provided)',
),
limit: z
.number()
.int()
.positive()
.optional()
.default(20)
.describe('Maximum number of results (default: 20)'),
recent: z
.boolean()
.optional()
.default(false)
.describe('Get recent bookmarks instead of searching'),
})
// Output schema: the matching bookmark nodes and how many were returned.
// In this file it is only used to derive the output type below.
const GetBookmarksOutputSchema = z.object({
bookmarks: z.array(
z.object({
id: z.string(),
title: z.string(),
url: z.string().optional(),
dateAdded: z.number().optional(),
parentId: z.string().optional(),
}),
),
count: z.number(),
})
// Static types inferred from the schemas above.
type GetBookmarksInput = z.infer<typeof GetBookmarksInputSchema>
type GetBookmarksOutput = z.infer<typeof GetBookmarksOutputSchema>
/**
 * GetBookmarksAction - Get recent bookmarks or search bookmarks
 *
 * Input:
 * - query (optional): Search query passed to BookmarkAdapter.searchBookmarks
 * - limit (optional): Maximum results (default: 20)
 * - recent (optional): Force the "recent bookmarks" path (default: false)
 *
 * Behavior:
 * - A search runs only when a non-empty `query` is given AND `recent` is not
 *   set; the matches are truncated to `limit`.
 * - In every other case (no query, empty query, or `recent: true`) the
 *   `limit` most recent bookmarks are returned. `recent: true` therefore
 *   takes precedence over `query`. There is no "return all bookmarks" mode.
 *
 * Output:
 * - bookmarks: Array of bookmark objects (id, title, url, dateAdded, parentId)
 * - count: Number of bookmarks returned
 *
 * Usage:
 * - Get recent: { "recent": true }
 * - Search: { "query": "github" }
 * - Most recent 50: { "limit": 50 }
 *
 * Example:
 * {
 *   "query": "google",
 *   "limit": 10
 * }
 * // Returns: { bookmarks: [{id: "1", title: "Google", url: "https://google.com"}], count: 1 }
 */
export class GetBookmarksAction extends ActionHandler<
  GetBookmarksInput,
  GetBookmarksOutput
> {
  readonly inputSchema = GetBookmarksInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  /**
   * @param input - Parsed input; `limit` and `recent` always carry a value
   *   because the schema supplies defaults.
   * @returns The selected bookmarks and their count.
   */
  async execute(input: GetBookmarksInput): Promise<GetBookmarksOutput> {
    const { query, limit, recent } = input

    let results: chrome.bookmarks.BookmarkTreeNode[]
    if (!recent && query) {
      // The search call takes no limit here, so the cap is applied afterwards.
      const matches = await this.bookmarkAdapter.searchBookmarks(query)
      results = matches.slice(0, limit)
    } else {
      // Explicit `recent` and the no-query fallback share the same path.
      results = await this.bookmarkAdapter.getRecentBookmarks(limit)
    }

    // Map to output format
    const bookmarks = results.map((b) => ({
      id: b.id,
      title: b.title,
      url: b.url,
      dateAdded: b.dateAdded,
      parentId: b.parentId,
    }))
    return {
      bookmarks,
      count: bookmarks.length,
    }
  }
}

View File

@@ -1,49 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Input accepted by MoveBookmarkAction: the node to move plus an optional new
// parent folder and/or a non-negative integer position within that parent.
const MoveBookmarkInputSchema = z.object({
id: z.string().describe('Bookmark or folder ID to move'),
parentId: z.string().optional().describe('New parent folder ID'),
index: z.number().int().min(0).optional().describe('Position within parent'),
})
// Shape of the moved node returned to the caller. In this file it is only
// used to derive the output type below.
const MoveBookmarkOutputSchema = z.object({
id: z.string().describe('Moved bookmark ID'),
title: z.string().describe('Bookmark title'),
url: z.string().optional().describe('Bookmark URL (undefined if folder)'),
parentId: z.string().optional().describe('New parent folder ID'),
index: z.number().optional().describe('New position within parent'),
})
// Static types inferred from the schemas above.
type MoveBookmarkInput = z.infer<typeof MoveBookmarkInputSchema>
type MoveBookmarkOutput = z.infer<typeof MoveBookmarkOutputSchema>
/**
 * MoveBookmarkAction - Move a bookmark or folder
 *
 * Builds a destination from whichever of `parentId` / `index` the caller
 * supplied, hands it to BookmarkAdapter.moveBookmark, and returns the id,
 * title, url, parentId and index of the node the adapter reports back.
 */
export class MoveBookmarkAction extends ActionHandler<
  MoveBookmarkInput,
  MoveBookmarkOutput
> {
  readonly inputSchema = MoveBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: MoveBookmarkInput): Promise<MoveBookmarkOutput> {
    const { id, parentId, index } = input

    // Only keys the caller actually provided are put on the destination, so
    // an omitted field is absent rather than present-but-undefined.
    const target: { parentId?: string; index?: number } = {}
    if (parentId !== undefined) {
      target.parentId = parentId
    }
    if (index !== undefined) {
      target.index = index
    }

    const node = await this.bookmarkAdapter.moveBookmark(id, target)
    return {
      id: node.id,
      title: node.title,
      url: node.url,
      parentId: node.parentId,
      index: node.index,
    }
  }
}

View File

@@ -1,62 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for RemoveBookmarkAction: the ID of the bookmark to delete.
const RemoveBookmarkInputSchema = z.object({
id: z.string().describe('Bookmark ID to remove'),
})
// Output schema: a success flag and a human-readable confirmation message.
// In this file it is only used to derive the output type below.
const RemoveBookmarkOutputSchema = z.object({
success: z
.boolean()
.describe('Whether the bookmark was successfully removed'),
message: z.string().describe('Confirmation message'),
})
// Static types inferred from the schemas above.
type RemoveBookmarkInput = z.infer<typeof RemoveBookmarkInputSchema>
type RemoveBookmarkOutput = z.infer<typeof RemoveBookmarkOutputSchema>
/**
 * RemoveBookmarkAction - Remove a bookmark
 *
 * Calls BookmarkAdapter.removeBookmark with the given ID. If the adapter call
 * rejects, the error propagates; otherwise a success result is returned.
 *
 * Input:
 * - id: Bookmark ID to remove
 *
 * Output:
 * - success: always true when this method returns
 * - message: Confirmation message containing the ID
 *
 * Example:
 * {
 *   "id": "123"
 * }
 * // Returns: { success: true, message: "Removed bookmark 123" }
 */
export class RemoveBookmarkAction extends ActionHandler<
  RemoveBookmarkInput,
  RemoveBookmarkOutput
> {
  readonly inputSchema = RemoveBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: RemoveBookmarkInput): Promise<RemoveBookmarkOutput> {
    await this.bookmarkAdapter.removeBookmark(input.id)

    const message = `Removed bookmark ${input.id}`
    return { success: true, message }
  }
}

View File

@@ -1,48 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Input accepted by RemoveBookmarkTreeAction: the folder ID and an explicit
// confirmation flag. `confirm` is required by the schema; the handler in this
// file additionally checks that it is exactly `true`.
const RemoveBookmarkTreeInputSchema = z.object({
id: z.string().describe('Folder ID to remove'),
confirm: z.boolean().describe('Must be true to confirm recursive deletion'),
})
// Result shape: a success flag and a message. In this file it is only used to
// derive the output type below.
const RemoveBookmarkTreeOutputSchema = z.object({
success: z.boolean().describe('Whether the folder was removed'),
message: z.string().describe('Result message'),
})
// Static types inferred from the schemas above.
type RemoveBookmarkTreeInput = z.infer<typeof RemoveBookmarkTreeInputSchema>
type RemoveBookmarkTreeOutput = z.infer<typeof RemoveBookmarkTreeOutputSchema>
/**
 * RemoveBookmarkTreeAction - Remove a bookmark folder and its contents
 *
 * Requires `confirm: true`. Without it nothing is deleted and a
 * `success: false` result explaining the requirement is returned (no error
 * is thrown). With it, BookmarkAdapter.removeBookmarkTree is called for the
 * given folder ID.
 */
export class RemoveBookmarkTreeAction extends ActionHandler<
  RemoveBookmarkTreeInput,
  RemoveBookmarkTreeOutput
> {
  readonly inputSchema = RemoveBookmarkTreeInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(
    input: RemoveBookmarkTreeInput,
  ): Promise<RemoveBookmarkTreeOutput> {
    const confirmed = input.confirm === true
    if (confirmed) {
      await this.bookmarkAdapter.removeBookmarkTree(input.id)
      return {
        success: true,
        message: `Removed folder ${input.id} and all its contents`,
      }
    }

    // Unconfirmed requests are refused without touching the adapter.
    return {
      success: false,
      message:
        'Recursive deletion requires confirm: true. This will permanently delete the folder and all its contents.',
    }
  }
}

View File

@@ -1,82 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for UpdateBookmarkAction: the bookmark ID plus an optional new
// title and/or an optional new URL (validated as a URL string). The schema
// itself does not require that at least one of them is present; the handler
// in this file enforces that at execution time.
const UpdateBookmarkInputSchema = z.object({
id: z.string().describe('Bookmark ID to update'),
title: z.string().optional().describe('New bookmark title'),
url: z.string().url().optional().describe('New bookmark URL'),
})
// Output schema: the bookmark's id, title and url after the update. In this
// file it is only used to derive the output type below.
const UpdateBookmarkOutputSchema = z.object({
id: z.string().describe('Bookmark ID'),
title: z.string().describe('Updated bookmark title'),
url: z.string().optional().describe('Updated bookmark URL'),
})
// Static types inferred from the schemas above.
type UpdateBookmarkInput = z.infer<typeof UpdateBookmarkInputSchema>
type UpdateBookmarkOutput = z.infer<typeof UpdateBookmarkOutputSchema>
/**
 * UpdateBookmarkAction - Update a bookmark's title and/or URL
 *
 * Input:
 * - id: Bookmark ID to update
 * - title (optional): New title for the bookmark
 * - url (optional): New URL for the bookmark
 *
 * Output:
 * - id, title, url of the node returned by BookmarkAdapter.updateBookmark
 *
 * Throws an Error when neither `title` nor `url` is provided.
 *
 * Example:
 * {
 *   "id": "123",
 *   "title": "New Title",
 *   "url": "https://www.example.com"
 * }
 * // Returns: { id: "123", title: "New Title", url: "https://www.example.com" }
 */
export class UpdateBookmarkAction extends ActionHandler<
  UpdateBookmarkInput,
  UpdateBookmarkOutput
> {
  readonly inputSchema = UpdateBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: UpdateBookmarkInput): Promise<UpdateBookmarkOutput> {
    const { id, title, url } = input

    // Reject no-op requests before reaching the adapter.
    if (title === undefined && url === undefined) {
      throw new Error('At least one of title or url must be provided')
    }

    // Only the fields the caller supplied are included in the change set.
    const patch: { title?: string; url?: string } = {}
    if (title !== undefined) patch.title = title
    if (url !== undefined) patch.url = url

    const node = await this.bookmarkAdapter.updateBookmark(id, patch)
    return { id: node.id, title: node.title, url: node.url }
  }
}

View File

@@ -1,79 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import {
BrowserOSAdapter,
type ScreenshotSizeKey,
} from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for CaptureScreenshotAction. `size` defaults to 'medium' and
// `showHighlights` to true, so both are always present after parsing;
// `width` / `height` are optional explicit dimensions.
const CaptureScreenshotInputSchema = z.object({
tabId: z.number().describe('The tab ID to capture'),
size: z
.enum(['small', 'medium', 'large'])
.optional()
.default('medium')
.describe('Screenshot size preset (default: medium)'),
showHighlights: z
.boolean()
.optional()
.default(true)
.describe('Show element highlights (default: true)'),
width: z.number().optional().describe('Exact width in pixels'),
height: z.number().optional().describe('Exact height in pixels'),
})
// Output schema: the captured image as a data URL string. In this file it is
// only used to derive the output type below.
const CaptureScreenshotOutputSchema = z.object({
dataUrl: z.string().describe('Base64-encoded PNG data URL'),
})
// Static types inferred from the schemas above.
type CaptureScreenshotInput = z.infer<typeof CaptureScreenshotInputSchema>
type CaptureScreenshotOutput = z.infer<typeof CaptureScreenshotOutputSchema>
/**
 * CaptureScreenshotAction - Capture a screenshot of a tab
 *
 * Passes tabId, size preset, highlight flag and optional exact width/height
 * straight through to BrowserOSAdapter.captureScreenshot and wraps the
 * returned data URL.
 *
 * Size presets (pixel values as noted by the original author; the actual
 * mapping lives in the adapter and is not visible here):
 * - small (512px): Low detail, minimal tokens
 * - medium (768px): Balanced quality/tokens (default)
 * - large (1028px): High detail, maximum tokens
 *
 * Returns:
 * - dataUrl: image as a data URL (described as data:image/png;base64,...)
 *
 * Used by: ScreenshotTool, VisualClick, VisualType
 */
export class CaptureScreenshotAction extends ActionHandler<
  CaptureScreenshotInput,
  CaptureScreenshotOutput
> {
  readonly inputSchema = CaptureScreenshotInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(
    input: CaptureScreenshotInput,
  ): Promise<CaptureScreenshotOutput> {
    const { tabId, size, showHighlights, width, height } = input
    // The schema's enum is narrowed to the adapter's size-key type.
    const sizeKey = size as ScreenshotSizeKey | undefined
    const dataUrl = await this.browserOSAdapter.captureScreenshot(
      tabId,
      sizeKey,
      showHighlights,
      width,
      height,
    )
    return { dataUrl }
  }
}

View File

@@ -1,124 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import {
BrowserOSAdapter,
type ScreenshotSizeKey,
} from '@/adapters/BrowserOSAdapter'
import { logger } from '@/utils/logger'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Input schema for CaptureScreenshotPointerAction: the tab, the positive
// integer nodeId to point at, a size preset (defaults to 'medium') and an
// optional label drawn with the pointer.
const CaptureScreenshotPointerInputSchema = z.object({
tabId: z.number().describe('The tab ID to capture'),
nodeId: z
.number()
.int()
.positive()
.describe('The nodeId to show pointer over'),
size: z
.enum(['small', 'medium', 'large'])
.optional()
.default('medium')
.describe('Screenshot size preset (default: medium)'),
pointerLabel: z
.string()
.optional()
.describe('Optional label to show with pointer (e.g., "Click", "Type")'),
})
// Output schema: the screenshot data URL and, when a pointer was shown, the
// coordinates it was shown at. In this file it is only used to derive the
// output type below.
const CaptureScreenshotPointerOutputSchema = z.object({
dataUrl: z.string().describe('Base64-encoded PNG data URL'),
pointerPosition: z
.object({
x: z.number(),
y: z.number(),
})
.optional()
.describe('Coordinates where pointer was shown'),
})
// Static types inferred from the schemas above.
type CaptureScreenshotPointerInput = z.infer<
typeof CaptureScreenshotPointerInputSchema
>
type CaptureScreenshotPointerOutput = z.infer<
typeof CaptureScreenshotPointerOutputSchema
>
/**
 * CaptureScreenshotPointerAction - Show pointer over element and capture screenshot
 *
 * Looks up the element's rect in SnapshotCache, shows a pointer overlay at
 * its center, waits briefly for the overlay to render, then captures a
 * screenshot with element highlights disabled.
 *
 * If no rect is cached for the node, a warning is logged and the screenshot
 * is captured immediately without a pointer; `pointerPosition` is then
 * undefined in the result.
 *
 * Prerequisites:
 * - The SnapshotCache must already hold a snapshot for the tab containing
 *   the nodeId (populated by getInteractiveSnapshot)
 *
 * Used by: Visual debugging, automation demos, step-by-step captures
 */
export class CaptureScreenshotPointerAction extends ActionHandler<
  CaptureScreenshotPointerInput,
  CaptureScreenshotPointerOutput
> {
  /** Time given to the overlay to render before the capture is taken. */
  private static readonly POINTER_RENDER_DELAY_MS = 100

  readonly inputSchema = CaptureScreenshotPointerInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Tab, node, size preset and optional pointer label.
   * @returns The screenshot data URL and the pointer coordinates, if a
   *   pointer was shown.
   */
  async execute(
    input: CaptureScreenshotPointerInput,
  ): Promise<CaptureScreenshotPointerOutput> {
    const { tabId, nodeId, size, pointerLabel } = input

    const pointerPosition = await this.showPointerForNode(
      tabId,
      nodeId,
      pointerLabel,
    )

    // Capture screenshot with pointer visible
    const dataUrl = await this.browserOSAdapter.captureScreenshot(
      tabId,
      size as ScreenshotSizeKey | undefined,
      false, // Don't show highlights, we have the pointer
    )
    return {
      dataUrl,
      pointerPosition,
    }
  }

  /**
   * Shows the pointer at the center of the node's cached rect and waits for
   * it to render. Returns undefined (after logging a warning, without
   * waiting) when the cache has no rect for the node.
   */
  private async showPointerForNode(
    tabId: number,
    nodeId: number,
    pointerLabel?: string,
  ): Promise<{ x: number; y: number } | undefined> {
    const rect = SnapshotCache.getNodeRect(tabId, nodeId)
    if (!rect) {
      logger.warn(
        `[CaptureScreenshotPointerAction] No cached rect for node ${nodeId} in tab ${tabId}. Capturing without pointer.`,
      )
      return undefined
    }

    const { x, y } = PointerOverlay.getCenterCoordinates(rect)
    await PointerOverlay.showPointer(tabId, x, y, pointerLabel)
    logger.debug(
      `[CaptureScreenshotPointerAction] Showed pointer at (${x}, ${y}) for node ${nodeId}`,
    )
    // The delay exists only so the pointer gets painted, so it is skipped
    // on the no-pointer path above.
    await this.delay(CaptureScreenshotPointerAction.POINTER_RENDER_DELAY_MS)
    return { x, y }
  }

  /** Resolves after `ms` milliseconds. */
  private delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }
}

View File

@@ -1,38 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input accepted by ClearAction: the tab and the positive integer nodeId of
// the element to clear.
const ClearInputSchema = z.object({
tabId: z.number().describe('The tab ID containing the element'),
nodeId: z
.number()
.int()
.positive()
.describe('The nodeId from interactive snapshot'),
})
// Static input type inferred from the schema above.
type ClearInput = z.infer<typeof ClearInputSchema>
// Result returned by ClearAction.
interface ClearOutput {
success: boolean
}
/**
 * ClearAction - Clear text from an input element
 *
 * Delegates to BrowserOSAdapter.clear for the given tab and node. If the
 * adapter call rejects, the error propagates; otherwise `{ success: true }`
 * is returned.
 * Used before inputText or to reset form fields.
 */
export class ClearAction extends ActionHandler<ClearInput, ClearOutput> {
  readonly inputSchema = ClearInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(input: ClearInput): Promise<ClearOutput> {
    const { tabId, nodeId } = input
    await this.browserOSAdapter.clear(tabId, nodeId)

    const outcome: ClearOutput = { success: true }
    return outcome
  }
}

View File

@@ -1,62 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Input schema for ClickAction: the tab and the positive integer nodeId of
// the element to click.
const ClickInputSchema = z.object({
tabId: z.number().describe('The tab ID containing the element'),
nodeId: z
.number()
.int()
.positive()
.describe('The nodeId from interactive snapshot'),
})
// Output schema: a success flag. In this file it is only used to derive the
// output type below.
const ClickOutputSchema = z.object({
success: z.boolean().describe('Whether the click succeeded'),
})
// Static types inferred from the schemas above.
type ClickInput = z.infer<typeof ClickInputSchema>
type ClickOutput = z.infer<typeof ClickOutputSchema>
/**
 * ClickAction - Click an element by its nodeId
 *
 * If SnapshotCache holds a rect for the node, a "Click" pointer overlay is
 * shown at the rect's center (and awaited) first. The click itself is then
 * performed through BrowserOSAdapter.click regardless of whether a rect was
 * found.
 *
 * Prerequisites (per the original notes):
 * - Call getInteractiveSnapshot first to get valid nodeIds
 * - NodeIds are valid only for the current page state
 * - NodeIds are invalidated on page navigation
 *
 * Used by: ClickTool, all automation workflows
 */
export class ClickAction extends ActionHandler<ClickInput, ClickOutput> {
  readonly inputSchema = ClickInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(input: ClickInput): Promise<ClickOutput> {
    const { tabId, nodeId } = input

    // The overlay is optional: without a cached rect the click still happens.
    const bounds = SnapshotCache.getNodeRect(tabId, nodeId)
    if (bounds) {
      const center = PointerOverlay.getCenterCoordinates(bounds)
      await PointerOverlay.showPointerAndWait(
        tabId,
        center.x,
        center.y,
        'Click',
      )
    }

    await this.browserOSAdapter.click(tabId, nodeId)
    return { success: true }
  }
}

View File

@@ -1,69 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { ActionHandler } from '../ActionHandler'
// Input schema for clickCoordinates action: a positive integer tab ID and
// non-negative integer x/y viewport coordinates.
const ClickCoordinatesInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to click in'),
x: z.number().int().nonnegative().describe('X coordinate in viewport pixels'),
y: z.number().int().nonnegative().describe('Y coordinate in viewport pixels'),
})
// Static input type inferred from the schema above.
type ClickCoordinatesInput = z.infer<typeof ClickCoordinatesInputSchema>
// Output confirms the click: a success flag, a human-readable message and the
// coordinates that were clicked.
export interface ClickCoordinatesOutput {
success: boolean
message: string
coordinates: {
x: number
y: number
}
}
/**
 * ClickCoordinatesAction - Click at specific viewport coordinates
 *
 * Shows a "Click" pointer overlay at (x, y), waits for it, then performs the
 * click through the BrowserOS adapter's clickCoordinates. Coordinates are
 * described as viewport pixels relative to the top-left corner (0, 0).
 *
 * Useful when:
 * - Elements don't have accessible node IDs
 * - Working with canvas or interactive graphics
 * - Vision-based automation (e.g., AI identifies coordinates from screenshots)
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "x": 500,
 *   "y": 300
 * }
 */
export class ClickCoordinatesAction extends ActionHandler<
  ClickCoordinatesInput,
  ClickCoordinatesOutput
> {
  readonly inputSchema = ClickCoordinatesInputSchema
  private browserOS = getBrowserOSAdapter()

  async execute(input: ClickCoordinatesInput): Promise<ClickCoordinatesOutput> {
    const { tabId, x, y } = input

    // Overlay first, so the pointer is visible before the click lands.
    await PointerOverlay.showPointerAndWait(tabId, x, y, 'Click')
    await this.browserOS.clickCoordinates(tabId, x, y)

    const message = `Successfully clicked at coordinates (${x}, ${y}) in tab ${tabId}`
    const coordinates = { x, y }
    return { success: true, message, coordinates }
  }
}

View File

@@ -1,38 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
import { ActionHandler } from '../ActionHandler'
// Input accepted by CloseWindowAction: the positive integer ID of the window.
const CloseWindowInputSchema = z.object({
windowId: z.number().int().positive().describe('ID of the window to close'),
})
// Result shape: a success flag. In this file it is only used to derive the
// output type below.
const CloseWindowOutputSchema = z.object({
success: z.boolean().describe('Whether the window was successfully closed'),
})
// Static types inferred from the schemas above.
type CloseWindowInput = z.infer<typeof CloseWindowInputSchema>
type CloseWindowOutput = z.infer<typeof CloseWindowOutputSchema>
/**
 * CloseWindowAction - Close a browser window
 *
 * Calls chrome.windows.remove for the given window ID, wrapped in
 * withTimeout using CHROME_API_TIMEOUTS.CHROME_API. If that wrapped call
 * rejects, the error propagates; otherwise `{ success: true }` is returned.
 */
export class CloseWindowAction extends ActionHandler<
  CloseWindowInput,
  CloseWindowOutput
> {
  readonly inputSchema = CloseWindowInputSchema

  async execute(input: CloseWindowInput): Promise<CloseWindowOutput> {
    const { windowId } = input
    const removal = chrome.windows.remove(windowId)
    await withTimeout(
      removal,
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.windows.remove',
    )
    return { success: true }
  }
}

View File

@@ -1,73 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
import { ActionHandler } from '../ActionHandler'
// Input accepted by CreateWindowAction. Every field has a default
// ('about:blank', not incognito, focused), so all three are present after
// parsing.
const CreateWindowInputSchema = z.object({
url: z
.string()
.optional()
.default('about:blank')
.describe('URL to open in the new window'),
incognito: z
.boolean()
.optional()
.default(false)
.describe('Create an incognito window'),
focused: z
.boolean()
.optional()
.default(true)
.describe('Whether to focus the new window'),
})
// Result shape: the new window's ID and the ID of its first tab. In this file
// it is only used to derive the output type below.
const CreateWindowOutputSchema = z.object({
windowId: z.number().describe('ID of the newly created window'),
tabId: z.number().describe('ID of the first tab in the new window'),
})
// Static types inferred from the schemas above.
type CreateWindowInput = z.infer<typeof CreateWindowInputSchema>
type CreateWindowOutput = z.infer<typeof CreateWindowOutputSchema>
/**
 * CreateWindowAction - Open a new browser window
 *
 * Calls chrome.windows.create (wrapped in withTimeout) with the requested
 * url, focus and incognito settings, then returns the new window's ID and the
 * ID of its first tab.
 *
 * Throws an Error when the API yields no window, a window without an ID, or
 * a window whose first tab has no ID.
 */
export class CreateWindowAction extends ActionHandler<
  CreateWindowInput,
  CreateWindowOutput
> {
  readonly inputSchema = CreateWindowInputSchema

  async execute(input: CreateWindowInput): Promise<CreateWindowOutput> {
    const { url, focused, incognito } = input
    const options: chrome.windows.CreateData = { url, focused, incognito }

    const win = await withTimeout(
      chrome.windows.create(options),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.windows.create',
    )

    // Validate the result step by step so each failure has its own message.
    if (!win) {
      throw new Error('Failed to create window')
    }
    const windowId = win.id
    if (windowId === undefined) {
      throw new Error('Created window has no ID')
    }
    const firstTabId = win.tabs?.[0]?.id
    if (firstTabId === undefined) {
      throw new Error('Created window has no tab')
    }

    return { windowId, tabId: firstTabId }
  }
}

View File

@@ -1,64 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for ExecuteJavaScriptAction: the tab and the JavaScript source
// to run, as a string.
const ExecuteJavaScriptInputSchema = z.object({
tabId: z.number().describe('The tab ID to execute code in'),
code: z.string().describe('JavaScript code to execute'),
})
// Output schema: whatever the executed code produced, left untyped (z.any).
// In this file it is only used to derive the output type below.
const ExecuteJavaScriptOutputSchema = z.object({
result: z.any().describe('The result of the code execution'),
})
// Static types inferred from the schemas above.
type ExecuteJavaScriptInput = z.infer<typeof ExecuteJavaScriptInputSchema>
type ExecuteJavaScriptOutput = z.infer<typeof ExecuteJavaScriptOutputSchema>
/**
 * ExecuteJavaScriptAction - Execute JavaScript code in a tab
 *
 * Hands the code string to BrowserOSAdapter.executeJavaScript for the given
 * tab and returns whatever the adapter resolves with. The code is forwarded
 * as-is; this class does not inspect or restrict it.
 *
 * Input:
 * - tabId: Tab ID to execute code in
 * - code: JavaScript code as string
 *
 * Output:
 * - result: The value returned by the adapter
 *
 * Usage:
 * - Extract data from page: "document.title"
 * - Manipulate DOM: "document.body.style.background = 'red'"
 * - Get element values: "document.querySelector('#email').value"
 *
 * Example:
 * {
 *   "tabId": 123,
 *   "code": "document.title"
 * }
 * // Returns: { result: "Google" }
 */
export class ExecuteJavaScriptAction extends ActionHandler<
  ExecuteJavaScriptInput,
  ExecuteJavaScriptOutput
> {
  readonly inputSchema = ExecuteJavaScriptInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(
    input: ExecuteJavaScriptInput,
  ): Promise<ExecuteJavaScriptOutput> {
    const { tabId, code } = input
    const result = await this.browserOSAdapter.executeJavaScript(tabId, code)
    return { result }
  }
}

View File

@@ -1,53 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input accepted by GetAccessibilityTreeAction: a positive integer tab ID.
const GetAccessibilityTreeInputSchema = z.object({
tabId: z
.number()
.int()
.positive()
.describe('Tab ID to get accessibility tree from'),
})
// Static input type inferred from the schema above.
type GetAccessibilityTreeInput = z.infer<typeof GetAccessibilityTreeInputSchema>
// The action's output is the chrome.browserOS accessibility tree type as-is.
export type GetAccessibilityTreeOutput = chrome.browserOS.AccessibilityTree
/**
 * GetAccessibilityTreeAction - Get accessibility tree for a tab
 *
 * Returns the value of BrowserOSAdapter.getAccessibilityTree for the tab
 * unchanged.
 *
 * Tree structure as described by the original author (the type itself is
 * chrome.browserOS.AccessibilityTree, declared outside this file):
 * - rootId: The root node ID
 * - nodes: Map of node IDs to accessibility nodes, each with
 *   nodeId, role (e.g., 'staticText', 'heading', 'button'), name and childIds
 *
 * Example payload:
 * {
 *   "tabId": 123
 * }
 */
export class GetAccessibilityTreeAction extends ActionHandler<
  GetAccessibilityTreeInput,
  GetAccessibilityTreeOutput
> {
  readonly inputSchema = GetAccessibilityTreeInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(
    input: GetAccessibilityTreeInput,
  ): Promise<GetAccessibilityTreeOutput> {
    return await this.browserOSAdapter.getAccessibilityTree(input.tabId)
  }
}

View File

@@ -1,71 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import type {
InteractiveSnapshot,
InteractiveSnapshotOptions,
} from '@/adapters/BrowserOSAdapter'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Input schema for GetInteractiveSnapshotAction: the tab plus an optional
// options object. When `options` is supplied, `includeHidden` defaults to
// false; when `options` is omitted entirely it stays undefined.
const GetInteractiveSnapshotInputSchema = z.object({
tabId: z.number().describe('The tab ID to get snapshot from'),
options: z
.object({
includeHidden: z
.boolean()
.optional()
.default(false)
.describe('Include hidden elements (default: false)'),
})
.optional()
.describe('Optional snapshot options'),
})
// Static input type inferred from the schema above.
type GetInteractiveSnapshotInput = z.infer<
typeof GetInteractiveSnapshotInputSchema
>
/**
 * GetInteractiveSnapshotAction - Get interactive elements from the page
 *
 * Fetches the snapshot through BrowserOSAdapter.getInteractiveSnapshot,
 * stores it in SnapshotCache under the tab ID (other actions look up node
 * rects there for the pointer overlay), and returns it unchanged.
 *
 * The original notes call this THE MOST CRITICAL action: its nodeIds are
 * needed by click, inputText, clear, and scrollToNode.
 *
 * Snapshot contents as described by the original author (the type is
 * InteractiveSnapshot from BrowserOSAdapter, declared outside this file):
 * - elements: Array of interactive nodes, each with
 *   nodeId (sequential integer), type ('clickable' | 'typeable' |
 *   'selectable'), name, attributes (html-tag, role, etc.) and rect
 * - hierarchicalStructure: String representation of page structure
 */
export class GetInteractiveSnapshotAction extends ActionHandler<
  GetInteractiveSnapshotInput,
  InteractiveSnapshot
> {
  readonly inputSchema = GetInteractiveSnapshotInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(
    input: GetInteractiveSnapshotInput,
  ): Promise<InteractiveSnapshot> {
    const { tabId, options } = input
    const snapshotOptions = options as InteractiveSnapshotOptions | undefined

    const result = await this.browserOSAdapter.getInteractiveSnapshot(
      tabId,
      snapshotOptions,
    )

    // Remember the latest snapshot per tab for later rect lookups.
    SnapshotCache.set(tabId, result)
    return result
  }
}

View File

@@ -1,69 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import {
BrowserOSAdapter,
type PageLoadStatus,
} from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for getPageLoadStatus action: a positive integer tab ID.
const GetPageLoadStatusInputSchema = z.object({
tabId: z
.number()
.int()
.positive()
.describe('Tab ID to check page load status'),
})
// Static input type inferred from the schema above.
type GetPageLoadStatusInput = z.infer<typeof GetPageLoadStatusInputSchema>
// Output includes page load status details: the tab ID that was queried plus
// the three load-state flags.
export interface GetPageLoadStatusOutput {
tabId: number
isResourcesLoading: boolean
isDOMContentLoaded: boolean
isPageComplete: boolean
}
/**
 * GetPageLoadStatusAction - Get page loading status for a tab
 *
 * Queries BrowserOSAdapter.getPageLoadStatus and returns the three status
 * flags together with the tab ID that was asked about:
 * - isResourcesLoading: Whether resources (images, scripts, etc.) are still loading
 * - isDOMContentLoaded: Whether the DOM is fully parsed and ready
 * - isPageComplete: Whether the page has completely finished loading
 *
 * Useful for waiting for pages to load before taking actions.
 *
 * Example payload:
 * {
 *   "tabId": 123
 * }
 */
export class GetPageLoadStatusAction extends ActionHandler<
  GetPageLoadStatusInput,
  GetPageLoadStatusOutput
> {
  readonly inputSchema = GetPageLoadStatusInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(
    input: GetPageLoadStatusInput,
  ): Promise<GetPageLoadStatusOutput> {
    const tabId = input.tabId
    const loadState: PageLoadStatus =
      await this.browserOSAdapter.getPageLoadStatus(tabId)

    // Copy only the documented flags; any extra adapter fields are dropped.
    const { isResourcesLoading, isDOMContentLoaded, isPageComplete } = loadState
    return { tabId, isResourcesLoading, isDOMContentLoaded, isPageComplete }
  }
}

View File

@@ -1,74 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter, type Snapshot } from '@/adapters/BrowserOSAdapter'
import { logger } from '@/utils/logger'
import { ActionHandler } from '../ActionHandler'
// Input schema for getSnapshot action: a positive integer tab ID, the
// snapshot type ('text' or 'links', defaulting to 'text') and an optional
// configuration object (context scope and page sections to include).
const GetSnapshotInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to get snapshot from'),
type: z
.enum(['text', 'links'])
.default('text')
.describe('Type of snapshot: text or links'),
options: z
.object({
context: z.enum(['visible', 'full']).optional(),
includeSections: z
.array(
z.enum([
'main',
'navigation',
'footer',
'header',
'article',
'aside',
]),
)
.optional(),
})
.optional()
.describe('Optional snapshot configuration'),
})
// Static input type inferred from the schema above.
type GetSnapshotInput = z.infer<typeof GetSnapshotInputSchema>
// Output is the full snapshot structure, i.e. the adapter's Snapshot type.
export type GetSnapshotOutput = Snapshot
/**
 * GetSnapshotAction - Extract page content snapshot
 *
 * Logs the request, then returns the value of BrowserOSAdapter.getSnapshot
 * for the tab and snapshot type unchanged.
 *
 * Per the original notes the snapshot holds structured page content
 * (headings with levels, text content, links with URLs) in document order;
 * the exact shape is the adapter's Snapshot type, declared outside this file.
 *
 * NOTE(review): the input schema accepts `options` (context /
 * includeSections), but only `tabId` and `type` are forwarded to the adapter
 * here - confirm whether `options` is intentionally ignored.
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "type": "text"
 * }
 */
export class GetSnapshotAction extends ActionHandler<
  GetSnapshotInput,
  GetSnapshotOutput
> {
  readonly inputSchema = GetSnapshotInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(input: GetSnapshotInput): Promise<GetSnapshotOutput> {
    const { tabId, type } = input

    const logLine = `[GetSnapshotAction] Getting snapshot for tab ${tabId} with type ${type}`
    logger.info(logLine)

    return await this.browserOSAdapter.getSnapshot(tabId, type)
  }
}

View File

@@ -1,75 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Payload for inputText: which tab, which node, and the text to enter.
const InputTextInputSchema = z.object({
  tabId: z.number().describe('The tab ID containing the element'),
  nodeId: z
    .number()
    .int()
    .positive()
    .describe('The nodeId from interactive snapshot'),
  text: z.string().describe('Text to type into the element'),
})
type InputTextInput = z.infer<typeof InputTextInputSchema>

// Result shape: a single success flag.
const InputTextOutputSchema = z.object({
  success: z.boolean().describe('Whether the input succeeded'),
})
type InputTextOutput = z.infer<typeof InputTextOutputSchema>
/**
 * InputTextAction - Type text into the element identified by a nodeId.
 *
 * Flow:
 * 1. Look up the node's rectangle in `SnapshotCache`; when one is cached,
 *    show the pointer overlay at its left-centre with a preview label
 *    (text longer than 20 characters is shortened and suffixed with "...").
 * 2. Ask the adapter to input the full text into the node.
 * 3. Report `{ success: true }`; adapter failures propagate as rejections.
 *
 * NOTE(review): the original notes say the adapter clears existing text and
 * fires input/change events — that behaviour is not visible in this file.
 *
 * Callers are expected to obtain `nodeId` from an interactive snapshot first.
 */
export class InputTextAction extends ActionHandler<
  InputTextInput,
  InputTextOutput
> {
  readonly inputSchema = InputTextInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Validated payload with `tabId`, `nodeId` and `text`.
   * @returns `{ success: true }` once the adapter call resolves.
   */
  async execute(input: InputTextInput): Promise<InputTextOutput> {
    const { tabId, nodeId, text } = input

    // The overlay is best-effort: it is skipped when no rect is cached.
    const nodeRect = SnapshotCache.getNodeRect(tabId, nodeId)
    if (nodeRect) {
      const anchor = PointerOverlay.getLeftCenterCoordinates(nodeRect)
      let overlayLabel: string
      if (text.length > 20) {
        overlayLabel = `Type: ${text.substring(0, 20)}...`
      } else {
        overlayLabel = `Type: ${text}`
      }
      await PointerOverlay.showPointerAndWait(
        tabId,
        anchor.x,
        anchor.y,
        overlayLabel,
      )
    }

    await this.browserOSAdapter.inputText(tabId, nodeId, text)
    return { success: true }
  }
}

View File

@@ -1,54 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for scrollDown: only the tab to scroll.
const ScrollDownInputSchema = z.object({
  tabId: z.number().describe('The tab ID to scroll'),
})
type ScrollDownInput = z.infer<typeof ScrollDownInputSchema>

// Result shape: a single success flag.
const ScrollDownOutputSchema = z.object({
  success: z.boolean().describe('Whether the scroll succeeded'),
})
type ScrollDownOutput = z.infer<typeof ScrollDownOutputSchema>
/**
 * ScrollDownAction - Scroll a tab down by sending the PageDown key.
 *
 * The key press is used in place of the adapter's scrollDown API, which the
 * original author found less reliable. After the key is sent the action
 * waits 100 ms so the scroll can settle, then reports success.
 *
 * Input:  { tabId }
 * Output: { success: true }
 */
export class ScrollDownAction extends ActionHandler<
  ScrollDownInput,
  ScrollDownOutput
> {
  readonly inputSchema = ScrollDownInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Validated payload; only `tabId` is read.
   * @returns `{ success: true }` after the key press and the settle delay.
   */
  async execute(input: ScrollDownInput): Promise<ScrollDownOutput> {
    // Pause after the key press so the scroll has time to complete.
    const settleDelayMs = 100

    await this.browserOSAdapter.sendKeys(input.tabId, 'PageDown')
    await new Promise((wake) => setTimeout(wake, settleDelayMs))

    return { success: true }
  }
}

View File

@@ -1,42 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for scrollToNode: the tab and the node to bring into view.
const ScrollToNodeInputSchema = z.object({
  tabId: z.number().describe('The tab ID containing the element'),
  nodeId: z
    .number()
    .int()
    .positive()
    .describe('The nodeId to scroll to'),
})

type ScrollToNodeInput = z.infer<typeof ScrollToNodeInputSchema>

// Result shape: the boolean reported by the adapter's scrollToNode call.
interface ScrollToNodeOutput {
  scrolled: boolean
}
/**
 * ScrollToNodeAction - Scroll an element into view.
 *
 * Forwards the tab and node IDs to `BrowserOSAdapter.scrollToNode` and wraps
 * the boolean it resolves with as `{ scrolled }`.
 */
export class ScrollToNodeAction extends ActionHandler<
  ScrollToNodeInput,
  ScrollToNodeOutput
> {
  readonly inputSchema = ScrollToNodeInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Validated payload with `tabId` and `nodeId`.
   * @returns `{ scrolled }` carrying the adapter's result.
   */
  async execute(input: ScrollToNodeInput): Promise<ScrollToNodeOutput> {
    const { tabId, nodeId } = input
    return {
      scrolled: await this.browserOSAdapter.scrollToNode(tabId, nodeId),
    }
  }
}

View File

@@ -1,54 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for scrollUp: only the tab to scroll.
const ScrollUpInputSchema = z.object({
  tabId: z.number().describe('The tab ID to scroll'),
})
type ScrollUpInput = z.infer<typeof ScrollUpInputSchema>

// Result shape: a single success flag.
const ScrollUpOutputSchema = z.object({
  success: z.boolean().describe('Whether the scroll succeeded'),
})
type ScrollUpOutput = z.infer<typeof ScrollUpOutputSchema>
/**
 * ScrollUpAction - Scroll a tab up by sending the PageUp key.
 *
 * The key press is used in place of the adapter's scrollUp API, which the
 * original author found less reliable. After the key is sent the action
 * waits 100 ms so the scroll can settle, then reports success.
 *
 * Input:  { tabId }
 * Output: { success: true }
 */
export class ScrollUpAction extends ActionHandler<
  ScrollUpInput,
  ScrollUpOutput
> {
  readonly inputSchema = ScrollUpInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Validated payload; only `tabId` is read.
   * @returns `{ success: true }` after the key press and the settle delay.
   */
  async execute(input: ScrollUpInput): Promise<ScrollUpOutput> {
    // Pause after the key press so the scroll has time to complete.
    const settleDelayMs = 100

    await this.browserOSAdapter.sendKeys(input.tabId, 'PageUp')
    await new Promise((wake) => setTimeout(wake, settleDelayMs))

    return { success: true }
  }
}

View File

@@ -1,69 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// The special keys this action is willing to send.
const SENDABLE_KEYS = [
  'Enter',
  'Delete',
  'Backspace',
  'Tab',
  'Escape',
  'ArrowUp',
  'ArrowDown',
  'ArrowLeft',
  'ArrowRight',
  'Home',
  'End',
  'PageUp',
  'PageDown',
] as const

// Payload for sendKeys: the target tab and one key from the list above.
const SendKeysInputSchema = z.object({
  tabId: z.number().int().positive().describe('Tab ID to send keys to'),
  key: z.enum(SENDABLE_KEYS).describe('Keyboard key to send'),
})

type SendKeysInput = z.infer<typeof SendKeysInputSchema>

// Result: a success flag plus a human-readable confirmation.
export interface SendKeysOutput {
  success: boolean
  message: string
}
/**
 * SendKeysAction - Send a single special key to a tab.
 *
 * Passes the validated key to the adapter's `sendKeys` and returns a
 * confirmation message naming the key and the tab.
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "key": "Enter"
 * }
 */
export class SendKeysAction extends ActionHandler<
  SendKeysInput,
  SendKeysOutput
> {
  readonly inputSchema = SendKeysInputSchema
  private browserOS = getBrowserOSAdapter()

  /**
   * @param input - Validated payload with `tabId` and `key`.
   * @returns `success: true` and a confirmation message.
   */
  async execute(input: SendKeysInput): Promise<SendKeysOutput> {
    // The schema restricts `key` to a fixed list; the cast maps that union
    // onto the adapter's key type.
    await this.browserOS.sendKeys(
      input.tabId,
      input.key as chrome.browserOS.Key,
    )

    const message = `Successfully sent "${input.key}" to tab ${input.tabId}`
    return { success: true, message }
  }
}

View File

@@ -1,81 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { ActionHandler } from '../ActionHandler'
// Payload for typeAtCoordinates: target tab, a non-negative integer viewport
// position, and the non-empty text to type there.
const TypeAtCoordinatesInputSchema = z.object({
  tabId: z.number().int().positive().describe('Tab ID to type in'),
  x: z
    .number()
    .int()
    .nonnegative()
    .describe('X coordinate in viewport pixels'),
  y: z
    .number()
    .int()
    .nonnegative()
    .describe('Y coordinate in viewport pixels'),
  text: z.string().min(1).describe('Text to type at the location'),
})

type TypeAtCoordinatesInput = z.infer<typeof TypeAtCoordinatesInputSchema>

// Result: confirmation message, the coordinates used, and the text length.
export interface TypeAtCoordinatesOutput {
  success: boolean
  message: string
  coordinates: {
    x: number
    y: number
  }
  textLength: number
}
/**
 * TypeAtCoordinatesAction - Type text at a viewport position.
 *
 * Flow:
 * 1. Show the pointer overlay at (x, y) with a preview of the text (at most
 *    20 characters, then "...").
 * 2. Delegate to the adapter's `typeAtCoordinates`.
 * 3. Return a confirmation whose message quotes at most the first 50
 *    characters of the text.
 *
 * Coordinates are described by the schema as viewport pixels.
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "x": 500,
 *   "y": 300,
 *   "text": "Hello World"
 * }
 */
export class TypeAtCoordinatesAction extends ActionHandler<
  TypeAtCoordinatesInput,
  TypeAtCoordinatesOutput
> {
  readonly inputSchema = TypeAtCoordinatesInputSchema
  private browserOS = getBrowserOSAdapter()

  /**
   * @param input - Validated payload with `tabId`, `x`, `y` and `text`.
   * @returns Confirmation including the coordinates and `text.length`.
   */
  async execute(
    input: TypeAtCoordinatesInput,
  ): Promise<TypeAtCoordinatesOutput> {
    const { tabId, x, y, text } = input

    // Overlay label: long text is shortened so the overlay stays compact.
    let overlayLabel: string
    if (text.length > 20) {
      overlayLabel = `Type: ${text.substring(0, 20)}...`
    } else {
      overlayLabel = `Type: ${text}`
    }
    await PointerOverlay.showPointerAndWait(tabId, x, y, overlayLabel)

    await this.browserOS.typeAtCoordinates(tabId, x, y, text)

    const coordinates = { x, y }
    const message = `Successfully typed "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}" at coordinates (${x}, ${y}) in tab ${tabId}`
    return {
      success: true,
      message,
      coordinates,
      textLength: text.length,
    }
  }
}

View File

@@ -1,90 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { ActionHandler } from '../ActionHandler'
// The diagnostic takes no meaningful input, so any payload is accepted.
const CheckBrowserOSInputSchema = z.any()
type CheckBrowserOSInput = z.infer<typeof CheckBrowserOSInputSchema>

// Result: availability flag, plus either the discovered API names or an
// error description.
const CheckBrowserOSOutputSchema = z.object({
  available: z.boolean(),
  apis: z.array(z.string()).optional(),
  error: z.string().optional(),
})
type CheckBrowserOSOutput = z.infer<typeof CheckBrowserOSOutputSchema>
/**
 * CheckBrowserOSAction - Diagnostic probe for the `chrome.browserOS` namespace.
 *
 * Reports whether `chrome.browserOS` exists and, when it does, the sorted
 * names of its function-valued members. Progress is traced to the console
 * with a `[CheckBrowserOSAction]` prefix. The action never rejects: every
 * failure is converted into `{ available: false, error }`.
 */
export class CheckBrowserOSAction extends ActionHandler<
  CheckBrowserOSInput,
  CheckBrowserOSOutput
> {
  readonly inputSchema = CheckBrowserOSInputSchema

  /**
   * @param _input - Ignored; the diagnostic needs no payload.
   * @returns `available: true` with the sorted `apis` list, or
   *   `available: false` with an `error` explaining what is missing.
   */
  async execute(_input: CheckBrowserOSInput): Promise<CheckBrowserOSOutput> {
    try {
      console.log('[CheckBrowserOSAction] Starting diagnostic...')
      console.log('[CheckBrowserOSAction] typeof chrome:', typeof chrome)

      // Only `typeof` can probe a global that may not be declared at all; a
      // bare `chrome !== undefined` throws a ReferenceError in that case
      // instead of evaluating to false.
      const chromeExists = typeof chrome !== 'undefined'
      console.log('[CheckBrowserOSAction] chrome exists:', chromeExists)
      if (!chromeExists) {
        console.log('[CheckBrowserOSAction] chrome is NOT available')
        return {
          available: false,
          error:
            'chrome is undefined - not running in a Chrome extension context',
        }
      }

      // Check if chrome.browserOS exists
      const browserOSExists = typeof chrome.browserOS !== 'undefined'
      console.log(
        '[CheckBrowserOSAction] typeof chrome.browserOS:',
        typeof chrome.browserOS,
      )
      console.log('[CheckBrowserOSAction] browserOSExists:', browserOSExists)
      if (!browserOSExists) {
        console.log('[CheckBrowserOSAction] chrome.browserOS is NOT available')
        return {
          available: false,
          error:
            'chrome.browserOS is undefined - not running in BrowserOS Chrome',
        }
      }

      // Collect the name of every enumerable member that is a function.
      const apis: string[] = []
      const browserOS = chrome.browserOS as Record<string, unknown>
      for (const key in browserOS) {
        if (typeof browserOS[key] === 'function') {
          apis.push(key)
        }
      }
      console.log('[CheckBrowserOSAction] Found APIs:', apis)
      return {
        available: true,
        apis: apis.sort(),
      }
    } catch (error) {
      console.error('[CheckBrowserOSAction] Error during diagnostic:', error)
      // Prefer the Error message; fall back to a string form of any other
      // truthy value, and to a fixed label when nothing useful was thrown.
      let errorMsg = 'Unknown error'
      if (error instanceof Error) {
        errorMsg = error.message
      } else if (error) {
        errorMsg = String(error)
      }
      return {
        available: false,
        error: errorMsg,
      }
    }
  }
}

View File

@@ -1,96 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { HistoryAdapter } from '@/adapters/HistoryAdapter'
import { ActionHandler } from '../ActionHandler'
// Builds an optional positive-integer field that falls back to `fallback`
// when the caller omits it.
const positiveIntWithDefault = (fallback: number, description: string) =>
  z
    .number()
    .int()
    .positive()
    .optional()
    .default(fallback)
    .describe(description)

// Payload for getRecentHistory; both fields are optional.
const GetRecentHistoryInputSchema = z.object({
  maxResults: positiveIntWithDefault(
    20,
    'Maximum number of results (default: 20)',
  ),
  hoursBack: positiveIntWithDefault(
    24,
    'How many hours back to search (default: 24)',
  ),
})
type GetRecentHistoryInput = z.infer<typeof GetRecentHistoryInputSchema>

// One history entry in the response; only `id` is mandatory.
const RecentHistoryItemSchema = z.object({
  id: z.string(),
  url: z.string().optional(),
  title: z.string().optional(),
  lastVisitTime: z.number().optional(),
  visitCount: z.number().optional(),
})

// Response: the entries plus how many there are.
const GetRecentHistoryOutputSchema = z.object({
  items: z.array(RecentHistoryItemSchema),
  count: z.number(),
})
type GetRecentHistoryOutput = z.infer<typeof GetRecentHistoryOutputSchema>
/**
 * GetRecentHistoryAction - Get recent browser history.
 *
 * Delegates to `HistoryAdapter.getRecentHistory(maxResults, hoursBack)` and
 * trims each result down to `id`, `url`, `title`, `lastVisitTime` and
 * `visitCount`.
 *
 * Input:
 * - maxResults (optional): max results (default: 20)
 * - hoursBack (optional): time range in hours (default: 24)
 *
 * Output:
 * - items: the trimmed history entries
 * - count: `items.length`
 *
 * Usage:
 * - Last 24 hours: { }
 * - Last hour: { "hoursBack": 1 }
 * - Last week: { "hoursBack": 168, "maxResults": 50 }
 */
export class GetRecentHistoryAction extends ActionHandler<
  GetRecentHistoryInput,
  GetRecentHistoryOutput
> {
  readonly inputSchema = GetRecentHistoryInputSchema
  private historyAdapter = new HistoryAdapter()

  /**
   * @param input - Validated payload with `maxResults` and `hoursBack`.
   * @returns The trimmed entries and their count.
   */
  async execute(input: GetRecentHistoryInput): Promise<GetRecentHistoryOutput> {
    const { maxResults, hoursBack } = input
    const history = await this.historyAdapter.getRecentHistory(
      maxResults,
      hoursBack,
    )

    // Copy only the documented fields so nothing else leaks into the response.
    const items = history.map(
      ({ id, url, title, lastVisitTime, visitCount }) => ({
        id,
        url,
        title,
        lastVisitTime,
        visitCount,
      }),
    )

    return { items, count: items.length }
  }
}

View File

@@ -1,104 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { HistoryAdapter } from '@/adapters/HistoryAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for searchHistory: the query text plus optional limit and time
// bounds (milliseconds since epoch).
const SearchHistoryInputSchema = z.object({
  query: z.string().describe('Search query to match URL or title'),
  maxResults: z
    .number()
    .int()
    .positive()
    .optional()
    .default(20)
    .describe('Maximum number of results (default: 20)'),
  startTime: z
    .number()
    .optional()
    .describe('Start time in milliseconds since epoch (optional)'),
  endTime: z
    .number()
    .optional()
    .describe('End time in milliseconds since epoch (optional)'),
})
type SearchHistoryInput = z.infer<typeof SearchHistoryInputSchema>

// One history entry in the response; only `id` is mandatory.
const SearchHistoryItemSchema = z.object({
  id: z.string(),
  url: z.string().optional(),
  title: z.string().optional(),
  lastVisitTime: z.number().optional(),
  visitCount: z.number().optional(),
  typedCount: z.number().optional(),
})

// Response: the entries plus how many there are.
const SearchHistoryOutputSchema = z.object({
  items: z.array(SearchHistoryItemSchema),
  count: z.number(),
})
type SearchHistoryOutput = z.infer<typeof SearchHistoryOutputSchema>
/**
 * SearchHistoryAction - Search browser history.
 *
 * Delegates to `HistoryAdapter.searchHistory(query, maxResults, startTime,
 * endTime)` and trims each result down to `id`, `url`, `title`,
 * `lastVisitTime`, `visitCount` and `typedCount`.
 *
 * Input:
 * - query: search text
 * - maxResults (optional): max results (default: 20)
 * - startTime (optional): start time filter
 * - endTime (optional): end time filter
 *
 * Output:
 * - items: the trimmed history entries
 * - count: `items.length`
 *
 * Usage:
 * - Simple search: { "query": "github" }
 * - With limit: { "query": "google", "maxResults": 10 }
 * - Time range: { "query": "", "startTime": 1729000000000, "endTime": 1729100000000 }
 */
export class SearchHistoryAction extends ActionHandler<
  SearchHistoryInput,
  SearchHistoryOutput
> {
  readonly inputSchema = SearchHistoryInputSchema
  private historyAdapter = new HistoryAdapter()

  /**
   * @param input - Validated payload with the query and optional bounds.
   * @returns The trimmed entries and their count.
   */
  async execute(input: SearchHistoryInput): Promise<SearchHistoryOutput> {
    const { query, maxResults, startTime, endTime } = input
    const matches = await this.historyAdapter.searchHistory(
      query,
      maxResults,
      startTime,
      endTime,
    )

    // Copy only the documented fields so nothing else leaks into the response.
    const items = matches.map(
      ({ id, url, title, lastVisitTime, visitCount, typedCount }) => ({
        id,
        url,
        title,
        lastVisitTime,
        visitCount,
        typedCount,
      }),
    )

    return { items, count: items.length }
  }
}

View File

@@ -1,61 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for closeTab: the positive integer ID of the tab to close.
const CloseTabInputSchema = z.object({
  tabId: z
    .number()
    .int()
    .positive()
    .describe('Tab ID to close'),
})
type CloseTabInput = z.infer<typeof CloseTabInputSchema>

// Result: success flag plus a confirmation message.
const CloseTabOutputSchema = z.object({
  success: z.boolean().describe('Whether the tab was successfully closed'),
  message: z.string().describe('Confirmation message'),
})
type CloseTabOutput = z.infer<typeof CloseTabOutputSchema>
/**
 * CloseTabAction - Close a specific tab by ID.
 *
 * Calls `TabAdapter.closeTab` and, once it resolves, returns a confirmation.
 * Adapter failures propagate as rejections.
 *
 * Example:
 * {
 *   "tabId": 123
 * }
 * // Returns: { success: true, message: "Closed tab 123" }
 */
export class CloseTabAction extends ActionHandler<
  CloseTabInput,
  CloseTabOutput
> {
  readonly inputSchema = CloseTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Validated payload; only `tabId` is read.
   * @returns `success: true` and a message naming the closed tab.
   */
  async execute(input: CloseTabInput): Promise<CloseTabOutput> {
    const { tabId } = input
    await this.tabAdapter.closeTab(tabId)

    const message = `Closed tab ${tabId}`
    return { success: true, message }
  }
}

View File

@@ -1,103 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
/**
 * getActiveTab - schema and result type.
 *
 * Input:  { windowId? } - optional window to look in
 * Output: { tabId, url, title, windowId }
 *
 * Example request:
 * {
 *   "id": "req-123",
 *   "action": "getActiveTab",
 *   "payload": {}
 * }
 *
 * Example response:
 * {
 *   "id": "req-123",
 *   "ok": true,
 *   "data": {
 *     "tabId": 5,
 *     "url": "https://google.com",
 *     "title": "Google",
 *     "windowId": 1
 *   }
 * }
 */

// Input: optional windowId for multi-window support. `.passthrough()` keeps
// unrecognised keys on the parsed payload instead of stripping them.
const GetActiveTabInputSchema = z
  .object({
    windowId: z
      .number()
      .int()
      .optional()
      .describe(
        'Window ID to get active tab from. If not provided, uses current window.',
      ),
  })
  .passthrough()

type GetActiveTabInput = z.infer<typeof GetActiveTabInputSchema>

// Result describing the active tab.
export interface GetActiveTabOutput {
  tabId: number
  url: string
  title: string
  windowId: number
}
/**
 * GetActiveTabAction - Report the active tab of a window.
 *
 * Looks the tab up through `TabAdapter.getActiveTab`, passing along the
 * optional `windowId`, and returns its ID, URL, title and window ID.
 */
export class GetActiveTabAction extends ActionHandler<
  GetActiveTabInput,
  GetActiveTabOutput
> {
  readonly inputSchema = GetActiveTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Validated payload; `windowId` is optional.
   * @returns Active tab information; `url` and `title` fall back to ''.
   * @throws Error when the tab returned by the adapter lacks an ID or a
   *   window ID.
   */
  async execute(input: GetActiveTabInput): Promise<GetActiveTabOutput> {
    const activeTab = await this.tabAdapter.getActiveTab(input.windowId)

    // Both identifiers are required for a usable result.
    const { id: tabId, windowId } = activeTab
    if (tabId === undefined) {
      throw new Error('Active tab has no ID')
    }
    if (windowId === undefined) {
      throw new Error('Active tab has no window ID')
    }

    const url = activeTab.url || ''
    const title = activeTab.title || ''
    return { tabId, url, title, windowId }
  }
}

View File

@@ -1,122 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for getTabs; every filter is optional.
const GetTabsInputSchema = z
  .object({
    currentWindowOnly: z
      .boolean()
      .optional()
      .default(false)
      .describe('If true, return only tabs in current window'),
    windowId: z
      .number()
      .int()
      .optional()
      .describe('If specified, return tabs in this window only'),
    url: z
      .string()
      .optional()
      .describe(
        'URL pattern to filter tabs (supports wildcards like "*://*.google.com/*")',
      ),
    title: z.string().optional().describe('Title pattern to filter tabs'),
  })
  .describe('Optional filters for querying tabs')

type GetTabsInput = z.infer<typeof GetTabsInputSchema>

// Simplified view of one tab as it appears in the response.
interface TabInfo {
  id: number
  url: string
  title: string
  windowId: number
  active: boolean
  index: number
}

// Response: the simplified tabs plus how many there are.
export interface GetTabsOutput {
  tabs: TabInfo[]
  count: number
}
/**
 * GetTabsAction - List tabs, optionally filtered.
 *
 * Exactly one lookup is performed, chosen by precedence (filters are not
 * combined):
 * 1. `windowId` set           -> tabs of that window
 * 2. `currentWindowOnly` true -> tabs of the current window
 * 3. `url` and/or `title` set -> adapter query with those patterns
 * 4. otherwise                -> all tabs
 *
 * Tabs lacking an `id` or `windowId` are dropped from the result.
 *
 * Example payloads:
 *
 * Get all tabs across all windows:
 * {}
 *
 * Get tabs in current window only:
 * { "currentWindowOnly": true }
 *
 * Get tabs in specific window:
 * { "windowId": 12345 }
 *
 * Get all Google tabs:
 * { "url": "*://*.google.com/*" }
 */
export class GetTabsAction extends ActionHandler<GetTabsInput, GetTabsOutput> {
  readonly inputSchema = GetTabsInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Validated filters.
   * @returns Simplified tab records and their count.
   */
  async execute(input: GetTabsInput): Promise<GetTabsOutput> {
    const rawTabs = await this.fetchMatchingTabs(input)

    // Convert to the simplified TabInfo shape, skipping unidentified tabs.
    const tabs: TabInfo[] = []
    for (const tab of rawTabs) {
      if (tab.id === undefined || tab.windowId === undefined) {
        continue
      }
      tabs.push({
        id: tab.id,
        url: tab.url || '',
        title: tab.title || '',
        windowId: tab.windowId,
        active: tab.active || false,
        index: tab.index,
      })
    }

    return { tabs, count: tabs.length }
  }

  /** Runs the single adapter lookup selected by the filter precedence. */
  private async fetchMatchingTabs(
    input: GetTabsInput,
  ): Promise<chrome.tabs.Tab[]> {
    if (input.windowId) {
      // An explicit window wins over every other filter.
      return this.tabAdapter.getTabsInWindow(input.windowId)
    }
    if (input.currentWindowOnly) {
      return this.tabAdapter.getCurrentWindowTabs()
    }
    if (input.url || input.title) {
      // Only the patterns that were actually supplied go into the query.
      const query: chrome.tabs.QueryInfo = {}
      if (input.url) query.url = input.url
      if (input.title) query.title = input.title
      return this.tabAdapter.queryTabs(query)
    }
    return this.tabAdapter.getAllTabs()
  }
}

View File

@@ -1,126 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Colours a tab group may be given.
const TAB_GROUP_COLORS = [
  'grey',
  'blue',
  'red',
  'yellow',
  'green',
  'pink',
  'purple',
  'cyan',
  'orange',
] as const
const TabGroupColorSchema = z.enum(TAB_GROUP_COLORS)

// Payload for groupTabs: at least one tab ID, plus optional presentation and
// targeting fields.
const GroupTabsInputSchema = z
  .object({
    tabIds: z
      .array(z.number().int().positive())
      .min(1)
      .describe('Array of tab IDs to group together'),
    title: z
      .string()
      .optional()
      .describe('Title for the group (e.g., "Shopping", "Work", "Research")'),
    color: TabGroupColorSchema.optional().describe(
      'Color for the group: grey, blue, red, yellow, green, pink, purple, cyan, orange',
    ),
    groupId: z
      .number()
      .int()
      .optional()
      .describe(
        'Existing group ID to add tabs to. If not specified, creates a new group.',
      ),
    windowId: z
      .number()
      .int()
      .optional()
      .describe('Window ID for scoping the group lookup'),
  })
  .describe('Group tabs together with optional title and color')

type GroupTabsInput = z.infer<typeof GroupTabsInputSchema>

// Result describing the group the tabs ended up in.
export interface GroupTabsOutput {
  groupId: number
  title: string
  color: string
  tabCount: number
}
/**
 * GroupTabsAction - Group tabs together.
 *
 * Groups the given tabs via `TabAdapter.groupTabs`, then:
 * - when `title` and/or `color` is supplied, applies them through
 *   `updateTabGroup` and reports the updated group;
 * - otherwise looks the group up through `getTabGroups(windowId)` and
 *   reports it as-is.
 *
 * `tabCount` in the result is the number of tab IDs in the request, not the
 * total membership of the group.
 *
 * Example payloads:
 *
 * Create new group with tabs:
 * { "tabIds": [123, 456, 789], "title": "Shopping", "color": "green" }
 *
 * Add tabs to existing group:
 * { "tabIds": [123, 456], "groupId": 1 }
 *
 * Create unnamed group:
 * { "tabIds": [123, 456] }
 */
export class GroupTabsAction extends ActionHandler<
  GroupTabsInput,
  GroupTabsOutput
> {
  readonly inputSchema = GroupTabsInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Validated payload.
   * @returns The group ID, its title ('' when unset), colour and `tabCount`.
   * @throws Error when no update was requested and the group cannot be found
   *   among the groups returned for `windowId`.
   */
  async execute(input: GroupTabsInput): Promise<GroupTabsOutput> {
    const { tabIds, title, color, windowId } = input

    // windowId is forwarded so the tabs are not moved to the wrong window.
    const groupId = await this.tabAdapter.groupTabs(
      tabIds,
      input.groupId,
      windowId,
    )

    // Shared result builder for both the "updated" and "looked up" paths.
    const summarize = (group: {
      title?: string | undefined
      color: string
    }): GroupTabsOutput => ({
      groupId,
      title: group.title || '',
      color: group.color,
      tabCount: tabIds.length,
    })

    const hasOverrides = !(title === undefined && color === undefined)
    if (hasOverrides) {
      const props: chrome.tabGroups.UpdateProperties = {}
      if (title !== undefined) props.title = title
      if (color !== undefined) props.color = color
      return summarize(await this.tabAdapter.updateTabGroup(groupId, props))
    }

    // Nothing to update: read the group back, scoped to windowId when given.
    const candidates = await this.tabAdapter.getTabGroups(windowId)
    const match = candidates.find((candidate) => candidate.id === groupId)
    if (!match) {
      throw new Error(`Tab group ${groupId} not found`)
    }
    return summarize(match)
  }
}

View File

@@ -1,83 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for listTabGroups: an optional window to restrict the listing to.
const ListTabGroupsInputSchema = z
  .object({
    windowId: z
      .number()
      .int()
      .optional()
      .describe(
        'Window ID to get groups from. If not specified, gets all groups.',
      ),
  })
  .describe('Optional filters for querying tab groups')

type ListTabGroupsInput = z.infer<typeof ListTabGroupsInputSchema>

// One tab group in the response, including the IDs of its member tabs.
interface TabGroupInfo {
  id: number
  windowId: number
  title: string
  color: string
  collapsed: boolean
  tabIds: number[]
}

// Response: the groups plus how many there are.
export interface ListTabGroupsOutput {
  groups: TabGroupInfo[]
  count: number
}
/**
 * ListTabGroupsAction - List tab groups with their member tabs.
 *
 * Fetches the groups (optionally scoped to `windowId`), fetches the tabs of
 * the same scope, and attaches to each group the IDs of the tabs whose
 * `groupId` matches it.
 *
 * Example payloads:
 *
 * Get all groups across all windows:
 * {}
 *
 * Get groups in specific window:
 * { "windowId": 12345 }
 */
export class ListTabGroupsAction extends ActionHandler<
  ListTabGroupsInput,
  ListTabGroupsOutput
> {
  readonly inputSchema = ListTabGroupsInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Validated payload; `windowId` is optional.
   * @returns The group records and their count.
   */
  async execute(input: ListTabGroupsInput): Promise<ListTabGroupsOutput> {
    const { windowId } = input
    const tabGroups = await this.tabAdapter.getTabGroups(windowId)

    // Tabs are needed to work out which ones belong to each group.
    let scopedTabs
    if (windowId) {
      scopedTabs = await this.tabAdapter.getTabsInWindow(windowId)
    } else {
      scopedTabs = await this.tabAdapter.getAllTabs()
    }

    // Index tab IDs by the group they belong to, preserving tab order.
    const tabIdsByGroup = new Map<number, number[]>()
    for (const tab of scopedTabs) {
      if (tab.id === undefined) continue
      const members = tabIdsByGroup.get(tab.groupId)
      if (members) {
        members.push(tab.id)
      } else {
        tabIdsByGroup.set(tab.groupId, [tab.id])
      }
    }

    const groups: TabGroupInfo[] = tabGroups.map((group) => ({
      id: group.id,
      windowId: group.windowId,
      title: group.title || '',
      color: group.color,
      collapsed: group.collapsed,
      // Copy so each record owns its own array.
      tabIds: [...(tabIdsByGroup.get(group.id) ?? [])],
    }))

    return { groups, count: groups.length }
  }
}

View File

@@ -1,93 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for navigate: a full URL, plus optional tab/window targeting.
const NavigateInputSchema = z.object({
  url: z.string().url().describe('URL to navigate to (must include https://)'),
  tabId: z
    .number()
    .int()
    .positive()
    .optional()
    .describe('Tab ID to navigate (optional, defaults to active tab)'),
  windowId: z
    .number()
    .int()
    .optional()
    .describe('Window ID for getting active tab when tabId not provided'),
})
type NavigateInput = z.infer<typeof NavigateInputSchema>

// Result identifying the navigated tab and echoing the requested URL.
const NavigateOutputSchema = z.object({
  tabId: z.number().describe('ID of the navigated tab'),
  windowId: z.number().describe('ID of the window containing the tab'),
  url: z.string().describe('URL that the tab is navigating to'),
  message: z.string().describe('Confirmation message'),
})
type NavigateOutput = z.infer<typeof NavigateOutputSchema>
/**
 * NavigateAction - Navigate a tab to a URL.
 *
 * Navigates the given tab, or the active tab when no `tabId` is supplied
 * (looked up in `windowId` when that is provided).
 *
 * Input:
 * - url: URL to navigate to (validated as a URL)
 * - tabId (optional): specific tab to navigate
 * - windowId (optional): window used for the active-tab lookup
 *
 * Output:
 * - tabId / windowId: identifiers of the navigated tab
 * - url: the requested URL
 * - message: confirmation message
 *
 * Usage:
 * - Navigate active tab: { "url": "https://google.com" }
 * - Navigate specific tab: { "url": "https://google.com", "tabId": 123 }
 */
export class NavigateAction extends ActionHandler<
  NavigateInput,
  NavigateOutput
> {
  readonly inputSchema = NavigateInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Validated payload.
   * @returns Identifiers of the navigated tab, the URL and a message.
   * @throws Error when the active tab has no ID, or when the tab returned by
   *   the adapter lacks an ID or window ID.
   */
  async execute(input: NavigateInput): Promise<NavigateOutput> {
    const { url, windowId } = input

    // Fall back to the active tab when the caller did not name one.
    let tabToNavigate = input.tabId
    if (!tabToNavigate) {
      const { id: activeId } = await this.tabAdapter.getActiveTab(windowId)
      if (activeId === undefined) {
        throw new Error('Active tab has no ID')
      }
      tabToNavigate = activeId
    }

    const navigated = await this.tabAdapter.navigateTab(tabToNavigate, url)
    const { id: tabId, windowId: owningWindowId } = navigated
    if (tabId === undefined || owningWindowId === undefined) {
      throw new Error('Navigated tab has no ID or windowId')
    }

    return {
      tabId,
      windowId: owningWindowId,
      url,
      message: `Navigating to ${input.url}`,
    }
  }
}

View File

@@ -1,88 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload for openTab; every field is optional and `active` defaults to true.
const OpenTabInputSchema = z.object({
  url: z
    .string()
    .url()
    .optional()
    .describe('URL to open (optional, defaults to new tab page)'),
  active: z
    .boolean()
    .optional()
    .default(true)
    .describe('Whether to make the new tab active'),
  windowId: z
    .number()
    .int()
    .optional()
    .describe(
      'Window ID to open the tab in. If not provided, opens in current window.',
    ),
})
type OpenTabInput = z.infer<typeof OpenTabInputSchema>

// Result describing the newly created tab.
const OpenTabOutputSchema = z.object({
  tabId: z.number().describe('ID of the newly created tab'),
  url: z.string().describe('URL of the new tab'),
  title: z.string().optional().describe('Title of the new tab'),
})
type OpenTabOutput = z.infer<typeof OpenTabOutputSchema>
/**
 * OpenTabAction - create a new browser tab
 *
 * Asks the tab adapter to open a tab, optionally at a given URL, optionally
 * in the background, and optionally in a specific window.
 *
 * Input:
 * - url (optional): URL to load in the new tab
 * - active (optional): whether the new tab becomes active (default: true)
 * - windowId (optional): window to open the tab in
 *
 * Output:
 * - tabId: ID of the tab that was created
 * - url: the tab's `url`, else its `pendingUrl`, else the requested URL,
 *   else "chrome://newtab/"
 * - title: title reported for the new tab (may be absent)
 *
 * Usage:
 * - Blank tab: { }
 * - Specific URL: { "url": "https://google.com" }
 * - Background tab: { "url": "https://google.com", "active": false }
 * - Specific window: { "url": "https://google.com", "windowId": 7 }
 *
 * Example:
 * {
 *   "url": "https://www.google.com",
 *   "active": true
 * }
 * // Returns: { tabId: 456, url: "https://www.google.com", title: "Google" }
 */
export class OpenTabAction extends ActionHandler<OpenTabInput, OpenTabOutput> {
  readonly inputSchema = OpenTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * Open the tab and report its id, URL and title.
   *
   * @throws Error when the created tab has no id
   */
  async execute(input: OpenTabInput): Promise<OpenTabOutput> {
    const { url: requestedUrl, windowId } = input
    const makeActive = input.active ?? true

    const created = await this.tabAdapter.openTab(
      requestedUrl,
      makeActive,
      windowId,
    )

    const tabId = created.id
    if (tabId === undefined) {
      throw new Error('Opened tab has no ID')
    }

    // Prefer what the browser reports; fall back to the request, then to the
    // new-tab page.
    const resolvedUrl =
      created.url || created.pendingUrl || requestedUrl || 'chrome://newtab/'

    return { tabId, url: resolvedUrl, title: created.title }
  }
}

View File

@@ -1,66 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for SwitchTabAction: a single required, positive integer tab ID.
const SwitchTabInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to switch to'),
})
// Output schema for SwitchTabAction. In the code visible here it is only used
// to derive the SwitchTabOutput type below.
const SwitchTabOutputSchema = z.object({
tabId: z.number().describe('ID of the tab that is now active'),
url: z.string().describe('URL of the active tab'),
title: z.string().describe('Title of the active tab'),
})
// Static types inferred from the schemas above.
type SwitchTabInput = z.infer<typeof SwitchTabInputSchema>
type SwitchTabOutput = z.infer<typeof SwitchTabOutputSchema>
/**
 * SwitchTabAction - focus a specific tab
 *
 * Delegates to the tab adapter's `switchTab` and reports the resulting tab.
 *
 * Input:
 * - tabId: ID of the tab to switch to
 *
 * Output:
 * - tabId: ID of the tab returned by the adapter
 * - url: that tab's URL ("" when the tab reports none)
 * - title: that tab's title ("" when the tab reports none)
 *
 * Usage:
 * Switch between tabs; tab IDs come from the getTabs action.
 *
 * Example:
 * {
 *   "tabId": 123
 * }
 * // Returns: { tabId: 123, url: "https://google.com", title: "Google" }
 */
export class SwitchTabAction extends ActionHandler<
  SwitchTabInput,
  SwitchTabOutput
> {
  readonly inputSchema = SwitchTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * Switch to `input.tabId` and describe the tab that came back.
   *
   * @throws Error when the returned tab has no id
   */
  async execute(input: SwitchTabInput): Promise<SwitchTabOutput> {
    const activated = await this.tabAdapter.switchTab(input.tabId)

    const tabId = activated.id
    if (tabId === undefined) {
      throw new Error('Switched tab has no ID')
    }

    // Missing url/title are normalised to empty strings for the caller.
    const url = activated.url || ''
    const title = activated.title || ''
    return { tabId, url, title }
  }
}

View File

@@ -1,48 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for UngroupTabsAction: a non-empty array of positive integer
// tab IDs.
const UngroupTabsInputSchema = z
.object({
tabIds: z
.array(z.number().int().positive())
.min(1)
.describe('Array of tab IDs to remove from their groups'),
})
.describe('Remove tabs from their groups')
// Static input type inferred from the schema above.
type UngroupTabsInput = z.infer<typeof UngroupTabsInputSchema>
/** Result returned by UngroupTabsAction. */
export interface UngroupTabsOutput {
// Filled by the action with the number of tab IDs that were submitted.
ungroupedCount: number
}
/**
 * UngroupTabsAction - detach tabs from their tab groups
 *
 * Hands the given tab IDs to the tab adapter's `ungroupTabs`. The reported
 * `ungroupedCount` is the number of IDs submitted, not a count confirmed by
 * the browser.
 *
 * Example payload:
 * { "tabIds": [123, 456, 789] }
 */
export class UngroupTabsAction extends ActionHandler<
  UngroupTabsInput,
  UngroupTabsOutput
> {
  readonly inputSchema = UngroupTabsInputSchema
  private tabAdapter = new TabAdapter()

  /** Ungroup every tab in `input.tabIds` and report how many IDs were sent. */
  async execute(input: UngroupTabsInput): Promise<UngroupTabsOutput> {
    const { tabIds } = input
    await this.tabAdapter.ungroupTabs(tabIds)
    const ungroupedCount = tabIds.length
    return { ungroupedCount }
  }
}

View File

@@ -1,90 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Colors accepted for a tab group. The same list is repeated in the `color`
// field description below, so keep the two in sync.
const TabGroupColorSchema = z.enum([
'grey',
'blue',
'red',
'yellow',
'green',
'pink',
'purple',
'cyan',
'orange',
])
// Input schema for UpdateTabGroupAction: `groupId` is required; `title`,
// `color` and `collapsed` are each optional.
const UpdateTabGroupInputSchema = z
.object({
groupId: z.number().int().describe('ID of the group to update'),
title: z.string().optional().describe('New title for the group'),
color: TabGroupColorSchema.optional().describe(
'New color for the group: grey, blue, red, yellow, green, pink, purple, cyan, orange',
),
collapsed: z
.boolean()
.optional()
.describe('Whether to collapse (hide) the group tabs'),
})
.describe('Update tab group properties')
// Static input type inferred from the schema above.
type UpdateTabGroupInput = z.infer<typeof UpdateTabGroupInputSchema>
/** Result returned by UpdateTabGroupAction: the group's properties after the update. */
export interface UpdateTabGroupOutput {
groupId: number
// Set to '' by the action when the group has no title.
title: string
color: string
collapsed: boolean
}
/**
 * UpdateTabGroupAction - change properties of an existing tab group
 *
 * Any combination of title, color and collapsed state may be supplied; only
 * the fields present in the payload are sent to the tab adapter.
 *
 * Example payloads:
 *
 * Rename a group:
 * { "groupId": 1, "title": "Work Projects" }
 *
 * Change color:
 * { "groupId": 1, "color": "blue" }
 *
 * Collapse a group:
 * { "groupId": 1, "collapsed": true }
 *
 * Update several properties at once:
 * { "groupId": 1, "title": "Research", "color": "purple", "collapsed": false }
 */
export class UpdateTabGroupAction extends ActionHandler<
  UpdateTabGroupInput,
  UpdateTabGroupOutput
> {
  readonly inputSchema = UpdateTabGroupInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * Apply the requested changes and return the group's resulting properties.
   * A group without a title is reported with `title: ''`.
   */
  async execute(input: UpdateTabGroupInput): Promise<UpdateTabGroupOutput> {
    const { groupId, title, color, collapsed } = input

    // Copy over only the fields the caller actually provided.
    const changes: chrome.tabGroups.UpdateProperties = {}
    if (title !== undefined) {
      changes.title = title
    }
    if (color !== undefined) {
      changes.color = color
    }
    if (collapsed !== undefined) {
      changes.collapsed = collapsed
    }

    const updated = await this.tabAdapter.updateTabGroup(groupId, changes)

    return {
      groupId: updated.id,
      title: updated.title || '',
      color: updated.color,
      collapsed: updated.collapsed,
    }
  }
}

View File

@@ -1,377 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
/**
 * BookmarkAdapter - Wrapper for Chrome bookmarks API
 *
 * Responsibilities:
 * - Provide clean Promise-based interface to Chrome bookmarks API
 * - Pass every Chrome call through `withTimeout` using
 *   `CHROME_API_TIMEOUTS.CHROME_API`
 * - Log failures and rethrow them as `Error('Failed to <action>: <cause>')`
 * - Log operations for debugging
 */
export class BookmarkAdapter {
  /**
   * Get all bookmarks as a tree structure
   *
   * @returns Bookmark tree root nodes
   * @throws Error `Failed to get bookmark tree: <cause>`
   */
  async getBookmarkTree(): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug('[BookmarkAdapter] Getting bookmark tree')
    return this._run('Failed to get bookmark tree', async () => {
      const tree = await this._callChrome(
        chrome.bookmarks.getTree(),
        'chrome.bookmarks.getTree',
      )
      logger.debug(
        `[BookmarkAdapter] Retrieved bookmark tree with ${tree.length} root nodes`,
      )
      return tree
    })
  }

  /**
   * Search bookmarks by query
   *
   * @param query - Search query (matches title and URL)
   * @returns Array of matching bookmarks
   * @throws Error `Failed to search bookmarks: <cause>`
   */
  async searchBookmarks(
    query: string,
  ): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug(`[BookmarkAdapter] Searching bookmarks: "${query}"`)
    return this._run('Failed to search bookmarks', async () => {
      const results = await this._callChrome(
        chrome.bookmarks.search(query),
        'chrome.bookmarks.search',
      )
      logger.debug(
        `[BookmarkAdapter] Found ${results.length} bookmarks matching "${query}"`,
      )
      return results
    })
  }

  /**
   * Get bookmark by ID
   *
   * @param id - Bookmark ID
   * @returns Bookmark node
   * @throws Error `Failed to get bookmark: <cause>`; the cause is
   *   "Bookmark not found" when Chrome returns an empty result
   */
  async getBookmark(id: string): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(`[BookmarkAdapter] Getting bookmark: ${id}`)
    return this._run('Failed to get bookmark', async () => {
      const results = await this._callChrome(
        chrome.bookmarks.get(id),
        'chrome.bookmarks.get',
      )
      if (results.length === 0) {
        throw new Error('Bookmark not found')
      }
      logger.debug(`[BookmarkAdapter] Retrieved bookmark: ${id}`)
      return results[0]
    })
  }

  /**
   * Create a new bookmark
   *
   * @param bookmark - Bookmark creation details
   * @returns Created bookmark node
   * @throws Error `Failed to create bookmark: <cause>`
   */
  async createBookmark(bookmark: {
    title: string
    url: string
    parentId?: string
  }): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(
      `[BookmarkAdapter] Creating bookmark: ${bookmark.title || 'Untitled'}`,
    )
    return this._run('Failed to create bookmark', async () => {
      const created = await this._callChrome(
        chrome.bookmarks.create(bookmark),
        'chrome.bookmarks.create',
      )
      logger.debug(
        `[BookmarkAdapter] Created bookmark: ${created.id} - ${created.title}`,
      )
      return created
    })
  }

  /**
   * Remove a bookmark by ID
   *
   * @param id - Bookmark ID to remove
   * @throws Error `Failed to remove bookmark: <cause>`
   */
  async removeBookmark(id: string): Promise<void> {
    logger.debug(`[BookmarkAdapter] Removing bookmark: ${id}`)
    return this._run(
      'Failed to remove bookmark',
      async () => {
        await this._callChrome(
          chrome.bookmarks.remove(id),
          'chrome.bookmarks.remove',
        )
        logger.debug(`[BookmarkAdapter] Removed bookmark: ${id}`)
      },
      ` ${id}`,
    )
  }

  /**
   * Update a bookmark
   *
   * @param id - Bookmark ID to update
   * @param changes - Changes to apply
   * @returns Updated bookmark node
   * @throws Error `Failed to update bookmark: <cause>`
   */
  async updateBookmark(
    id: string,
    changes: { title?: string; url?: string },
  ): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(`[BookmarkAdapter] Updating bookmark: ${id}`)
    return this._run(
      'Failed to update bookmark',
      async () => {
        const updated = await this._callChrome(
          chrome.bookmarks.update(id, changes),
          'chrome.bookmarks.update',
        )
        logger.debug(
          `[BookmarkAdapter] Updated bookmark: ${id} - ${updated.title}`,
        )
        return updated
      },
      ` ${id}`,
    )
  }

  /**
   * Get recent bookmarks
   *
   * Flattens the whole bookmark tree, keeps nodes that have both a `url` and
   * a `dateAdded`, and returns the newest `limit` of them.
   *
   * @param limit - Maximum number of bookmarks to return
   * @returns Array of recent bookmarks, newest first
   * @throws Error `Failed to get recent bookmarks: <cause>`
   */
  async getRecentBookmarks(
    limit = 20,
  ): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug(`[BookmarkAdapter] Getting ${limit} recent bookmarks`)
    return this._run('Failed to get recent bookmarks', async () => {
      const tree = await this._callChrome(
        chrome.bookmarks.getTree(),
        'chrome.bookmarks.getTree',
      )
      const bookmarks = this._flattenBookmarkTree(tree)
      // Filter to only URL bookmarks (not folders) and sort by dateAdded
      const urlBookmarks = bookmarks
        .filter((b) => b.url && b.dateAdded)
        .sort((a, b) => (b.dateAdded || 0) - (a.dateAdded || 0))
        .slice(0, limit)
      logger.debug(
        `[BookmarkAdapter] Found ${urlBookmarks.length} recent bookmarks`,
      )
      return urlBookmarks
    })
  }

  /**
   * Create a bookmark folder
   *
   * @param options - `title` is the folder name; `parentId` is the parent
   *   folder ID (defaults to "1" = Bookmarks Bar)
   * @returns Created folder node
   * @throws Error `Failed to create bookmark folder: <cause>`
   */
  async createBookmarkFolder(options: {
    title: string
    parentId?: string
  }): Promise<chrome.bookmarks.BookmarkTreeNode> {
    const { title, parentId = '1' } = options
    logger.debug(
      `[BookmarkAdapter] Creating bookmark folder: "${title}" in parent ${parentId}`,
    )
    return this._run('Failed to create bookmark folder', async () => {
      // No `url` is passed, which is what makes Chrome create a folder.
      const created = await this._callChrome(
        chrome.bookmarks.create({ title, parentId }),
        'chrome.bookmarks.create',
      )
      logger.debug(
        `[BookmarkAdapter] Created folder: ${created.id} - ${created.title}`,
      )
      return created
    })
  }

  /**
   * Get direct children of a folder
   *
   * @param folderId - Folder ID to get children from
   * @returns Array of child nodes
   * @throws Error `Failed to get bookmark children: <cause>`
   */
  async getBookmarkChildren(
    folderId: string,
  ): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug(`[BookmarkAdapter] Getting children of folder: ${folderId}`)
    return this._run('Failed to get bookmark children', async () => {
      const children = await this._callChrome(
        chrome.bookmarks.getChildren(folderId),
        'chrome.bookmarks.getChildren',
      )
      logger.debug(
        `[BookmarkAdapter] Found ${children.length} children in folder ${folderId}`,
      )
      return children
    })
  }

  /**
   * Move a bookmark or folder to a new location
   *
   * @param id - Bookmark or folder ID to move
   * @param destination - New location
   * @returns Updated bookmark node
   * @throws Error `Failed to move bookmark: <cause>`
   */
  async moveBookmark(
    id: string,
    destination: { parentId?: string; index?: number },
  ): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(
      `[BookmarkAdapter] Moving bookmark ${id} to parent ${destination.parentId}, index ${destination.index}`,
    )
    return this._run(
      'Failed to move bookmark',
      async () => {
        const moved = await this._callChrome(
          chrome.bookmarks.move(id, destination),
          'chrome.bookmarks.move',
        )
        logger.debug(
          `[BookmarkAdapter] Moved bookmark ${id} to ${moved.parentId}`,
        )
        return moved
      },
      ` ${id}`,
    )
  }

  /**
   * Remove a folder and all its contents recursively
   *
   * @param id - Folder ID to remove
   * @throws if id is a root node ("0", "1", "2"); this error is thrown as-is,
   *   before any Chrome call, and is not wrapped
   * @throws Error `Failed to remove bookmark tree: <cause>`
   */
  async removeBookmarkTree(id: string): Promise<void> {
    const protectedIds = ['0', '1', '2']
    if (protectedIds.includes(id)) {
      throw new Error(
        `Cannot delete protected bookmark folder: ${id}. Root folders (Bookmarks Bar, Other Bookmarks, Mobile Bookmarks) cannot be deleted.`,
      )
    }
    logger.debug(`[BookmarkAdapter] Removing bookmark tree: ${id}`)
    return this._run(
      'Failed to remove bookmark tree',
      async () => {
        await this._callChrome(
          chrome.bookmarks.removeTree(id),
          'chrome.bookmarks.removeTree',
        )
        logger.debug(`[BookmarkAdapter] Removed bookmark tree: ${id}`)
      },
      ` ${id}`,
    )
  }

  /**
   * Wrap a Chrome bookmarks promise in the adapter's standard timeout guard.
   * @private
   */
  private _callChrome<T>(call: Promise<T>, apiName: string): Promise<T> {
    return withTimeout(call, CHROME_API_TIMEOUTS.CHROME_API, apiName)
  }

  /**
   * Run an operation with the adapter's shared error handling: any failure is
   * logged as `<failure><logSubject>: <cause>` and rethrown as
   * `Error('<failure>: <cause>')`.
   * @private
   */
  private async _run<T>(
    failure: string,
    operation: () => Promise<T>,
    logSubject = '',
  ): Promise<T> {
    try {
      // Await here so rejections are handled by the catch below.
      return await operation()
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] ${failure}${logSubject}: ${errorMessage}`,
      )
      throw new Error(`${failure}: ${errorMessage}`)
    }
  }

  /**
   * Flatten bookmark tree into array (each node precedes its descendants)
   * @private
   */
  private _flattenBookmarkTree(
    nodes: chrome.bookmarks.BookmarkTreeNode[],
  ): chrome.bookmarks.BookmarkTreeNode[] {
    const result: chrome.bookmarks.BookmarkTreeNode[] = []
    for (const node of nodes) {
      result.push(node)
      if (node.children) {
        result.push(...this._flattenBookmarkTree(node.children))
      }
    }
    return result
  }
}

View File

@@ -1,907 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
/// <reference path="../types/chrome-browser-os.d.ts" />
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
// ============= Re-export types from chrome.browserOS namespace =============
// These aliases let other modules import the types from this adapter instead
// of referring to the ambient `chrome.browserOS` namespace directly.
export type InteractiveNode = chrome.browserOS.InteractiveNode
export type InteractiveSnapshot = chrome.browserOS.InteractiveSnapshot
export type InteractiveSnapshotOptions =
chrome.browserOS.InteractiveSnapshotOptions
export type PageLoadStatus = chrome.browserOS.PageLoadStatus
export type InteractiveNodeType = chrome.browserOS.InteractiveNodeType
// Note the rename: exported as `Rect`, declared as `BoundingRect`.
export type Rect = chrome.browserOS.BoundingRect
// New snapshot types
export type SnapshotType = chrome.browserOS.SnapshotType
export type SnapshotContext = chrome.browserOS.SnapshotContext
export type SectionType = chrome.browserOS.SectionType
export type TextSnapshotResult = chrome.browserOS.TextSnapshotResult
export type LinkInfo = chrome.browserOS.LinkInfo
export type LinksSnapshotResult = chrome.browserOS.LinksSnapshotResult
export type SnapshotSection = chrome.browserOS.SnapshotSection
export type Snapshot = chrome.browserOS.Snapshot
export type SnapshotOptions = chrome.browserOS.SnapshotOptions
export type PrefObject = chrome.browserOS.PrefObject
// ============= BrowserOS Adapter =============
// Screenshot size constants
/**
 * Named screenshot presets. `captureScreenshot` looks a preset up here and
 * passes the number to `chrome.browserOS.captureScreenshot` as the size
 * argument.
 */
export const SCREENSHOT_SIZES = {
small: 512, // Low token usage
medium: 768, // Balanced (default)
large: 1028, // High detail (note: 1028 not 1024)
} as const
/** Valid preset names: 'small' | 'medium' | 'large'. */
export type ScreenshotSizeKey = keyof typeof SCREENSHOT_SIZES
/**
* Adapter for Chrome BrowserOS Extension APIs
* Provides a clean interface to browserOS functionality with extensibility
*/
export class BrowserOSAdapter {
private static instance: BrowserOSAdapter | null = null
private constructor() {}
/**
 * Return the shared adapter, constructing it on the first call.
 */
static getInstance(): BrowserOSAdapter {
  const existing = BrowserOSAdapter.instance
  if (existing) {
    return existing
  }
  const created = new BrowserOSAdapter()
  BrowserOSAdapter.instance = created
  return created
}
/**
 * Get interactive snapshot of the page loaded in a tab
 *
 * @param tabId - Tab to snapshot
 * @param options - Optional snapshot options; when omitted the two-argument
 *   form of `chrome.browserOS.getInteractiveSnapshot` is called
 * @returns The snapshot delivered to the Chrome callback
 * @throws Error `Failed to get interactive snapshot: <cause>` when the call
 *   sets `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async getInteractiveSnapshot(
  tabId: number,
  options?: InteractiveSnapshotOptions,
): Promise<InteractiveSnapshot> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Getting interactive snapshot for tab ${tabId} with options: ${JSON.stringify(options)}`,
    )
    const promise = new Promise<InteractiveSnapshot>((resolve, reject) => {
      // One completion callback shared by both call forms.
      const onSnapshot = (snapshot: InteractiveSnapshot) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
          return
        }
        logger.debug(
          `[BrowserOSAdapter] Retrieved snapshot with ${snapshot.elements.length} elements`,
        )
        resolve(snapshot)
      }
      if (options) {
        chrome.browserOS.getInteractiveSnapshot(tabId, options, onSnapshot)
      } else {
        chrome.browserOS.getInteractiveSnapshot(tabId, onSnapshot)
      }
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getInteractiveSnapshot',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to get interactive snapshot: ${errorMessage}`,
    )
    throw new Error(`Failed to get interactive snapshot: ${errorMessage}`)
  }
}
/**
 * Click an element by node ID
 *
 * @param tabId - Tab containing the element
 * @param nodeId - Node ID of the element to click
 * @throws Error `Failed to click node <nodeId>: <cause>` when the call sets
 *   `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async click(tabId: number, nodeId: number): Promise<void> {
  try {
    logger.debug(`[BrowserOSAdapter] Clicking node ${nodeId} in tab ${tabId}`)
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.click(tabId, nodeId, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    await withTimeout(promise, CHROME_API_TIMEOUTS.BROWSEROS_ACTION, 'click')
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to click node: ${errorMessage}`)
    throw new Error(`Failed to click node ${nodeId}: ${errorMessage}`)
  }
}
/**
 * Input text into an element
 *
 * @param tabId - Tab containing the element
 * @param nodeId - Node ID of the element to type into
 * @param text - Text passed to `chrome.browserOS.inputText`
 * @throws Error `Failed to input text into node <nodeId>: <cause>` when the
 *   call sets `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async inputText(tabId: number, nodeId: number, text: string): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Inputting text into node ${nodeId} in tab ${tabId}`,
    )
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.inputText(tabId, nodeId, text, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'inputText',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to input text: ${errorMessage}`)
    throw new Error(
      `Failed to input text into node ${nodeId}: ${errorMessage}`,
    )
  }
}
/**
 * Clear text from an element
 *
 * @param tabId - Tab containing the element
 * @param nodeId - Node ID of the element to clear
 * @throws Error `Failed to clear node <nodeId>: <cause>` when the call sets
 *   `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async clear(tabId: number, nodeId: number): Promise<void> {
  try {
    logger.debug(`[BrowserOSAdapter] Clearing node ${nodeId} in tab ${tabId}`)
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.clear(tabId, nodeId, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    await withTimeout(promise, CHROME_API_TIMEOUTS.BROWSEROS_ACTION, 'clear')
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to clear node: ${errorMessage}`)
    throw new Error(`Failed to clear node ${nodeId}: ${errorMessage}`)
  }
}
/**
 * Scroll to a specific node
 *
 * @param tabId - Tab containing the node
 * @param nodeId - Node ID to scroll to
 * @returns The boolean passed to the `chrome.browserOS.scrollToNode` callback
 * @throws Error `Failed to scroll to node <nodeId>: <cause>` when the call
 *   sets `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async scrollToNode(tabId: number, nodeId: number): Promise<boolean> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Scrolling to node ${nodeId} in tab ${tabId}`,
    )
    const promise = new Promise<boolean>((resolve, reject) => {
      chrome.browserOS.scrollToNode(tabId, nodeId, (scrolled: boolean) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve(scrolled)
        }
      })
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'scrollToNode',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to scroll to node: ${errorMessage}`,
    )
    throw new Error(`Failed to scroll to node ${nodeId}: ${errorMessage}`)
  }
}
/**
 * Send keyboard keys
 *
 * @param tabId - Tab that receives the keys
 * @param keys - Key value passed to `chrome.browserOS.sendKeys`
 * @throws Error `Failed to send keys: <cause>` when the call sets
 *   `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async sendKeys(tabId: number, keys: chrome.browserOS.Key): Promise<void> {
  try {
    logger.debug(`[BrowserOSAdapter] Sending keys "${keys}" to tab ${tabId}`)
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.sendKeys(tabId, keys, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'sendKeys',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to send keys: ${errorMessage}`)
    throw new Error(`Failed to send keys: ${errorMessage}`)
  }
}
/**
 * Get page load status
 *
 * @param tabId - Tab to query
 * @returns The status passed to the `chrome.browserOS.getPageLoadStatus`
 *   callback
 * @throws Error `Failed to get page load status: <cause>` when the call sets
 *   `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async getPageLoadStatus(tabId: number): Promise<PageLoadStatus> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Getting page load status for tab ${tabId}`,
    )
    const promise = new Promise<PageLoadStatus>((resolve, reject) => {
      chrome.browserOS.getPageLoadStatus(tabId, (status: PageLoadStatus) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve(status)
        }
      })
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getPageLoadStatus',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to get page load status: ${errorMessage}`,
    )
    throw new Error(`Failed to get page load status: ${errorMessage}`)
  }
}
/**
 * Get accessibility tree (if available)
 *
 * @param tabId - Tab to query
 * @returns The tree passed to the `chrome.browserOS.getAccessibilityTree`
 *   callback
 * @throws Error `Failed to get accessibility tree: <cause>` when the call
 *   sets `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async getAccessibilityTree(
  tabId: number,
): Promise<chrome.browserOS.AccessibilityTree> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Getting accessibility tree for tab ${tabId}`,
    )
    const promise = new Promise<chrome.browserOS.AccessibilityTree>(
      (resolve, reject) => {
        chrome.browserOS.getAccessibilityTree(
          tabId,
          (tree: chrome.browserOS.AccessibilityTree) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message))
            } else {
              resolve(tree)
            }
          },
        )
      },
    )
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getAccessibilityTree',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to get accessibility tree: ${errorMessage}`,
    )
    throw new Error(`Failed to get accessibility tree: ${errorMessage}`)
  }
}
/**
 * Capture a screenshot of the tab
 *
 * Which form of `chrome.browserOS.captureScreenshot` is called depends on the
 * arguments supplied:
 * - `width` and `height` both given: exact dimensions (size argument sent as 0)
 * - `showHighlights` given: preset pixel size (0 when `size` is omitted) plus
 *   the highlight flag
 * - only `size` given: preset pixel size
 * - nothing given: tab ID only
 *
 * @param tabId - The tab ID to capture
 * @param size - Optional screenshot size ('small', 'medium', or 'large')
 * @param showHighlights - Optional flag to show element highlights
 * @param width - Optional exact width for screenshot
 * @param height - Optional exact height for screenshot
 * @returns The string passed to the Chrome callback
 * @throws Error `Failed to capture screenshot: <cause>` when the call sets
 *   `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async captureScreenshot(
  tabId: number,
  size?: ScreenshotSizeKey,
  showHighlights?: boolean,
  width?: number,
  height?: number,
): Promise<string> {
  try {
    const sizeDesc = size ? ` (${size})` : ''
    const highlightDesc = showHighlights ? ' with highlights' : ''
    const dimensionsDesc = width && height ? ` (${width}x${height})` : ''
    logger.debug(
      `[BrowserOSAdapter] Capturing screenshot for tab ${tabId}${sizeDesc}${highlightDesc}${dimensionsDesc}`,
    )
    const promise = new Promise<string>((resolve, reject) => {
      // Builds the completion callback used by every call form; only the
      // suffix of the success log line differs between them.
      const settle = (logSuffix: string) => (dataUrl: string) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
          return
        }
        logger.debug(
          `[BrowserOSAdapter] Screenshot captured for tab ${tabId}${logSuffix}`,
        )
        resolve(dataUrl)
      }

      if (width !== undefined && height !== undefined) {
        // Use exact dimensions if provided
        chrome.browserOS.captureScreenshot(
          tabId,
          0, // thumbnailSize ignored when width/height specified
          showHighlights || false,
          width,
          height,
          settle(` (${width}x${height})${highlightDesc}`),
        )
      } else if (size !== undefined || showHighlights !== undefined) {
        const pixelSize = size ? SCREENSHOT_SIZES[size] : 0
        if (showHighlights !== undefined) {
          // Use the API with thumbnail size and highlights
          chrome.browserOS.captureScreenshot(
            tabId,
            pixelSize,
            showHighlights,
            settle(`${sizeDesc}${highlightDesc}`),
          )
        } else {
          chrome.browserOS.captureScreenshot(
            tabId,
            pixelSize,
            settle(` (${size}: ${pixelSize}px)`),
          )
        }
      } else {
        // Use the original API without size (backwards compatibility)
        chrome.browserOS.captureScreenshot(tabId, settle(''))
      }
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'captureScreenshot',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to capture screenshot: ${errorMessage}`,
    )
    throw new Error(`Failed to capture screenshot: ${errorMessage}`)
  }
}
/**
 * Get a content snapshot from the page
 *
 * @param tabId - Tab to snapshot
 * @param _type - Accepted for interface compatibility; it is not forwarded to
 *   `chrome.browserOS.getSnapshot`, which is called with the tab ID only
 * @returns The snapshot passed to the Chrome callback
 * @throws Error `Failed to get snapshot: <cause>` when the call sets
 *   `chrome.runtime.lastError`, throws, or `withTimeout` rejects
 */
async getSnapshot(tabId: number, _type: SnapshotType): Promise<Snapshot> {
  try {
    logger.debug(`[BrowserOSAdapter] Getting snapshot for tab ${tabId}`)
    const promise = new Promise<Snapshot>((resolve, reject) => {
      chrome.browserOS.getSnapshot(tabId, (snapshot: Snapshot) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          logger.debug(
            `[BrowserOSAdapter] Retrieved snapshot: ${JSON.stringify(snapshot)}`,
          )
          resolve(snapshot)
        }
      })
    })
    // `await` is required: returning the bare promise would let rejections
    // escape this try block, skipping the logging and wrapping below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getSnapshot',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to get snapshot: ${errorMessage}`)
    throw new Error(`Failed to get snapshot: ${errorMessage}`)
  }
}
/**
 * Convenience wrapper that delegates to `getSnapshot(tabId, 'text')`.
 *
 * @deprecated Call `getSnapshot(tabId, 'text')` directly.
 */
async getTextSnapshot(tabId: number): Promise<Snapshot> {
  const textSnapshot = await this.getSnapshot(tabId, 'text')
  return textSnapshot
}
/**
 * Convenience wrapper that delegates to `getSnapshot(tabId, 'links')`.
 *
 * @deprecated Call `getSnapshot(tabId, 'links')` directly.
 */
async getLinksSnapshot(tabId: number): Promise<Snapshot> {
  const linksSnapshot = await this.getSnapshot(tabId, 'links')
  return linksSnapshot
}
/**
 * Call a `chrome.browserOS` member by name.
 * Intended for APIs that have no dedicated wrapper yet.
 *
 * @param method - Property name looked up on `chrome.browserOS`
 * @param args - Arguments forwarded to the member unchanged
 * @returns Whatever the awaited call yields
 * @throws Error `Failed to invoke BrowserOS API <method>: <cause>` when the
 *   name is not present on `chrome.browserOS` or the call fails
 */
async invokeAPI(method: string, ...args: unknown[]): Promise<unknown> {
  try {
    logger.debug(`[BrowserOSAdapter] Invoking BrowserOS API: ${method}`)
    const isKnown = method in chrome.browserOS
    if (isKnown) {
      // @ts-expect-error - Dynamic API invocation
      return await chrome.browserOS[method](...args)
    }
    throw new Error(`Unknown BrowserOS API method: ${method}`)
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to invoke API ${method}: ${reason}`,
    )
    throw new Error(`Failed to invoke BrowserOS API ${method}: ${reason}`)
  }
}
/**
 * Report whether `chrome.browserOS` has a property with the given name.
 *
 * @param method - Property name to look for
 */
isAPIAvailable(method: string): boolean {
  // Same semantics as the `in` operator.
  return Reflect.has(chrome.browserOS, method)
}
/**
 * List the names of the own enumerable `chrome.browserOS` properties whose
 * values are functions.
 */
getAvailableAPIs(): string[] {
  const callable: string[] = []
  for (const key of Object.keys(chrome.browserOS)) {
    // @ts-expect-error - Dynamic key access for API discovery
    if (typeof chrome.browserOS[key] === 'function') {
      callable.push(key)
    }
  }
  return callable
}
/**
 * Get BrowserOS version information
 *
 * @returns The version string passed to the
 *   `chrome.browserOS.getVersionNumber` callback, or `null` when that API is
 *   missing or the call fails (failures are logged, never thrown)
 */
async getVersion(): Promise<string | null> {
  try {
    logger.debug('[BrowserOSAdapter] Getting BrowserOS version')
    // `await` is required: returning the bare promise would let a
    // `lastError` rejection escape instead of becoming `null` below.
    return await new Promise<string | null>((resolve, reject) => {
      // Check if getVersionNumber API is available
      if (
        'getVersionNumber' in chrome.browserOS &&
        typeof chrome.browserOS.getVersionNumber === 'function'
      ) {
        chrome.browserOS.getVersionNumber((version: string) => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            logger.debug(`[BrowserOSAdapter] BrowserOS version: ${version}`)
            resolve(version)
          }
        })
      } else {
        // Fallback - return null if API not available
        resolve(null)
      }
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to get version: ${errorMessage}`)
    // Return null on error
    return null
  }
}
/**
 * Log a metric event with optional properties
 *
 * Best-effort: a missing `chrome.browserOS.logMetric` API produces a warning,
 * and any failure is logged and swallowed, so this method does not throw.
 *
 * @param eventName - Name of the metric event
 * @param properties - Optional properties; when omitted the two-argument form
 *   of `chrome.browserOS.logMetric` is called
 */
async logMetric(
  eventName: string,
  properties?: Record<string, unknown>,
): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Logging metric: ${eventName} with properties: ${JSON.stringify(properties)}`,
    )
    // `await` is required: returning the bare promise would let a
    // `lastError` rejection escape instead of being swallowed below.
    await new Promise<void>((resolve, reject) => {
      // Check if logMetric API is available
      if (
        'logMetric' in chrome.browserOS &&
        typeof chrome.browserOS.logMetric === 'function'
      ) {
        // One completion callback shared by both call forms.
        const onLogged = () => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            logger.debug(`[BrowserOSAdapter] Metric logged: ${eventName}`)
            resolve()
          }
        }
        if (properties) {
          chrome.browserOS.logMetric(eventName, properties, onLogged)
        } else {
          chrome.browserOS.logMetric(eventName, onLogged)
        }
      } else {
        // If API not available, log a warning but don't fail
        logger.warn(
          `[BrowserOSAdapter] logMetric API not available, skipping metric: ${eventName}`,
        )
        resolve()
      }
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to log metric: ${errorMessage}`)
    return
  }
}
/**
 * Execute JavaScript code in the specified tab.
 *
 * @param tabId - The tab ID to execute code in
 * @param code - The JavaScript code to execute
 * @returns The value passed to the `chrome.browserOS.executeJavaScript`
 *   callback
 * @throws Error prefixed with "Failed to execute JavaScript:" when the API is
 *   unavailable, `chrome.runtime.lastError` is set, or `withTimeout` rejects
 */
async executeJavaScript(tabId: number, code: string): Promise<unknown> {
  try {
    logger.debug(`[BrowserOSAdapter] Executing JavaScript in tab ${tabId}`)
    const promise = new Promise<unknown>((resolve, reject) => {
      // Check if executeJavaScript API is available
      if (
        'executeJavaScript' in chrome.browserOS &&
        typeof chrome.browserOS.executeJavaScript === 'function'
      ) {
        chrome.browserOS.executeJavaScript(tabId, code, (result: unknown) => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            logger.debug(
              `[BrowserOSAdapter] JavaScript executed successfully in tab ${tabId}`,
            )
            resolve(result)
          }
        })
      } else {
        reject(new Error('executeJavaScript API not available'))
      }
    })
    // Await inside the try block: returning the promise un-awaited would let
    // rejections bypass the catch, so failures would be neither logged nor
    // wrapped with the contextual message.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'executeJavaScript',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to execute JavaScript: ${errorMessage}`,
    )
    throw new Error(`Failed to execute JavaScript: ${errorMessage}`)
  }
}
/**
 * Click at specific viewport coordinates.
 *
 * @param tabId - The tab ID to click in
 * @param x - X coordinate in viewport pixels
 * @param y - Y coordinate in viewport pixels
 * @throws Error prefixed with "Failed to click at coordinates (x, y):" when
 *   the API is unavailable, `chrome.runtime.lastError` is set, or
 *   `withTimeout` rejects
 */
async clickCoordinates(tabId: number, x: number, y: number): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Clicking at coordinates (${x}, ${y}) in tab ${tabId}`,
    )
    const promise = new Promise<void>((resolve, reject) => {
      // Check if clickCoordinates API is available
      if (
        'clickCoordinates' in chrome.browserOS &&
        typeof chrome.browserOS.clickCoordinates === 'function'
      ) {
        chrome.browserOS.clickCoordinates(tabId, x, y, () => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            logger.debug(
              `[BrowserOSAdapter] Successfully clicked at (${x}, ${y}) in tab ${tabId}`,
            )
            resolve()
          }
        })
      } else {
        reject(new Error('clickCoordinates API not available'))
      }
    })
    // Await inside the try block so rejections are logged and wrapped by the
    // catch below instead of escaping it.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'clickCoordinates',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to click at coordinates: ${errorMessage}`,
    )
    throw new Error(
      `Failed to click at coordinates (${x}, ${y}): ${errorMessage}`,
    )
  }
}
/**
 * Type text at specific viewport coordinates.
 *
 * @param tabId - The tab ID to type in
 * @param x - X coordinate in viewport pixels
 * @param y - Y coordinate in viewport pixels
 * @param text - Text to type at the location
 * @throws Error prefixed with "Failed to type at coordinates (x, y):" when
 *   the API is unavailable, `chrome.runtime.lastError` is set, or
 *   `withTimeout` rejects
 */
async typeAtCoordinates(
  tabId: number,
  x: number,
  y: number,
  text: string,
): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Typing at coordinates (${x}, ${y}) in tab ${tabId}`,
    )
    const promise = new Promise<void>((resolve, reject) => {
      // Check if typeAtCoordinates API is available
      if (
        'typeAtCoordinates' in chrome.browserOS &&
        typeof chrome.browserOS.typeAtCoordinates === 'function'
      ) {
        chrome.browserOS.typeAtCoordinates(tabId, x, y, text, () => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            // NOTE(review): this logs the typed text verbatim; confirm that is
            // acceptable when the text may be a credential.
            logger.debug(
              `[BrowserOSAdapter] Successfully typed "${text}" at (${x}, ${y}) in tab ${tabId}`,
            )
            resolve()
          }
        })
      } else {
        reject(new Error('typeAtCoordinates API not available'))
      }
    })
    // Await inside the try block so rejections are logged and wrapped by the
    // catch below instead of escaping it.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'typeAtCoordinates',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to type at coordinates: ${errorMessage}`,
    )
    throw new Error(
      `Failed to type at coordinates (${x}, ${y}): ${errorMessage}`,
    )
  }
}
/**
 * Get a specific preference value.
 *
 * @param name - The preference name (e.g., "browseros.server.mcp_port")
 * @returns Promise resolving to the preference object passed to the
 *   `chrome.browserOS.getPref` callback
 * @throws Error prefixed with "Failed to get preference <name>:" when
 *   `chrome.runtime.lastError` is set or the call throws
 */
async getPref(name: string): Promise<PrefObject> {
  // NOTE(review): this method logs through `console` rather than `logger`;
  // confirm whether that is intentional.
  try {
    console.log(`[BrowserOSAdapter] Getting preference: ${name}`)
    // Await inside the try block so a rejection reaches the catch below; a
    // bare `return new Promise(...)` would skip the logging and wrapping.
    return await new Promise<PrefObject>((resolve, reject) => {
      chrome.browserOS.getPref(name, (pref: PrefObject) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          console.log(
            `[BrowserOSAdapter] Retrieved preference ${name}: ${JSON.stringify(pref)}`,
          )
          resolve(pref)
        }
      })
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    console.error(
      `[BrowserOSAdapter] Failed to get preference: ${errorMessage}`,
    )
    throw new Error(`Failed to get preference ${name}: ${errorMessage}`)
  }
}
/**
 * Set a specific preference value.
 *
 * @param name - The preference name (e.g., "browseros.server.mcp_enabled")
 * @param value - The value to set
 * @param pageId - Optional page ID for settings tracking; only forwarded to
 *   the API when it is not `undefined`
 * @returns Promise resolving to the success flag passed to the
 *   `chrome.browserOS.setPref` callback
 * @throws Error prefixed with "Failed to set preference <name>:" when
 *   `chrome.runtime.lastError` is set or the call throws
 */
async setPref(
  name: string,
  value: unknown,
  pageId?: string,
): Promise<boolean> {
  // NOTE(review): this method logs through `console` rather than `logger`;
  // confirm whether that is intentional.
  try {
    console.log(
      `[BrowserOSAdapter] Setting preference ${name} to ${JSON.stringify(value)}`,
    )
    // Await inside the try block so a rejection reaches the catch below; a
    // bare `return new Promise(...)` would skip the logging and wrapping.
    return await new Promise<boolean>((resolve, reject) => {
      // Shared completion callback for both call shapes below
      const onSet = (success: boolean) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          console.log(
            `[BrowserOSAdapter] Successfully set preference ${name}`,
          )
          resolve(success)
        }
      }
      if (pageId !== undefined) {
        chrome.browserOS.setPref(name, value, pageId, onSet)
      } else {
        chrome.browserOS.setPref(name, value, onSet)
      }
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    console.error(
      `[BrowserOSAdapter] Failed to set preference: ${errorMessage}`,
    )
    throw new Error(`Failed to set preference ${name}: ${errorMessage}`)
  }
}
/**
 * Get all preferences (filtered to browseros.* prefs).
 *
 * @returns Promise resolving to the array of preference objects passed to
 *   the `chrome.browserOS.getAllPrefs` callback
 * @throws Error prefixed with "Failed to get all preferences:" when
 *   `chrome.runtime.lastError` is set or the call throws
 */
async getAllPrefs(): Promise<PrefObject[]> {
  // NOTE(review): this method logs through `console` rather than `logger`;
  // confirm whether that is intentional.
  try {
    console.log('[BrowserOSAdapter] Getting all preferences')
    // Await inside the try block so a rejection reaches the catch below; a
    // bare `return new Promise(...)` would skip the logging and wrapping.
    return await new Promise<PrefObject[]>((resolve, reject) => {
      chrome.browserOS.getAllPrefs((prefs: PrefObject[]) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          console.log(
            `[BrowserOSAdapter] Retrieved ${prefs.length} preferences`,
          )
          resolve(prefs)
        }
      })
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    console.error(
      `[BrowserOSAdapter] Failed to get all preferences: ${errorMessage}`,
    )
    throw new Error(`Failed to get all preferences: ${errorMessage}`)
  }
}
}
/**
 * Convenience accessor: returns whatever `BrowserOSAdapter.getInstance()`
 * returns, so callers do not need to reference the class directly.
 */
export const getBrowserOSAdapter = () => {
  return BrowserOSAdapter.getInstance()
}

View File

@@ -1,261 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
/**
 * Runs one history operation with the adapter's shared failure handling:
 * any error is logged under the `[HistoryAdapter]` prefix and rethrown as a
 * new `Error` whose message starts with `errorLabel`.
 *
 * @param logLabel - Text logged before the underlying error message
 * @param errorLabel - Text that prefixes the rethrown error message
 * @param op - The operation to run
 */
async function runHistoryOp<T>(
  logLabel: string,
  errorLabel: string,
  op: () => Promise<T>,
): Promise<T> {
  try {
    return await op()
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[HistoryAdapter] ${logLabel}: ${errorMessage}`)
    throw new Error(`${errorLabel}: ${errorMessage}`)
  }
}

/**
 * HistoryAdapter - thin wrapper around the `chrome.history` API.
 *
 * Every call goes through `withTimeout` with
 * `CHROME_API_TIMEOUTS.CHROME_API`, is logged at debug level (warn level for
 * `deleteAll`), and is rethrown with a descriptive message on failure.
 */
export class HistoryAdapter {
  /**
   * Search browser history.
   *
   * @param query - Search text passed to `chrome.history.search`
   * @param maxResults - Maximum number of results (default: 100)
   * @param startTime - Optional lower time bound, in ms since epoch
   * @param endTime - Optional upper time bound, in ms since epoch
   * @returns The matching history items
   */
  async searchHistory(
    query: string,
    maxResults = 100,
    startTime?: number,
    endTime?: number,
  ): Promise<chrome.history.HistoryItem[]> {
    logger.debug(
      `[HistoryAdapter] Searching history: "${query}" (max: ${maxResults})`,
    )
    return runHistoryOp(
      'Failed to search history',
      'Failed to search history',
      async () => {
        const items = await withTimeout(
          chrome.history.search({
            text: query,
            maxResults,
            startTime,
            endTime,
          }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.search',
        )
        logger.debug(`[HistoryAdapter] Found ${items.length} history items`)
        return items
      },
    )
  }

  /**
   * Get recent browser history.
   *
   * @param maxResults - Maximum number of results (default: 20)
   * @param hoursBack - Size of the look-back window in hours (default: 24)
   * @returns History items newer than `now - hoursBack`
   */
  async getRecentHistory(
    maxResults = 20,
    hoursBack = 24,
  ): Promise<chrome.history.HistoryItem[]> {
    logger.debug(
      `[HistoryAdapter] Getting ${maxResults} recent history items (last ${hoursBack}h)`,
    )
    return runHistoryOp(
      'Failed to get recent history',
      'Failed to get recent history',
      async () => {
        // Convert the look-back window from hours to milliseconds
        const startTime = Date.now() - hoursBack * 60 * 60 * 1000
        const items = await withTimeout(
          chrome.history.search({
            text: '',
            maxResults,
            startTime,
          }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.search',
        )
        logger.debug(`[HistoryAdapter] Retrieved ${items.length} recent items`)
        return items
      },
    )
  }

  /**
   * Get visit details for a specific URL.
   *
   * @param url - URL to get visits for
   * @returns The visit items recorded for that URL
   */
  async getVisits(url: string): Promise<chrome.history.VisitItem[]> {
    logger.debug(`[HistoryAdapter] Getting visits for: ${url}`)
    return runHistoryOp(
      'Failed to get visits',
      'Failed to get visits',
      async () => {
        const visitItems = await withTimeout(
          chrome.history.getVisits({ url }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.getVisits',
        )
        logger.debug(
          `[HistoryAdapter] Found ${visitItems.length} visits for ${url}`,
        )
        return visitItems
      },
    )
  }

  /**
   * Add a URL to browser history.
   *
   * @param url - URL to add
   */
  async addUrl(url: string): Promise<void> {
    logger.debug(`[HistoryAdapter] Adding URL to history: ${url}`)
    return runHistoryOp(
      'Failed to add URL',
      'Failed to add URL to history',
      async () => {
        await withTimeout(
          chrome.history.addUrl({ url }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.addUrl',
        )
        logger.debug(`[HistoryAdapter] Added URL: ${url}`)
      },
    )
  }

  /**
   * Remove a specific URL from history.
   *
   * @param url - URL to remove
   */
  async deleteUrl(url: string): Promise<void> {
    logger.debug(`[HistoryAdapter] Removing URL from history: ${url}`)
    return runHistoryOp(
      'Failed to delete URL',
      'Failed to delete URL from history',
      async () => {
        await withTimeout(
          chrome.history.deleteUrl({ url }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.deleteUrl',
        )
        logger.debug(`[HistoryAdapter] Removed URL: ${url}`)
      },
    )
  }

  /**
   * Delete history within a time range.
   *
   * @param startTime - Start time in milliseconds since epoch
   * @param endTime - End time in milliseconds since epoch
   */
  async deleteRange(startTime: number, endTime: number): Promise<void> {
    logger.debug(
      `[HistoryAdapter] Deleting history range: ${new Date(startTime).toISOString()} to ${new Date(endTime).toISOString()}`,
    )
    return runHistoryOp(
      'Failed to delete history range',
      'Failed to delete history range',
      async () => {
        await withTimeout(
          chrome.history.deleteRange({ startTime, endTime }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.deleteRange',
        )
        logger.debug('[HistoryAdapter] Deleted history range')
      },
    )
  }

  /**
   * Delete all browser history.
   *
   * WARNING: destructive - calls `chrome.history.deleteAll()`.
   */
  async deleteAll(): Promise<void> {
    logger.warn('[HistoryAdapter] Deleting ALL browser history')
    return runHistoryOp(
      'Failed to delete all history',
      'Failed to delete all history',
      async () => {
        await withTimeout(
          chrome.history.deleteAll(),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.deleteAll',
        )
        logger.warn('[HistoryAdapter] Deleted all history')
      },
    )
  }

  /**
   * Get most visited URLs.
   *
   * Fetches up to 1000 history items, keeps those with a visit count above
   * one, and returns the top `maxResults` by visit count.
   *
   * @param maxResults - Maximum number of results (default: 10)
   * @returns History items ordered by descending visit count
   */
  async getMostVisited(maxResults = 10): Promise<chrome.history.HistoryItem[]> {
    logger.debug(`[HistoryAdapter] Getting ${maxResults} most visited URLs`)
    return runHistoryOp(
      'Failed to get most visited',
      'Failed to get most visited URLs',
      async () => {
        // Pull a large sample covering the whole history (startTime 0)
        const sample = await withTimeout(
          chrome.history.search({
            text: '',
            maxResults: 1000,
            startTime: 0,
          }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.history.search',
        )
        // Items visited only once (or with no count) are dropped
        const repeated = sample.filter((item) => (item.visitCount ?? 0) > 1)
        repeated.sort(
          (left, right) => (right.visitCount || 0) - (left.visitCount || 0),
        )
        const top = repeated.slice(0, maxResults)
        logger.debug(`[HistoryAdapter] Found ${top.length} most visited URLs`)
        return top
      },
    )
  }
}

View File

@@ -1,488 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
/**
 * Runs one tab operation with the adapter's shared failure handling: any
 * error is logged under the `[TabAdapter]` prefix and rethrown as a new
 * `Error` whose message starts with `errorLabel`.
 *
 * @param logLabel - Text logged before the underlying error message
 * @param errorLabel - Text that prefixes the rethrown error message
 * @param op - The operation to run
 */
async function runTabOp<T>(
  logLabel: string,
  errorLabel: string,
  op: () => Promise<T>,
): Promise<T> {
  try {
    return await op()
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[TabAdapter] ${logLabel}: ${errorMessage}`)
    throw new Error(`${errorLabel}: ${errorMessage}`)
  }
}

/**
 * TabAdapter - Wrapper for the Chrome tabs and tabGroups APIs.
 *
 * Responsibilities:
 * - Provide clean Promise-based interface to Chrome tabs API
 * - Handle Chrome API errors (log, then rethrow with a descriptive message)
 * - Log operations for debugging, always under the `[TabAdapter]` prefix
 *
 * Chrome tabs API is already Promise-based in Manifest V3,
 * so we add error handling, timeouts and logging.
 */
export class TabAdapter {
  /**
   * Get the currently active tab.
   *
   * @param windowId - Optional window ID. If provided, gets the active tab in
   *   that window. Otherwise uses the current window.
   * @returns Active tab in the specified or current window
   * @throws Error if no active tab is found or the query fails
   */
  async getActiveTab(windowId?: number): Promise<chrome.tabs.Tab> {
    logger.debug(
      `[TabAdapter] Getting active tab${windowId !== undefined ? ` in window ${windowId}` : ''}`,
    )
    return runTabOp(
      'Failed to get active tab',
      'Failed to get active tab',
      async () => {
        const query: chrome.tabs.QueryInfo = { active: true }
        if (windowId !== undefined) {
          query.windowId = windowId
        } else {
          query.currentWindow = true
        }
        const tabs = await withTimeout(
          chrome.tabs.query(query),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.query',
        )
        const activeTab = tabs[0]
        if (!activeTab) {
          throw new Error('No active tab found')
        }
        logger.debug(
          `[TabAdapter] Found active tab: ${activeTab.id} (${activeTab.url})`,
        )
        return activeTab
      },
    )
  }

  /**
   * Get a specific tab by ID.
   *
   * Any failure, whatever its cause, is reported to the caller as
   * "Tab not found"; the underlying message only goes to the error log.
   *
   * @param tabId - Tab ID to retrieve
   * @returns Tab object
   * @throws Error `Tab not found (id: <tabId>)` on any failure
   */
  async getTab(tabId: number): Promise<chrome.tabs.Tab> {
    logger.debug(`[TabAdapter] Getting tab ${tabId}`)
    try {
      const tab = await withTimeout(
        chrome.tabs.get(tabId),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.tabs.get',
      )
      logger.debug(`[TabAdapter] Found tab: ${tab.id} (${tab.url})`)
      return tab
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(`[TabAdapter] Failed to get tab ${tabId}: ${errorMessage}`)
      throw new Error(`Tab not found (id: ${tabId})`)
    }
  }

  /**
   * Get all tabs across all windows.
   *
   * @returns Array of all tabs
   */
  async getAllTabs(): Promise<chrome.tabs.Tab[]> {
    logger.debug('[TabAdapter] Getting all tabs')
    return runTabOp('Failed to get all tabs', 'Failed to get tabs', async () => {
      const tabs = await withTimeout(
        chrome.tabs.query({}),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.tabs.query',
      )
      logger.debug(`[TabAdapter] Found ${tabs.length} tabs`)
      return tabs
    })
  }

  /**
   * Query tabs with specific criteria.
   *
   * @param query - Chrome tabs query object
   * @returns Array of matching tabs
   */
  async queryTabs(query: chrome.tabs.QueryInfo): Promise<chrome.tabs.Tab[]> {
    logger.debug(`[TabAdapter] Querying tabs: ${JSON.stringify(query)}`)
    return runTabOp('Failed to query tabs', 'Failed to query tabs', async () => {
      const tabs = await withTimeout(
        chrome.tabs.query(query),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.tabs.query',
      )
      logger.debug(`[TabAdapter] Query found ${tabs.length} tabs`)
      return tabs
    })
  }

  /**
   * Get tabs in a specific window.
   *
   * @param windowId - Window ID
   * @returns Array of tabs in the window
   */
  async getTabsInWindow(windowId: number): Promise<chrome.tabs.Tab[]> {
    logger.debug(`[TabAdapter] Getting tabs in window ${windowId}`)
    return runTabOp(
      `Failed to get tabs in window ${windowId}`,
      'Failed to get tabs in window',
      async () => {
        const tabs = await withTimeout(
          chrome.tabs.query({ windowId }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.query',
        )
        logger.debug(
          `[TabAdapter] Found ${tabs.length} tabs in window ${windowId}`,
        )
        return tabs
      },
    )
  }

  /**
   * Get the current window's tabs.
   *
   * @param windowId - Optional window ID. If provided, gets tabs in that
   *   window. Otherwise uses the current window.
   * @returns Array of tabs in the specified or current window
   */
  async getCurrentWindowTabs(windowId?: number): Promise<chrome.tabs.Tab[]> {
    logger.debug(
      `[TabAdapter] Getting tabs in ${windowId !== undefined ? `window ${windowId}` : 'current window'}`,
    )
    return runTabOp(
      'Failed to get current window tabs',
      'Failed to get current window tabs',
      async () => {
        const query: chrome.tabs.QueryInfo = {}
        if (windowId !== undefined) {
          query.windowId = windowId
        } else {
          query.currentWindow = true
        }
        const tabs = await withTimeout(
          chrome.tabs.query(query),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.query',
        )
        logger.debug(`[TabAdapter] Found ${tabs.length} tabs`)
        return tabs
      },
    )
  }

  /**
   * Open a new tab with optional URL.
   *
   * @param url - URL to open (optional, defaults to the new tab page)
   * @param active - Whether to make the new tab active (default: true)
   * @param windowId - Optional window ID to open the tab in. If not provided,
   *   no window is specified in the create call.
   * @returns Newly created tab
   * @throws Error if creation fails or the created tab has no ID
   */
  async openTab(
    url?: string,
    active = true,
    windowId?: number,
  ): Promise<chrome.tabs.Tab> {
    // `||` rather than `??` so an empty string also opens the new tab page
    const targetUrl = url || 'chrome://newtab/'
    logger.debug(
      `[TabAdapter] Opening new tab: ${targetUrl} (active: ${active}${windowId !== undefined ? `, window: ${windowId}` : ''})`,
    )
    return runTabOp('Failed to open tab', 'Failed to open tab', async () => {
      const createProps: chrome.tabs.CreateProperties = {
        url: targetUrl,
        active,
      }
      if (windowId !== undefined) {
        createProps.windowId = windowId
      }
      const tab = await withTimeout(
        chrome.tabs.create(createProps),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.tabs.create',
      )
      if (!tab.id) {
        throw new Error('Created tab has no ID')
      }
      logger.debug(`[TabAdapter] Created tab ${tab.id}: ${targetUrl}`)
      return tab
    })
  }

  /**
   * Close a specific tab by ID.
   *
   * The tab is looked up first so its title can be included in the log; a
   * failed lookup therefore fails the close as well.
   *
   * @param tabId - Tab ID to close
   */
  async closeTab(tabId: number): Promise<void> {
    logger.debug(`[TabAdapter] Closing tab ${tabId}`)
    return runTabOp(
      `Failed to close tab ${tabId}`,
      `Failed to close tab ${tabId}`,
      async () => {
        // Get tab info before closing for logging
        const tab = await withTimeout(
          chrome.tabs.get(tabId),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.get',
        )
        const title = tab.title || 'Untitled'
        await withTimeout(
          chrome.tabs.remove(tabId),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.remove',
        )
        logger.debug(`[TabAdapter] Closed tab ${tabId}: ${title}`)
      },
    )
  }

  /**
   * Switch to (activate) a specific tab by ID.
   *
   * @param tabId - Tab ID to switch to
   * @returns Updated tab object
   */
  async switchTab(tabId: number): Promise<chrome.tabs.Tab> {
    logger.debug(`[TabAdapter] Switching to tab ${tabId}`)
    return runTabOp(
      `Failed to switch to tab ${tabId}`,
      `Failed to switch to tab ${tabId}`,
      async () => {
        // Update tab to be active
        const tab = await withTimeout(
          chrome.tabs.update(tabId, { active: true }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.update',
        )
        if (!tab) {
          throw new Error('Failed to update tab')
        }
        logger.debug(
          `[TabAdapter] Switched to tab ${tabId}: ${tab.title || 'Untitled'}`,
        )
        return tab
      },
    )
  }

  /**
   * Navigate a tab to a specific URL.
   *
   * @param tabId - Tab ID to navigate
   * @param url - URL to navigate to
   * @returns Updated tab object
   */
  async navigateTab(tabId: number, url: string): Promise<chrome.tabs.Tab> {
    logger.debug(`[TabAdapter] Navigating tab ${tabId} to ${url}`)
    return runTabOp(
      `Failed to navigate tab ${tabId}`,
      `Failed to navigate tab ${tabId} to ${url}`,
      async () => {
        const tab = await withTimeout(
          chrome.tabs.update(tabId, { url }),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.update',
        )
        if (!tab) {
          throw new Error('Failed to update tab')
        }
        logger.debug(`[TabAdapter] Tab ${tabId} navigating to ${url}`)
        return tab
      },
    )
  }

  /**
   * Group tabs together.
   *
   * @param tabIds - Array of tab IDs to group (must be non-empty)
   * @param groupId - Optional existing group ID to add tabs to
   * @param windowId - Optional window ID to create the group in (prevents
   *   tabs moving to the wrong window); ignored when `groupId` is given
   * @returns Group ID of the created or updated group
   * @throws Error if `tabIds` is empty or the Chrome call fails
   */
  async groupTabs(
    tabIds: number[],
    groupId?: number,
    windowId?: number,
  ): Promise<number> {
    if (tabIds.length === 0) {
      throw new Error('At least one tab ID is required')
    }
    // Use `!== undefined` (not truthiness) so the message agrees with the
    // option handling below.
    logger.debug(
      `[TabAdapter] Grouping tabs ${tabIds.join(', ')}${groupId !== undefined ? ` into group ${groupId}` : ''}${windowId !== undefined ? ` in window ${windowId}` : ''}`,
    )
    return runTabOp('Failed to group tabs', 'Failed to group tabs', async () => {
      // Chrome API expects [number, ...number[]] tuple type; non-emptiness
      // was checked above.
      const tabIdsTuple = tabIds as [number, ...number[]]
      const options: chrome.tabs.GroupOptions = { tabIds: tabIdsTuple }
      if (groupId !== undefined) {
        options.groupId = groupId
      }
      // Specify windowId to prevent Chrome from moving tabs to the focused window
      if (windowId !== undefined && groupId === undefined) {
        options.createProperties = { windowId }
      }
      const resultGroupId = await withTimeout(
        chrome.tabs.group(options),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.tabs.group',
      )
      logger.debug(`[TabAdapter] Grouped tabs into group ${resultGroupId}`)
      return resultGroupId
    })
  }

  /**
   * Ungroup tabs (remove them from their groups).
   *
   * @param tabIds - Array of tab IDs to ungroup (must be non-empty)
   * @throws Error if `tabIds` is empty or the Chrome call fails
   */
  async ungroupTabs(tabIds: number[]): Promise<void> {
    if (tabIds.length === 0) {
      throw new Error('At least one tab ID is required')
    }
    logger.debug(`[TabAdapter] Ungrouping tabs ${tabIds.join(', ')}`)
    return runTabOp(
      'Failed to ungroup tabs',
      'Failed to ungroup tabs',
      async () => {
        // Chrome API expects [number, ...number[]] tuple type or single number
        const tabIdsTuple = tabIds as [number, ...number[]]
        await withTimeout(
          chrome.tabs.ungroup(tabIdsTuple),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabs.ungroup',
        )
        logger.debug(`[TabAdapter] Ungrouped ${tabIds.length} tabs`)
      },
    )
  }

  /**
   * Get all tab groups in a window.
   *
   * @param windowId - Optional window ID. If not provided, the query is not
   *   restricted to a window.
   * @returns Array of tab groups
   */
  async getTabGroups(windowId?: number): Promise<chrome.tabGroups.TabGroup[]> {
    logger.debug(
      `[TabAdapter] Getting tab groups${windowId !== undefined ? ` in window ${windowId}` : ''}`,
    )
    return runTabOp(
      'Failed to get tab groups',
      'Failed to get tab groups',
      async () => {
        const query: chrome.tabGroups.QueryInfo = {}
        if (windowId !== undefined) {
          query.windowId = windowId
        }
        const groups = await withTimeout(
          chrome.tabGroups.query(query),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabGroups.query',
        )
        logger.debug(`[TabAdapter] Found ${groups.length} tab groups`)
        return groups
      },
    )
  }

  /**
   * Update a tab group's properties.
   *
   * @param groupId - Group ID to update
   * @param properties - Properties to update (title, color, collapsed)
   * @returns Updated tab group
   * @throws Error if the update fails or returns no group
   */
  async updateTabGroup(
    groupId: number,
    properties: chrome.tabGroups.UpdateProperties,
  ): Promise<chrome.tabGroups.TabGroup> {
    logger.debug(
      `[TabAdapter] Updating tab group ${groupId}: ${JSON.stringify(properties)}`,
    )
    return runTabOp(
      `Failed to update tab group ${groupId}`,
      `Failed to update tab group ${groupId}`,
      async () => {
        const group = await withTimeout(
          chrome.tabGroups.update(groupId, properties),
          CHROME_API_TIMEOUTS.CHROME_API,
          'chrome.tabGroups.update',
        )
        if (!group) {
          throw new Error(`Tab group ${groupId} not found`)
        }
        logger.debug(
          `[TabAdapter] Updated tab group ${groupId}: title="${group.title}", color="${group.color}"`,
        )
        return group
      },
    )
  }
}

View File

@@ -1,390 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { ActionRegistry } from '@/actions/ActionRegistry'
import { CreateBookmarkAction } from '@/actions/bookmark/CreateBookmarkAction'
import { CreateBookmarkFolderAction } from '@/actions/bookmark/CreateBookmarkFolderAction'
import { GetBookmarkChildrenAction } from '@/actions/bookmark/GetBookmarkChildrenAction'
import { GetBookmarksAction } from '@/actions/bookmark/GetBookmarksAction'
import { MoveBookmarkAction } from '@/actions/bookmark/MoveBookmarkAction'
import { RemoveBookmarkAction } from '@/actions/bookmark/RemoveBookmarkAction'
import { RemoveBookmarkTreeAction } from '@/actions/bookmark/RemoveBookmarkTreeAction'
import { UpdateBookmarkAction } from '@/actions/bookmark/UpdateBookmarkAction'
import { CaptureScreenshotAction } from '@/actions/browser/CaptureScreenshotAction'
import { CaptureScreenshotPointerAction } from '@/actions/browser/CaptureScreenshotPointerAction'
import { ClearAction } from '@/actions/browser/ClearAction'
import { ClickAction } from '@/actions/browser/ClickAction'
import { ClickCoordinatesAction } from '@/actions/browser/ClickCoordinatesAction'
import { CloseWindowAction } from '@/actions/browser/CloseWindowAction'
import { CreateWindowAction } from '@/actions/browser/CreateWindowAction'
import { ExecuteJavaScriptAction } from '@/actions/browser/ExecuteJavaScriptAction'
import { GetAccessibilityTreeAction } from '@/actions/browser/GetAccessibilityTreeAction'
import { GetInteractiveSnapshotAction } from '@/actions/browser/GetInteractiveSnapshotAction'
import { GetPageLoadStatusAction } from '@/actions/browser/GetPageLoadStatusAction'
import { GetSnapshotAction } from '@/actions/browser/GetSnapshotAction'
import { InputTextAction } from '@/actions/browser/InputTextAction'
import { ScrollDownAction } from '@/actions/browser/ScrollDownAction'
import { ScrollToNodeAction } from '@/actions/browser/ScrollToNodeAction'
import { ScrollUpAction } from '@/actions/browser/ScrollUpAction'
import { SendKeysAction } from '@/actions/browser/SendKeysAction'
import { TypeAtCoordinatesAction } from '@/actions/browser/TypeAtCoordinatesAction'
import { CheckBrowserOSAction } from '@/actions/diagnostics/CheckBrowserOSAction'
import { GetRecentHistoryAction } from '@/actions/history/GetRecentHistoryAction'
import { SearchHistoryAction } from '@/actions/history/SearchHistoryAction'
import { CloseTabAction } from '@/actions/tab/CloseTabAction'
import { GetActiveTabAction } from '@/actions/tab/GetActiveTabAction'
import { GetTabsAction } from '@/actions/tab/GetTabsAction'
import { GroupTabsAction } from '@/actions/tab/GroupTabsAction'
import { ListTabGroupsAction } from '@/actions/tab/ListTabGroupsAction'
import { NavigateAction } from '@/actions/tab/NavigateAction'
import { OpenTabAction } from '@/actions/tab/OpenTabAction'
import { SwitchTabAction } from '@/actions/tab/SwitchTabAction'
import { UngroupTabsAction } from '@/actions/tab/UngroupTabsAction'
import { UpdateTabGroupAction } from '@/actions/tab/UpdateTabGroupAction'
import { CONCURRENCY_CONFIG } from '@/config/constants'
import type { ProtocolRequest, ProtocolResponse } from '@/protocol/types'
import { ConnectionStatus } from '@/protocol/types'
import { ConcurrencyLimiter } from '@/utils/ConcurrencyLimiter'
import { logger } from '@/utils/logger'
import { RequestTracker } from '@/utils/RequestTracker'
import { RequestValidator } from '@/utils/RequestValidator'
import { ResponseQueue } from '@/utils/ResponseQueue'
import type { PortProvider } from '@/websocket/WebSocketClient'
import { WebSocketClient } from '@/websocket/WebSocketClient'
/**
* BrowserOS Controller
*
* Main controller class that orchestrates all components.
* Message flow: WebSocket → Validator → Tracker → Limiter → Action → Response/Queue → WebSocket
*/
export class BrowserOSController {
private wsClient: WebSocketClient
private requestTracker: RequestTracker
private concurrencyLimiter: ConcurrencyLimiter
private requestValidator: RequestValidator
private responseQueue: ResponseQueue
private actionRegistry: ActionRegistry
/**
 * Builds every collaborator, then registers actions and wires the WebSocket
 * handlers. No connection is opened here; see `start()`.
 *
 * @param getPort - Port provider handed to the WebSocket client
 */
constructor(getPort: PortProvider) {
  logger.info('Initializing BrowserOS Controller...')

  // Request pipeline pieces
  const { maxConcurrent, maxQueueSize } = CONCURRENCY_CONFIG
  this.requestTracker = new RequestTracker()
  this.concurrencyLimiter = new ConcurrencyLimiter(maxConcurrent, maxQueueSize)
  this.requestValidator = new RequestValidator()
  this.responseQueue = new ResponseQueue()

  // Transport and action dispatch
  this.wsClient = new WebSocketClient(getPort)
  this.actionRegistry = new ActionRegistry()

  // These run last, once the registry and client above exist
  this.registerActions()
  this.setupWebSocketHandlers()
}
/**
 * Connects the WebSocket client and then reports the browser's current
 * windows to the server.
 */
async start(): Promise<void> {
  logger.info('Starting BrowserOS Controller...')
  const { wsClient } = this
  await wsClient.connect()
  // Window registration is sent only once the connect call has completed
  await this.reportOwnedWindows()
}
/**
 * Sends a `register_windows` message listing the IDs of all windows returned
 * by `chrome.windows.getAll()`. Nothing is sent when no window has an ID.
 * Failures are logged as warnings and never thrown.
 */
private async reportOwnedWindows(): Promise<void> {
  try {
    const windowIds: number[] = []
    for (const win of await chrome.windows.getAll()) {
      // Skip windows without an ID
      if (win.id !== undefined) {
        windowIds.push(win.id)
      }
    }
    if (windowIds.length === 0) {
      return
    }
    this.wsClient.send({ type: 'register_windows', windowIds })
    logger.info('Reported owned windows to server', {
      windowCount: windowIds.length,
      windowIds,
    })
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.warn('Failed to report owned windows', { error: reason })
  }
}
notifyWindowCreated(windowId: number): void {
try {
this.wsClient.send({ type: 'window_created', windowId })
logger.info('Sent window_created event', { windowId })
} catch (error) {
logger.warn('Failed to send window_created event', {
windowId,
error: error instanceof Error ? error.message : String(error),
})
}
}
notifyWindowRemoved(windowId: number): void {
try {
this.wsClient.send({ type: 'window_removed', windowId })
logger.debug('Sent window_removed event', { windowId })
} catch (error) {
logger.warn('Failed to send window_removed event', {
windowId,
error: error instanceof Error ? error.message : String(error),
})
}
}
stop(): void {
logger.info('Stopping BrowserOS Controller...')
this.wsClient.disconnect()
this.requestTracker.destroy()
this.requestValidator.destroy()
this.responseQueue.clear()
}
logStats(): void {
const stats = this.getStats()
logger.info('=== Controller Stats ===')
logger.info(`Connection: ${stats.connection}`)
logger.info(`Requests: ${JSON.stringify(stats.requests)}`)
logger.info(`Concurrency: ${JSON.stringify(stats.concurrency)}`)
logger.info(`Validator: ${JSON.stringify(stats.validator)}`)
logger.info(`Response Queue: ${stats.responseQueue.size} queued`)
}
getStats() {
return {
connection: this.wsClient.getStatus(),
requests: this.requestTracker.getStats(),
concurrency: this.concurrencyLimiter.getStats(),
validator: this.requestValidator.getStats(),
responseQueue: {
size: this.responseQueue.size(),
},
}
}
isConnected(): boolean {
return this.wsClient.isConnected()
}
notifyWindowFocused(windowId?: number): void {
try {
this.wsClient.send({ type: 'focused', windowId })
logger.debug('Sent focused event', { windowId })
} catch (error) {
logger.warn('Failed to send focused event', {
windowId,
error: error instanceof Error ? error.message : String(error),
})
}
}
private registerActions(): void {
logger.info('Registering actions...')
this.actionRegistry.register('checkBrowserOS', new CheckBrowserOSAction())
this.actionRegistry.register('getActiveTab', new GetActiveTabAction())
this.actionRegistry.register('getTabs', new GetTabsAction())
this.actionRegistry.register('openTab', new OpenTabAction())
this.actionRegistry.register('closeTab', new CloseTabAction())
this.actionRegistry.register('switchTab', new SwitchTabAction())
this.actionRegistry.register('navigate', new NavigateAction())
this.actionRegistry.register('listTabGroups', new ListTabGroupsAction())
this.actionRegistry.register('groupTabs', new GroupTabsAction())
this.actionRegistry.register('updateTabGroup', new UpdateTabGroupAction())
this.actionRegistry.register('ungroupTabs', new UngroupTabsAction())
this.actionRegistry.register('createWindow', new CreateWindowAction())
this.actionRegistry.register('closeWindow', new CloseWindowAction())
this.actionRegistry.register('getBookmarks', new GetBookmarksAction())
this.actionRegistry.register('createBookmark', new CreateBookmarkAction())
this.actionRegistry.register('removeBookmark', new RemoveBookmarkAction())
this.actionRegistry.register('updateBookmark', new UpdateBookmarkAction())
this.actionRegistry.register(
'createBookmarkFolder',
new CreateBookmarkFolderAction(),
)
this.actionRegistry.register(
'getBookmarkChildren',
new GetBookmarkChildrenAction(),
)
this.actionRegistry.register('moveBookmark', new MoveBookmarkAction())
this.actionRegistry.register(
'removeBookmarkTree',
new RemoveBookmarkTreeAction(),
)
this.actionRegistry.register('searchHistory', new SearchHistoryAction())
this.actionRegistry.register(
'getRecentHistory',
new GetRecentHistoryAction(),
)
this.actionRegistry.register(
'getInteractiveSnapshot',
new GetInteractiveSnapshotAction(),
)
this.actionRegistry.register('click', new ClickAction())
this.actionRegistry.register('inputText', new InputTextAction())
this.actionRegistry.register('clear', new ClearAction())
this.actionRegistry.register('scrollToNode', new ScrollToNodeAction())
this.actionRegistry.register(
'captureScreenshot',
new CaptureScreenshotAction(),
)
this.actionRegistry.register(
'captureScreenshotPointer',
new CaptureScreenshotPointerAction(),
)
this.actionRegistry.register('scrollDown', new ScrollDownAction())
this.actionRegistry.register('scrollUp', new ScrollUpAction())
this.actionRegistry.register(
'executeJavaScript',
new ExecuteJavaScriptAction(),
)
this.actionRegistry.register('sendKeys', new SendKeysAction())
this.actionRegistry.register(
'getPageLoadStatus',
new GetPageLoadStatusAction(),
)
this.actionRegistry.register('getSnapshot', new GetSnapshotAction())
this.actionRegistry.register(
'getAccessibilityTree',
new GetAccessibilityTreeAction(),
)
this.actionRegistry.register(
'clickCoordinates',
new ClickCoordinatesAction(),
)
this.actionRegistry.register(
'typeAtCoordinates',
new TypeAtCoordinatesAction(),
)
const actions = this.actionRegistry.getAvailableActions()
logger.info(`Registered ${actions.length} action(s): ${actions.join(', ')}`)
}
private setupWebSocketHandlers(): void {
this.wsClient.onMessage((message: ProtocolResponse) => {
this.handleIncomingMessage(message)
})
this.wsClient.onStatusChange((status: ConnectionStatus) => {
this.handleStatusChange(status)
})
}
private handleIncomingMessage(message: ProtocolResponse): void {
const rawMessage = message as ProtocolResponse & Partial<ProtocolRequest>
if (rawMessage.action) {
this.processRequest(rawMessage).catch((error) => {
logger.error(
`Unhandled error processing request ${rawMessage.id}: ${error}`,
)
})
} else if (rawMessage.ok !== undefined) {
logger.info(
`Received server message: ${rawMessage.id} - ${rawMessage.ok ? 'success' : 'error'}`,
)
if (rawMessage.data) {
logger.debug(`Server data: ${JSON.stringify(rawMessage.data)}`)
}
} else {
logger.warn(
`Received unknown message format: ${JSON.stringify(rawMessage)}`,
)
}
}
private async processRequest(request: unknown): Promise<void> {
let validatedRequest: ProtocolRequest
let requestId: string | undefined
try {
validatedRequest = this.requestValidator.validate(request)
requestId = validatedRequest.id
this.requestTracker.start(validatedRequest.id, validatedRequest.action)
await this.concurrencyLimiter.execute(async () => {
this.requestTracker.markExecuting(validatedRequest.id)
await this.executeAction(validatedRequest)
})
this.requestTracker.complete(validatedRequest.id)
this.requestValidator.markComplete(validatedRequest.id)
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : String(error)
logger.error(`Request processing failed: ${errorMessage}`)
if (requestId) {
this.requestTracker.complete(requestId, errorMessage)
this.requestValidator.markComplete(requestId)
this.sendResponse({
id: requestId,
ok: false,
error: errorMessage,
})
}
}
}
private async executeAction(request: ProtocolRequest): Promise<void> {
logger.info(`Executing action: ${request.action} [${request.id}]`)
const actionResponse = await this.actionRegistry.dispatch(
request.action,
request.payload,
)
this.sendResponse({
id: request.id,
ok: actionResponse.ok,
data: actionResponse.data,
error: actionResponse.error,
})
const status = actionResponse.ok ? 'succeeded' : 'failed'
logger.info(`Action ${status}: ${request.action} [${request.id}]`)
}
private sendResponse(response: ProtocolResponse): void {
try {
if (this.wsClient.isConnected()) {
this.wsClient.send(response)
} else {
logger.warn(`Not connected. Queueing response: ${response.id}`)
this.responseQueue.enqueue(response)
}
} catch (error) {
logger.error(`Failed to send response ${response.id}: ${error}`)
this.responseQueue.enqueue(response)
}
}
private handleStatusChange(status: ConnectionStatus): void {
logger.info(`Connection status changed: ${status}`)
if (status === ConnectionStatus.CONNECTED) {
if (!this.responseQueue.isEmpty()) {
logger.info(`Flushing ${this.responseQueue.size()} queued responses...`)
this.responseQueue.flush((response) => {
this.wsClient.send(response)
})
}
}
}
}

View File

@@ -1,213 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { getWebSocketPort } from '@/utils/ConfigHelper'
import { startKeepAlive, stopKeepAlive } from '@/utils/KeepAlive'
import { logger } from '@/utils/logger'
import { BrowserOSController } from './BrowserOSController'
/** Delay, in milliseconds, between two periodic stats log entries. */
const STATS_LOG_INTERVAL_MS = 30000

/** Bookkeeping for the single controller instance. */
interface ControllerState {
  /** The started controller, or null when none is running. */
  controller: BrowserOSController | null
  /** In-flight initialisation, or null when no start is in progress. */
  initPromise: Promise<BrowserOSController> | null
  /** Handle of the stats `setInterval`, or null when the timer is off. */
  statsTimer: ReturnType<typeof setInterval> | null
}

/** `globalThis` extended with the slots this module reads and writes. */
type BrowserOSGlobals = typeof globalThis & {
  __browserosControllerState?: ControllerState
  __browserosController?: BrowserOSController | null
}

const globals = globalThis as BrowserOSGlobals

/**
 * Creates a fresh state record (seeded with any controller already exposed on
 * `globalThis`) and publishes it on `globalThis`.
 */
function createControllerState(): ControllerState {
  const fresh: ControllerState = {
    controller: globals.__browserosController ?? null,
    initPromise: null,
    statsTimer: null,
  }
  globals.__browserosControllerState = fresh
  return fresh
}

// Reuse the state already stored on globalThis when there is one.
const controllerState: ControllerState =
  globals.__browserosControllerState ?? createControllerState()
/**
 * Publishes (or clears, when given null) the controller on
 * `globalThis.__browserosController`.
 */
function setDebugController(controller: BrowserOSController | null): void {
  globals.__browserosController = controller
}
/**
 * Starts the periodic stats logger unless one is already running. Each tick
 * logs the stats of whichever controller is current at that moment, if any.
 */
function startStatsTimer(): void {
  if (!controllerState.statsTimer) {
    controllerState.statsTimer = setInterval(() => {
      controllerState.controller?.logStats()
    }, STATS_LOG_INTERVAL_MS)
  }
}
/** Cancels the periodic stats logger, if it is running, and forgets its handle. */
function stopStatsTimer(): void {
  const activeTimer = controllerState.statsTimer
  if (activeTimer) {
    clearInterval(activeTimer)
    controllerState.statsTimer = null
  }
}
/**
 * Returns the running controller, starting one if necessary.
 *
 * Concurrent callers share a single in-flight initialisation through
 * `controllerState.initPromise`. When start-up fails, the half-started
 * controller is stopped, the keep-alive and stats timer are torn down, and
 * the original error is rethrown so the next call can try again.
 *
 * @returns The started controller.
 * @throws Whatever `startKeepAlive()`, the controller constructor or
 *   `controller.start()` throws.
 */
async function getOrCreateController(): Promise<BrowserOSController> {
  if (controllerState.controller) {
    return controllerState.controller
  }
  if (controllerState.initPromise) {
    return controllerState.initPromise
  }

  const pending = (async (): Promise<BrowserOSController> => {
    let controller: BrowserOSController | null = null
    try {
      await startKeepAlive()
      controller = new BrowserOSController(getWebSocketPort)
      await controller.start()
      controllerState.controller = controller
      setDebugController(controller)
      startStatsTimer()
      return controller
    } catch (error) {
      // A controller that was constructed but failed to start would otherwise
      // be dropped without stop(), leaving its socket client and helper
      // objects un-torn-down.
      if (controller) {
        try {
          controller.stop()
        } catch (stopError) {
          logger.warn('Failed to stop controller after start failure', {
            error:
              stopError instanceof Error
                ? stopError.message
                : String(stopError),
          })
        }
      }
      controllerState.controller = null
      setDebugController(null)
      stopStatsTimer()
      try {
        await stopKeepAlive()
      } catch {
        // Best effort: the start failure below is the error worth reporting.
      }
      throw error
    } finally {
      controllerState.initPromise = null
    }
  })()

  // The async body cannot reach its `finally` before this assignment: it
  // always suspends at its first `await`.
  controllerState.initPromise = pending
  return pending
}
/**
 * Stops the controller (if any) and releases the stats timer and keep-alive.
 *
 * An in-flight initialisation is awaited first so the controller it produces
 * is also stopped. All synchronous cleanup runs before the awaited
 * `stopKeepAlive()`, and a throwing `controller.stop()` no longer prevents the
 * keep-alive from being cleared.
 *
 * @param reason - Free-form label recorded in the shutdown log entry.
 */
async function shutdownController(reason: string): Promise<void> {
  logger.info('Controller shutdown requested', { reason })

  const pendingInit = controllerState.initPromise
  if (pendingInit) {
    try {
      await pendingInit
    } catch {
      // ignore start errors during shutdown
    }
  }

  const controller = controllerState.controller
  controllerState.controller = null
  setDebugController(null)
  stopStatsTimer()

  if (controller) {
    try {
      controller.stop()
    } catch (error) {
      logger.warn('Controller stop failed during shutdown', {
        reason,
        error: error instanceof Error ? error.message : String(error),
      })
    }
  }

  try {
    await stopKeepAlive()
  } catch {
    // Best effort: there is nothing further to clean up if this fails.
  }
}
/**
 * Kicks off controller start-up without waiting for it. A failure is logged
 * together with the trigger that requested the start.
 *
 * @param trigger - Label identifying what requested the start.
 */
function ensureControllerRunning(trigger: string): void {
  const reportFailure = (error: unknown): void => {
    const message =
      error instanceof Error ? error.message : JSON.stringify(error)
    logger.error('Controller failed to start', { trigger, error: message })
  }
  getOrCreateController().catch(reportFailure)
}
logger.info('Extension loaded')

chrome.runtime.onInstalled.addListener(() => {
  logger.info('Extension installed')
})

chrome.runtime.onStartup.addListener(() => {
  logger.info('Browser startup event')
  ensureControllerRunning('runtime.onStartup')
})

// Immediately attempt to start the controller when the service worker initializes
ensureControllerRunning('service-worker-init')

// Forward focus changes, except the "no window focused" sentinel.
chrome.windows.onFocusChanged.addListener((windowId) => {
  if (windowId !== chrome.windows.WINDOW_ID_NONE) {
    notifyWindowFocused(windowId).catch((error) => {
      const message =
        error instanceof Error ? error.message : JSON.stringify(error)
      logger.warn('Failed to notify focus change', { windowId, error: message })
    })
  }
})

// Forward window creation; windows without an id are logged but not reported.
chrome.windows.onCreated.addListener((window) => {
  const windowId = window.id
  logger.info('Window created event received', { windowId })
  if (windowId === undefined) {
    return
  }
  notifyWindowCreated(windowId).catch((error) => {
    const message =
      error instanceof Error ? error.message : JSON.stringify(error)
    logger.warn('Failed to notify window created', {
      windowId,
      error: message,
    })
  })
})

// Forward window removal.
chrome.windows.onRemoved.addListener((windowId) => {
  notifyWindowRemoved(windowId).catch((error) => {
    const message =
      error instanceof Error ? error.message : JSON.stringify(error)
    logger.warn('Failed to notify window removed', { windowId, error: message })
  })
})

// onSuspend is accessed optionally, so nothing is registered when it is absent.
chrome.runtime.onSuspend?.addListener(() => {
  logger.info('Extension suspending')
  void shutdownController('runtime.onSuspend')
})
/** Ensures a controller exists, then forwards the focus change to it. */
async function notifyWindowFocused(windowId: number): Promise<void> {
  ;(await getOrCreateController()).notifyWindowFocused(windowId)
}
/** Ensures a controller exists, then forwards the window-created event to it. */
async function notifyWindowCreated(windowId: number): Promise<void> {
  ;(await getOrCreateController()).notifyWindowCreated(windowId)
}
/** Ensures a controller exists, then forwards the window-removed event to it. */
async function notifyWindowRemoved(windowId: number): Promise<void> {
  ;(await getOrCreateController()).notifyWindowRemoved(windowId)
}

View File

@@ -1,59 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { CONTENT_LIMITS } from '@browseros/shared/constants/limits'
import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
/** URL scheme used for the controller socket. */
export type WebSocketProtocol = 'ws' | 'wss'

/** Connection settings for the controller WebSocket. */
export interface WebSocketConfig {
  readonly protocol: WebSocketProtocol
  readonly host: string
  readonly path: string
  /** Port used when no port preference can be read. */
  readonly defaultExtensionPort: number
  readonly reconnectIntervalMs: number
  // NOTE(review): the four timing fields below carry no unit in their names;
  // presumably milliseconds like reconnectIntervalMs — confirm against TIMEOUTS.
  readonly heartbeatInterval: number
  readonly heartbeatTimeout: number
  readonly connectionTimeout: number
  readonly requestTimeout: number
}

/** Limits handed to the concurrency limiter. */
export interface ConcurrencyConfig {
  readonly maxConcurrent: number
  readonly maxQueueSize: number
}

/** Logger switches. */
export interface LoggingConfig {
  readonly enabled: boolean
  readonly level: 'debug' | 'info' | 'warn' | 'error'
  readonly prefix: string
}
/** Controller socket settings: loopback host, `/controller` path, shared timeouts. */
export const WEBSOCKET_CONFIG: WebSocketConfig = {
  protocol: 'ws',
  host: '127.0.0.1',
  path: '/controller',
  defaultExtensionPort: DEFAULT_PORTS.extension,
  reconnectIntervalMs: TIMEOUTS.WS_RECONNECT_INTERVAL,
  heartbeatInterval: TIMEOUTS.WS_HEARTBEAT_INTERVAL,
  heartbeatTimeout: TIMEOUTS.WS_HEARTBEAT_TIMEOUT,
  connectionTimeout: TIMEOUTS.WS_CONNECTION_TIMEOUT,
  requestTimeout: TIMEOUTS.WS_REQUEST_TIMEOUT,
}

/** One request at a time; the queue bound comes from the shared limits. */
export const CONCURRENCY_CONFIG: ConcurrencyConfig = {
  maxConcurrent: 1,
  maxQueueSize: CONTENT_LIMITS.MAX_QUEUE_SIZE,
}

/** Logging is on at `info` level with no prefix. */
export const LOGGING_CONFIG: LoggingConfig = {
  enabled: true,
  level: 'info',
  prefix: '',
}

View File

@@ -1,60 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
/** Shape of a request: an id, a non-empty action name and an optional payload. */
export const ProtocolRequestSchema = z.object({
  id: z.string().describe('Request UUID'),
  action: z.string().min(1).describe('Action name'),
  payload: z.any().optional().describe('Action-specific data'),
})

/** Shape of a response: the request id, a success flag and optional data / error text. */
export const ProtocolResponseSchema = z.object({
  id: z.string().describe('Request ID (same as request)'),
  ok: z.boolean().describe('Success flag'),
  data: z.any().optional().describe('Result data'),
  error: z.string().optional().describe('Error message'),
})
/**
 * Action response schema (used internally by action handlers).
 *
 * Beyond the field types it enforces that `error` is undefined when `ok` is
 * true, and that `error` is a non-empty string when `ok` is false.
 */
export const ActionResponseSchema = z
  .object({
    ok: z.boolean().describe('Success flag'),
    data: z.any().optional().describe('Result data'),
    error: z.string().optional().describe('Error message'),
  })
  .refine(
    // Success must carry no error; failure must carry a truthy one.
    ({ ok, error }) => (ok ? error === undefined : Boolean(error)),
    {
      message:
        'When ok is true, error must be undefined. When ok is false, error must be provided.',
    },
  )
/** Static types inferred from the schemas above. */
export type ProtocolRequest = z.infer<typeof ProtocolRequestSchema>
export type ProtocolResponse = z.infer<typeof ProtocolResponseSchema>
export type ActionResponse = z.infer<typeof ActionResponseSchema>

/** Connection states, each backed by its lowercase string value. */
export enum ConnectionStatus {
  DISCONNECTED = 'disconnected',
  CONNECTING = 'connecting',
  CONNECTED = 'connected',
  RECONNECTING = 'reconnecting',
  ERROR = 'error',
}

View File

@@ -1,419 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
// Type definitions for chrome.browserOS API
//
// Every function is callback-based. Most come as overload pairs: one taking
// an explicit `tabId` first, one without it.
declare namespace chrome.browserOS {
  // Page load status information
  interface PageLoadStatus {
    isResourcesLoading: boolean
    isDOMContentLoaded: boolean
    isPageComplete: boolean
  }

  // Rectangle bounds
  interface Rect {
    x: number
    y: number
    width: number
    height: number
  }

  // Alias for backward compatibility
  type BoundingRect = Rect

  // Interactive element types
  type InteractiveNodeType = 'clickable' | 'typeable' | 'selectable' | 'other'

  // Supported keyboard keys (the single source of truth for sendKeys)
  type Key =
    | 'Enter'
    | 'Delete'
    | 'Backspace'
    | 'Tab'
    | 'Escape'
    | 'ArrowUp'
    | 'ArrowDown'
    | 'ArrowLeft'
    | 'ArrowRight'
    | 'Home'
    | 'End'
    | 'PageUp'
    | 'PageDown'

  // Interactive node in the snapshot
  interface InteractiveNode {
    nodeId: number
    type: InteractiveNodeType
    name?: string
    rect?: Rect
    attributes?: {
      in_viewport?: string // "true" if visible in viewport, "false" if not visible
      [key: string]: string | undefined
    }
  }

  // Snapshot of interactive elements
  interface InteractiveSnapshot {
    snapshotId: number
    timestamp: number
    elements: InteractiveNode[]
    hierarchicalStructure?: string // Hierarchical text representation with context
    processingTimeMs: number // Performance metrics
  }

  // Options for getInteractiveSnapshot
  interface InteractiveSnapshotOptions {
    viewportOnly?: boolean
  }

  // Accessibility node
  interface AccessibilityNode {
    id: number
    role: string
    name?: string
    value?: string
    attributes?: Record<string, unknown>
    childIds?: number[]
  }

  // Accessibility tree
  interface AccessibilityTree {
    rootId: number
    nodes: Record<string, AccessibilityNode>
  }

  // API functions
  function getPageLoadStatus(
    tabId: number,
    callback: (status: PageLoadStatus) => void,
  ): void
  function getPageLoadStatus(callback: (status: PageLoadStatus) => void): void

  function getAccessibilityTree(
    tabId: number,
    callback: (tree: AccessibilityTree) => void,
  ): void
  function getAccessibilityTree(
    callback: (tree: AccessibilityTree) => void,
  ): void

  function getInteractiveSnapshot(
    tabId: number,
    options: InteractiveSnapshotOptions,
    callback: (snapshot: InteractiveSnapshot) => void,
  ): void
  function getInteractiveSnapshot(
    tabId: number,
    callback: (snapshot: InteractiveSnapshot) => void,
  ): void
  function getInteractiveSnapshot(
    options: InteractiveSnapshotOptions,
    callback: (snapshot: InteractiveSnapshot) => void,
  ): void
  function getInteractiveSnapshot(
    callback: (snapshot: InteractiveSnapshot) => void,
  ): void

  function click(tabId: number, nodeId: number, callback: () => void): void
  function click(nodeId: number, callback: () => void): void

  function inputText(
    tabId: number,
    nodeId: number,
    text: string,
    callback: () => void,
  ): void
  function inputText(nodeId: number, text: string, callback: () => void): void

  function clear(tabId: number, nodeId: number, callback: () => void): void
  function clear(nodeId: number, callback: () => void): void

  function scrollUp(tabId: number, callback: () => void): void
  function scrollUp(callback: () => void): void

  function scrollDown(tabId: number, callback: () => void): void
  function scrollDown(callback: () => void): void

  function scrollToNode(
    tabId: number,
    nodeId: number,
    callback: (scrolled: boolean) => void,
  ): void
  function scrollToNode(
    nodeId: number,
    callback: (scrolled: boolean) => void,
  ): void

  // Both overloads accept exactly the keys listed in `Key`.
  function sendKeys(tabId: number, key: Key, callback: () => void): void
  function sendKeys(key: Key, callback: () => void): void

  // Capture screenshot with all optional parameters
  function captureScreenshot(
    tabId: number,
    thumbnailSize: number,
    showHighlights: boolean,
    width: number,
    height: number,
    callback: (dataUrl: string) => void,
  ): void
  // Capture screenshot with tab ID, thumbnail size, and highlights
  function captureScreenshot(
    tabId: number,
    thumbnailSize: number,
    showHighlights: boolean,
    callback: (dataUrl: string) => void,
  ): void
  // Capture screenshot with tab ID and thumbnail size
  function captureScreenshot(
    tabId: number,
    thumbnailSize: number,
    callback: (dataUrl: string) => void,
  ): void
  // Capture screenshot with tab ID only (backwards compatibility)
  function captureScreenshot(
    tabId: number,
    callback: (dataUrl: string) => void,
  ): void
  // Capture screenshot of active tab with default size
  function captureScreenshot(callback: (dataUrl: string) => void): void

  // Snapshot extraction types
  type SnapshotType = 'text' | 'links'

  // Context for snapshot extraction
  type SnapshotContext = 'visible' | 'full'

  // Section types based on ARIA landmarks
  type SectionType =
    | 'main'
    | 'navigation'
    | 'footer'
    | 'header'
    | 'article'
    | 'aside'
    | 'complementary'
    | 'contentinfo'
    | 'form'
    | 'search'
    | 'region'
    | 'other'

  // Text snapshot result for a section
  interface TextSnapshotResult {
    text: string
    characterCount: number
  }

  // Link information
  interface LinkInfo {
    text: string
    url: string
    title?: string
    attributes?: Record<string, unknown>
    isExternal: boolean
  }

  // Links snapshot result for a section
  interface LinksSnapshotResult {
    links: LinkInfo[]
  }

  // Section with all possible snapshot results
  interface SnapshotSection {
    // NOTE(review): declared as plain string rather than SectionType — confirm
    // whether the browser can return values outside that union before narrowing.
    type: string
    textResult?: TextSnapshotResult
    linksResult?: LinksSnapshotResult
  }

  // Main snapshot result
  interface Snapshot {
    type: SnapshotType
    context: SnapshotContext
    timestamp: number
    sections: SnapshotSection[]
    processingTimeMs: number
  }

  // Options for getSnapshot
  interface SnapshotOptions {
    context?: SnapshotContext
    includeSections?: SectionType[]
  }

  function getSnapshot(
    tabId: number,
    type: SnapshotType,
    options: SnapshotOptions,
    callback: (snapshot: Snapshot) => void,
  ): void
  function getSnapshot(
    tabId: number,
    type: SnapshotType,
    callback: (snapshot: Snapshot) => void,
  ): void
  function getSnapshot(
    tabId: number,
    callback: (snapshot: Snapshot) => void,
  ): void
  function getSnapshot(
    type: SnapshotType,
    options: SnapshotOptions,
    callback: (snapshot: Snapshot) => void,
  ): void
  function getSnapshot(
    type: SnapshotType,
    callback: (snapshot: Snapshot) => void,
  ): void

  // Get BrowserOS version number
  function getVersionNumber(callback: (version: string) => void): void

  // Logs a metric event with optional properties
  function logMetric(
    eventName: string,
    properties: Record<string, unknown>,
    callback: () => void,
  ): void
  function logMetric(eventName: string, callback: () => void): void
  function logMetric(
    eventName: string,
    properties?: Record<string, unknown>,
  ): void
  function logMetric(eventName: string): void

  // Execute JavaScript in a tab
  function executeJavaScript(
    tabId: number,
    code: string,
    callback: (result: unknown) => void,
  ): void
  function executeJavaScript(
    code: string,
    callback: (result: unknown) => void,
  ): void

  // Click at specific viewport coordinates
  function clickCoordinates(
    tabId: number,
    x: number,
    y: number,
    callback: () => void,
  ): void
  function clickCoordinates(x: number, y: number, callback: () => void): void

  // Type text at specific viewport coordinates
  function typeAtCoordinates(
    tabId: number,
    x: number,
    y: number,
    text: string,
    callback: () => void,
  ): void
  function typeAtCoordinates(
    x: number,
    y: number,
    text: string,
    callback: () => void,
  ): void

  // Preference object
  interface PrefObject {
    key: string
    type: string
    value: unknown
  }

  // Get a specific preference value
  function getPref(name: string, callback: (pref: PrefObject) => void): void

  // Set a specific preference value
  function setPref(
    name: string,
    value: unknown,
    pageId: string,
    callback: (success: boolean) => void,
  ): void
  function setPref(
    name: string,
    value: unknown,
    callback: (success: boolean) => void,
  ): void

  // Get all preferences (filtered to browseros.* prefs)
  function getAllPrefs(callback: (prefs: PrefObject[]) => void): void
}
// Batch preference accessors.
// NOTE(review): this namespace is spelled `BrowserOS` (capital B), unlike the
// `chrome.browserOS` namespace declared earlier in this file — confirm the
// casing is intentional.
declare namespace chrome.BrowserOS {
  // Read several preferences at once; the callback receives a key → value map.
  function getPrefs(
    keys: string[],
    callback: (prefs: Record<string, unknown>) => void,
  ): void

  // Write several preferences at once; the completion callback is optional.
  function setPrefs(
    prefs: Record<string, unknown>,
    callback?: (success: boolean) => void,
  ): void
}

View File

@@ -1,123 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from './logger'
/** A task waiting in the limiter together with the settle callbacks of its promise. */
interface QueuedTask<T> {
  task: () => Promise<T>
  resolve: (value: T) => void
  reject: (error: Error) => void
}

/** Snapshot of the limiter's load. */
export interface ConcurrencyStats {
  /** Tasks currently executing. */
  inFlight: number
  /** Tasks waiting to execute. */
  queued: number
  /** Load figure reported by the limiter. */
  utilization: number
}
/**
 * Runs submitted tasks strictly one at a time, in arrival order, with a bound
 * on how many may wait.
 *
 * `maxConcurrent` is accepted for interface compatibility only: any value
 * other than 1 is logged and execution stays sequential.
 */
export class ConcurrencyLimiter {
  private isProcessing = false
  private queue: Array<QueuedTask<unknown>> = []

  /**
   * @param maxConcurrent - Requested concurrency; only used to warn when it is not 1.
   * @param maxQueueSize - Maximum number of waiting tasks before `execute` rejects.
   */
  constructor(
    maxConcurrent: number,
    private maxQueueSize = 1000,
  ) {
    if (maxConcurrent !== 1) {
      logger.warn(
        `ConcurrencyLimiter: maxConcurrent=${maxConcurrent} but extension is single-threaded. ` +
          `Using mutex mode (sequential execution) to prevent race conditions.`,
      )
    }
    logger.info(
      `ConcurrencyLimiter initialized: sequential=true, queueSize=${maxQueueSize}`,
    )
  }

  /**
   * Queues `task` and settles with its outcome once it has run.
   *
   * @param task - Work to run while holding the mutex.
   * @returns The value `task` resolves with.
   * @throws Error when the waiting queue is already full, or whatever `task`
   *   throws or rejects with.
   */
  async execute<T>(task: () => Promise<T>): Promise<T> {
    // Queue limit check first
    if (this.queue.length >= this.maxQueueSize) {
      logger.error(
        `Queue full (${this.maxQueueSize} requests). Rejecting request.`,
      )
      throw new Error(
        `Controller overloaded. Queue full (${this.maxQueueSize} requests). Server should slow down.`,
      )
    }

    return new Promise<T>((resolve, reject) => {
      this.queue.push({
        task,
        // The queue is typed as unknown; only a value produced by this very
        // task ever reaches this callback, so narrowing back to T is sound.
        resolve: (value: unknown) => resolve(value as T),
        reject,
      })
      const status = this.isProcessing ? 'QUEUED (mutex held)' : 'IMMEDIATE'
      logger.info(
        `[MUTEX] Task arrival - Status: ${status}, Queue size now: ${this.queue.length}`,
      )
      if (!this.isProcessing) {
        this.processQueue()
      }
    })
  }

  /**
   * Takes the next task (if the mutex is free) and runs it; on completion the
   * mutex is released and the following task is started.
   */
  private processQueue(): void {
    if (this.isProcessing) {
      return
    }

    // Log BEFORE we remove from queue to show true queue size
    const queueSizeBeforeRemoval = this.queue.length
    const item = this.queue.shift()
    if (!item) {
      return
    }
    this.isProcessing = true

    const { task, resolve, reject } = item
    logger.info(
      `[MUTEX] Acquired. Started processing (${queueSizeBeforeRemoval} task(s) were queued, ${this.queue.length} still waiting).`,
    )
    const startTime = Date.now()

    // A task that throws before returning its promise must still travel
    // through the chain below; otherwise the mutex is never released and
    // every later task waits forever.
    let running: Promise<unknown>
    try {
      running = task()
    } catch (error) {
      running = Promise.reject(error)
    }

    running
      .then(resolve)
      .catch(reject)
      .finally(() => {
        const duration = Date.now() - startTime
        this.isProcessing = false
        logger.info(
          `[MUTEX] Released after ${duration}ms. ${this.queue.length} task(s) remaining.`,
        )
        this.processQueue()
      })
  }

  /** Current load: 0 or 1 in flight, number waiting, utilisation of 0.0 or 1.0. */
  getStats(): ConcurrencyStats {
    const busy = this.isProcessing
    return {
      inFlight: busy ? 1 : 0,
      queued: this.queue.length,
      utilization: busy ? 1.0 : 0.0,
    }
  }

  // For debugging
  logStats(): void {
    const stats = this.getStats()
    logger.info(
      `Concurrency: ${stats.inFlight} in-flight (mutex mode), ` +
        `${stats.queued} queued, ` +
        `${Math.round(stats.utilization * 100)}% utilization`,
    )
  }
}

View File

@@ -1,37 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
/// <reference path="../types/chrome-browser-os.d.ts" />
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { WEBSOCKET_CONFIG } from '@/config/constants'
import { logger } from '@/utils/logger'
/** True for integers in the usable TCP port range 1–65535. */
function isUsablePort(value: unknown): value is number {
  return (
    typeof value === 'number' &&
    Number.isInteger(value) &&
    value >= 1 &&
    value <= 65535
  )
}

/**
 * Get the WebSocket port from BrowserOS preferences
 * Returns browseros.server.extension_port preference value
 * Falls back to port from constants if the preference cannot be retrieved,
 * is missing, or is not an integer between 1 and 65535.
 *
 * @returns The port to connect to; this function never rejects.
 */
export async function getWebSocketPort(): Promise<number> {
  const fallbackPort = WEBSOCKET_CONFIG.defaultExtensionPort
  try {
    const adapter = getBrowserOSAdapter()
    const pref = await adapter.getPref('browseros.server.extension_port')
    const candidate: unknown = pref ? pref.value : undefined

    if (isUsablePort(candidate)) {
      logger.info(`Using port from BrowserOS preferences: ${candidate}`)
      return candidate
    }

    if (typeof candidate === 'number') {
      // A numeric but unusable value (NaN, 0, negative, fractional, > 65535)
      // would otherwise be used as the connection port.
      logger.warn(
        `Port preference out of range (${candidate}), using default: ${fallbackPort}`,
      )
      return fallbackPort
    }

    logger.warn(`Port preference not found, using default: ${fallbackPort}`)
    return fallbackPort
  } catch (error) {
    logger.error(
      `Failed to get port from BrowserOS preferences: ${error}, using default: ${fallbackPort}`,
    )
    return fallbackPort
  }
}

View File

@@ -1,39 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
/** Name under which the keep-alive alarm is created and cleared. */
const KEEPALIVE_ALARM_NAME = 'browseros-keepalive'

// ~20 seconds
// NOTE(review): Chrome documents a minimum alarm period (30 s in recent
// versions); confirm that a 0.33-minute period is honoured in BrowserOS.
const KEEPALIVE_INTERVAL_MINUTES = 0.33

/** True between a successful startKeepAlive() and the next stopKeepAlive(). */
let isInitialized = false
/** Alarm handler: logs a debug ping whenever the keep-alive alarm fires. */
function handleKeepAliveAlarm(alarm: { name: string }): void {
  if (alarm.name === KEEPALIVE_ALARM_NAME) {
    logger.debug('KeepAlive: ping (service worker alive)')
  }
}

/**
 * Creates the periodic keep-alive alarm and attaches its handler.
 *
 * Does nothing when keep-alive is already running. The handler is a single
 * shared function attached at most once, so a start after a stop, or two
 * overlapping starts, no longer pile up duplicate listeners.
 */
export async function startKeepAlive(): Promise<void> {
  if (isInitialized) {
    logger.debug('KeepAlive already started')
    return
  }

  const alarmEvent = chrome.alarms.onAlarm
  if (!alarmEvent.hasListener(handleKeepAliveAlarm)) {
    alarmEvent.addListener(handleKeepAliveAlarm)
  }

  await chrome.alarms.create(KEEPALIVE_ALARM_NAME, {
    periodInMinutes: KEEPALIVE_INTERVAL_MINUTES,
  })
  isInitialized = true
  logger.info(
    `KeepAlive started: alarm every ${KEEPALIVE_INTERVAL_MINUTES * 60}s`,
  )
}
/**
 * Clears the keep-alive alarm and marks keep-alive as not running, so a later
 * startKeepAlive() creates the alarm again.
 */
export async function stopKeepAlive(): Promise<void> {
  const alarmName = KEEPALIVE_ALARM_NAME
  await chrome.alarms.clear(alarmName)
  isInitialized = false
  logger.info('KeepAlive stopped')
}

View File

@@ -1,142 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { logger } from '@/utils/logger'
/** How long (ms) an injected pointer stays on the page before removing itself. */
const POINTER_DISPLAY_DURATION_MS = 3000

/** Pause (ms) between showing the pointer and letting the action proceed. */
const POINTER_DELAY_BEFORE_ACTION_MS = 500
/**
 * PointerOverlay - Shows a visual mouse pointer overlay before actions
 *
 * Injects JavaScript to display a pointer arrow at the specified coordinates.
 * The pointer auto-removes after POINTER_DISPLAY_DURATION_MS.
 *
 * biome-ignore lint/complexity/noStaticOnlyClass: class created before biome was setup
 */
export class PointerOverlay {
  private static browserOS = getBrowserOSAdapter()

  /**
   * Show a pointer at the specified coordinates
   *
   * Failures to inject the script are logged and swallowed, so a missing
   * pointer never blocks the action it decorates.
   *
   * @param tabId - Tab to show pointer in
   * @param x - X coordinate in viewport pixels
   * @param y - Y coordinate in viewport pixels
   * @param text - Optional label text (e.g., "Click", "Type: hello...")
   */
  static async showPointer(
    tabId: number,
    x: number,
    y: number,
    text?: string,
  ): Promise<void> {
    const pointerId = `browseros-pointer-${Date.now()}`

    // The label text goes into source code that is evaluated in the page.
    // JSON.stringify emits a complete, correctly escaped string literal
    // (quotes, backslashes, line breaks), so the text can neither break the
    // script nor add statements to it.
    const textLabel = text
      ? `
    var label = document.createElement('div');
    label.style.cssText = 'position: absolute; top: 20px; left: 12px; background: rgba(0,0,0,0.9); color: white; padding: 4px 8px; border-radius: 4px; font-size: 12px; font-family: monospace; white-space: nowrap; box-shadow: 0 2px 4px rgba(0,0,0,0.5);';
    label.textContent = ${JSON.stringify(text)};
    shadow.appendChild(label);
    `
      : ''

    // The script removes any earlier pointer, defines the custom element once
    // per page, appends a new pointer and schedules its removal.
    const script = `
  (function() {
    var existing = document.querySelector('browseros-pointer');
    if (existing) existing.remove();
    if (!customElements.get('browseros-pointer')) {
      customElements.define('browseros-pointer', class extends HTMLElement {
        constructor() {
          super();
          this.attachShadow({ mode: 'open' });
        }
      });
    }
    var host = document.createElement('browseros-pointer');
    host.id = '${pointerId}';
    host.style.cssText = 'position: fixed; left: ${x}px; top: ${y}px; z-index: 2147483647; pointer-events: none;';
    var shadow = host.shadowRoot;
    var arrow = document.createElement('div');
    arrow.style.cssText = 'width: 0; height: 0; border-style: solid; border-width: 0 12px 20px 12px; border-color: transparent transparent #FB6618 transparent; transform: translate(-3px, -3px) rotate(45deg); filter: drop-shadow(1px 1px 2px rgba(0,0,0,0.4));';
    shadow.appendChild(arrow);
    ${textLabel}
    document.body.appendChild(host);
    setTimeout(function() {
      var el = document.getElementById('${pointerId}');
      if (el) el.remove();
    }, ${POINTER_DISPLAY_DURATION_MS});
  })();
  `

    try {
      await PointerOverlay.browserOS.executeJavaScript(tabId, script)
      logger.debug(
        `[PointerOverlay] Showed pointer at (${x}, ${y}) in tab ${tabId}${text ? ` with label "${text}"` : ''}`,
      )
    } catch (error) {
      logger.warn(
        `[PointerOverlay] Failed to show pointer: ${error instanceof Error ? error.message : String(error)}`,
      )
    }
  }

  /**
   * Show pointer and wait before action
   * Returns after the delay so the action can proceed
   */
  static async showPointerAndWait(
    tabId: number,
    x: number,
    y: number,
    text?: string,
  ): Promise<void> {
    await PointerOverlay.showPointer(tabId, x, y, text)
    await PointerOverlay.delay(POINTER_DELAY_BEFORE_ACTION_MS)
  }

  /**
   * Calculate center coordinates from a rect
   *
   * @returns The rect's midpoint, rounded to whole numbers.
   */
  static getCenterCoordinates(rect: {
    x: number
    y: number
    width: number
    height: number
  }): { x: number; y: number } {
    return {
      x: Math.round(rect.x + rect.width / 2),
      y: Math.round(rect.y + rect.height / 2),
    }
  }

  /**
   * Calculate left-center coordinates (for type actions)
   *
   * @returns A point 10 units in from the rect's left edge, vertically centred.
   */
  static getLeftCenterCoordinates(rect: {
    x: number
    y: number
    width: number
    height: number
  }): { x: number; y: number } {
    return {
      x: Math.round(rect.x + 10),
      y: Math.round(rect.y + rect.height / 2),
    }
  }

  /** Resolves after `ms` milliseconds. */
  private static delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }
}

View File

@@ -1,129 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from './logger'
/** Bookkeeping record kept by RequestTracker for a single request. */
export interface TrackedRequest {
/** Identifier the request is stored under. */
id: string
/** Action label supplied when tracking started; echoed in log messages. */
action: string
/** `Date.now()` value (ms) captured when tracking started. */
startTime: number
/** Lifecycle state: created as 'pending', finishes as 'completed' or 'failed'. */
status: 'pending' | 'executing' | 'completed' | 'failed'
/** Milliseconds between start and completion; only set once the request finished. */
duration?: number
/** Error text passed to `complete()`; a truthy value marks the request 'failed'. */
error?: string
}
/** Aggregate figures computed by `RequestTracker.getStats()` over all tracked requests. */
export interface RequestStats {
/** Number of requests currently 'pending' or 'executing'. */
inFlight: number
/** Mean duration (ms, rounded) of requests that have a recorded duration; 0 if none. */
avgDuration: number
/** Fraction of tracked requests that failed, rounded to two decimals; 0 if none tracked. */
errorRate: number
/** Number of requests currently held by the tracker. */
totalRequests: number
}
/**
 * Tracks the lifecycle of requests (pending → executing → completed/failed)
 * and derives aggregate statistics from them.
 *
 * A finished request stays queryable for one minute after `complete()` and
 * is then evicted. A periodic sweep additionally removes finished requests
 * whose start time is more than five minutes old. Call `destroy()` to stop
 * every timer the tracker owns.
 */
export class RequestTracker {
  private requests = new Map<string, TrackedRequest>()
  // Pending per-request eviction timers, keyed by request id, so they can be
  // cancelled when an id is reused or the tracker is destroyed.
  private evictionTimers = new Map<string, ReturnType<typeof setTimeout>>()
  private cleanupInterval: ReturnType<typeof setInterval> | null = null

  constructor() {
    // Start periodic cleanup of old completed requests
    this.cleanupInterval = setInterval(() => this.cleanup(), 60000) // Every 1 minute
  }

  /**
   * Begin tracking a request in the 'pending' state.
   * Re-using an id replaces the previous record for that id.
   */
  start(id: string, action: string): void {
    // A reused id must not be deleted by the eviction timer of its previous run.
    this.cancelEviction(id)
    this.requests.set(id, {
      id,
      action,
      startTime: Date.now(),
      status: 'pending',
    })
    logger.debug(`Request started: ${id} [${action}]`)
  }

  /** Move a tracked request to 'executing'. Unknown ids are ignored. */
  markExecuting(id: string): void {
    const req = this.requests.get(id)
    if (req) {
      req.status = 'executing'
      logger.debug(`Request executing: ${id}`)
    }
  }

  /**
   * Finish a tracked request, recording its duration.
   * Unknown ids are ignored.
   *
   * @param id - Identifier passed to `start()`
   * @param error - When truthy, the request is marked 'failed' with this text
   */
  complete(id: string, error?: string): void {
    const req = this.requests.get(id)
    if (req) {
      req.status = error ? 'failed' : 'completed'
      req.duration = Date.now() - req.startTime
      req.error = error
      logger.info(
        `Request ${error ? 'failed' : 'completed'}: ${id} [${req.action}] in ${req.duration}ms`,
      )
      // Schedule cleanup after 1 minute
      this.scheduleEviction(id, req)
    }
  }

  /** Requests that are still 'pending' or 'executing'. */
  getActiveRequests(): TrackedRequest[] {
    return Array.from(this.requests.values()).filter(
      (r) => r.status === 'pending' || r.status === 'executing',
    )
  }

  /** Aggregate statistics over every request currently held. */
  getStats(): RequestStats {
    const all = Array.from(this.requests.values())
    const inFlight = all.filter(
      (r) => r.status === 'pending' || r.status === 'executing',
    ).length
    const completed = all.filter(
      (r): r is typeof r & { duration: number } => r.duration !== undefined,
    )
    const avgDuration =
      completed.length > 0
        ? completed.reduce((sum, r) => sum + r.duration, 0) / completed.length
        : 0
    const failed = all.filter((r) => r.status === 'failed').length
    const errorRate = all.length > 0 ? failed / all.length : 0
    return {
      inFlight,
      avgDuration: Math.round(avgDuration),
      errorRate: Math.round(errorRate * 100) / 100,
      totalRequests: all.length,
    }
  }

  /**
   * In-flight requests that started more than `timeoutMs` milliseconds ago.
   *
   * @param timeoutMs - Age threshold in milliseconds (default 30000)
   */
  getHungRequests(timeoutMs = 30000): TrackedRequest[] {
    const now = Date.now()
    return Array.from(this.requests.values()).filter(
      (r) =>
        (r.status === 'pending' || r.status === 'executing') &&
        now - r.startTime > timeoutMs,
    )
  }

  /** Evict `req` one minute from now, unless the id has been reused by then. */
  private scheduleEviction(id: string, req: TrackedRequest): void {
    this.cancelEviction(id)
    const timer = setTimeout(() => {
      if (this.evictionTimers.get(id) === timer) {
        this.evictionTimers.delete(id)
      }
      // Only delete the record this timer was created for.
      if (this.requests.get(id) === req) {
        this.requests.delete(id)
      }
    }, 60000)
    this.evictionTimers.set(id, timer)
  }

  /** Cancel the pending eviction timer for `id`, if any. */
  private cancelEviction(id: string): void {
    const timer = this.evictionTimers.get(id)
    if (timer !== undefined) {
      clearTimeout(timer)
      this.evictionTimers.delete(id)
    }
  }

  private cleanup(): void {
    // Remove completed/failed requests older than 5 minutes
    const now = Date.now()
    const fiveMinutesAgo = now - 5 * 60 * 1000
    for (const [id, req] of this.requests.entries()) {
      if (
        (req.status === 'completed' || req.status === 'failed') &&
        req.startTime < fiveMinutesAgo
      ) {
        this.cancelEviction(id)
        this.requests.delete(id)
      }
    }
  }

  /** Stop all timers and drop every tracked request. */
  destroy(): void {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval)
      this.cleanupInterval = null
    }
    for (const timer of this.evictionTimers.values()) {
      clearTimeout(timer)
    }
    this.evictionTimers.clear()
    this.requests.clear()
  }
}

View File

@@ -1,78 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type { ProtocolRequest } from '@/protocol/types'
import { ProtocolRequestSchema } from '@/protocol/types'
import { logger } from './logger'
/**
 * Validates incoming protocol messages and rejects request ids that are
 * already being processed.
 *
 * Ids are held from `validate()` until `markComplete()`; a once-a-minute
 * sweep releases ids that have been held for more than five minutes.
 */
export class RequestValidator {
  private activeIds = new Set<string>()
  private idTimestamps = new Map<string, number>()
  private cleanupInterval: ReturnType<typeof setInterval> | null = null

  constructor() {
    // Sweep stale ids every minute so a missed markComplete() cannot leak memory.
    this.cleanupInterval = setInterval(() => this.cleanup(), 60000)
  }

  /**
   * Parse `message` with the protocol schema and reserve its id.
   *
   * @returns The parsed request
   * @throws Whatever the schema's `parse` throws for an invalid message, or an
   *   Error when the request id is already reserved
   */
  validate(message: unknown): ProtocolRequest {
    const parsed = ProtocolRequestSchema.parse(message)
    const { id } = parsed

    if (this.activeIds.has(id)) {
      logger.error(`Duplicate request ID detected: ${id}`)
      throw new Error(
        `Duplicate request ID: ${id}. Already processing this request.`,
      )
    }

    this.activeIds.add(id)
    this.idTimestamps.set(id, Date.now())
    logger.debug(`Request validated: ${id} [${parsed.action}]`)
    return parsed
  }

  /** Release a previously reserved id so it may be used again. */
  markComplete(id: string): void {
    this.activeIds.delete(id)
    this.idTimestamps.delete(id)
    logger.debug(`Request ID released: ${id}`)
  }

  /** Release ids reserved more than five minutes ago. */
  private cleanup(): void {
    const now = Date.now()
    const maxAgeMs = 5 * 60 * 1000

    this.idTimestamps.forEach((timestamp, id) => {
      const ageMs = now - timestamp
      if (ageMs <= maxAgeMs) return
      logger.warn(
        `Cleaning up stale request ID: ${id} (age: ${Math.round(ageMs / 1000)}s)`,
      )
      this.activeIds.delete(id)
      this.idTimestamps.delete(id)
    })
  }

  /** Number of ids currently reserved. */
  getStats(): { activeIds: number } {
    return { activeIds: this.activeIds.size }
  }

  /** Stop the sweep timer and forget every reserved id. */
  destroy(): void {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval)
      this.cleanupInterval = null
    }
    this.activeIds.clear()
    this.idTimestamps.clear()
  }
}

View File

@@ -1,73 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type { ProtocolResponse } from '@/protocol/types'
import { logger } from './logger'
/**
 * Bounded FIFO buffer for responses that cannot be sent right away.
 * When the buffer is full the oldest entry is discarded to make room.
 */
export class ResponseQueue {
  private queue: ProtocolResponse[] = []
  private maxSize: number

  /**
   * @param maxSize - Maximum number of buffered responses (default 1000)
   */
  constructor(maxSize = 1000) {
    this.maxSize = maxSize
    logger.info(`ResponseQueue initialized: maxSize=${maxSize}`)
  }

  /** Append a response, evicting the oldest one first if the buffer is full. */
  enqueue(response: ProtocolResponse): void {
    const isFull = this.queue.length >= this.maxSize
    if (isFull) {
      // Bound memory use by sacrificing the oldest entry.
      const dropped = this.queue.shift()
      logger.warn(
        `Response queue full. Dropped oldest response: ${dropped?.id}`,
      )
    }
    this.queue.push(response)
    logger.debug(
      `Response queued: ${response.id} (queue size: ${this.queue.length})`,
    )
  }

  /**
   * Hand buffered responses to `send` in FIFO order.
   * Stops at the first response whose `send` throws; that response is put
   * back at the head of the buffer.
   *
   * @returns Number of responses successfully sent
   */
  flush(send: (response: ProtocolResponse) => void): number {
    logger.info(`Flushing ${this.queue.length} queued responses...`)
    let delivered = 0
    for (;;) {
      const next = this.queue.shift()
      if (!next) break
      try {
        send(next)
      } catch (error) {
        // Keep ordering intact: the failed response goes back to the front.
        logger.error(
          `Failed to send response ${next.id}: ${error}. Re-queueing.`,
        )
        this.queue.unshift(next)
        break
      }
      delivered++
    }
    logger.info(
      `Flushed ${delivered} responses. ${this.queue.length} remaining.`,
    )
    return delivered
  }

  /** Number of buffered responses. */
  size(): number {
    return this.queue.length
  }

  /** Discard every buffered response. */
  clear(): void {
    const discarded = this.queue.length
    this.queue = []
    logger.warn(`Response queue cleared. Dropped ${discarded} responses.`)
  }

  /** Whether the buffer holds no responses. */
  isEmpty(): boolean {
    return this.size() === 0
  }
}

View File

@@ -1,99 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type {
InteractiveNode,
InteractiveSnapshot,
Rect,
} from '@/adapters/BrowserOSAdapter'
import { logger } from '@/utils/logger'
/**
 * SnapshotCache - per-tab store of interactive snapshots.
 *
 * Alongside each snapshot it keeps a nodeId → rect index so that an
 * element's coordinates can be looked up without fetching a new snapshot
 * (used to show the mouse pointer before click/type actions).
 */
class SnapshotCacheImpl {
  private cache: Map<number, InteractiveSnapshot> = new Map()
  private nodeRectCache: Map<number, Map<number, Rect>> = new Map()

  /**
   * Store `snapshot` for `tabId`, replacing any previous one, and rebuild
   * the rect index from the elements that carry a rect.
   */
  set(tabId: number, snapshot: InteractiveSnapshot): void {
    this.cache.set(tabId, snapshot)

    const rects = new Map<number, Rect>()
    for (const { nodeId, rect } of snapshot.elements) {
      if (rect) rects.set(nodeId, rect)
    }
    this.nodeRectCache.set(tabId, rects)

    logger.debug(
      `[SnapshotCache] Cached snapshot for tab ${tabId} with ${snapshot.elements.length} elements`,
    )
  }

  /** Return the snapshot stored for `tabId`, if any. */
  get(tabId: number): InteractiveSnapshot | undefined {
    return this.cache.get(tabId)
  }

  /**
   * Look up the rect of `nodeId` in the snapshot stored for `tabId`.
   * Returns undefined when the tab has no snapshot or the node has no rect.
   */
  getNodeRect(tabId: number, nodeId: number): Rect | undefined {
    const rects = this.nodeRectCache.get(tabId)
    if (rects) {
      return rects.get(nodeId)
    }
    logger.debug(`[SnapshotCache] No cached snapshot for tab ${tabId}`)
    return undefined
  }

  /** Find the element with `nodeId` in the snapshot stored for `tabId`. */
  getElement(tabId: number, nodeId: number): InteractiveNode | undefined {
    return this.cache
      .get(tabId)
      ?.elements.find((candidate) => candidate.nodeId === nodeId)
  }

  /** Forget everything stored for one tab. */
  clear(tabId: number): void {
    this.cache.delete(tabId)
    this.nodeRectCache.delete(tabId)
    logger.debug(`[SnapshotCache] Cleared cache for tab ${tabId}`)
  }

  /** Forget everything stored for every tab. */
  clearAll(): void {
    this.cache.clear()
    this.nodeRectCache.clear()
    logger.debug('[SnapshotCache] Cleared all caches')
  }

  /** Whether a snapshot is stored for `tabId`. */
  has(tabId: number): boolean {
    return this.cache.has(tabId)
  }
}

/** Shared cache instance. */
export const SnapshotCache = new SnapshotCacheImpl()

View File

@@ -1,58 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type { LoggerInterface, LogLevel } from '@browseros/shared/types/logger'
import { LOGGING_CONFIG } from '@/config/constants'
/**
 * Numeric rank of each log level, lowest (debug) to highest (error).
 * A message is emitted only when its rank is at least the rank of the
 * configured level.
 */
const LEVEL_PRIORITY: Record<LogLevel, number> = {
debug: 0,
info: 1,
warn: 2,
error: 3,
}
/**
 * Console logger that prefixes and timestamps every line and filters by the
 * level configured in LOGGING_CONFIG.
 */
export class Logger implements LoggerInterface {
  private prefix: string

  /**
   * @param prefix - Text placed at the start of every line
   *   (defaults to LOGGING_CONFIG.prefix)
   */
  constructor(prefix: string = LOGGING_CONFIG.prefix) {
    this.prefix = prefix
  }

  /** Whether a message at `level` passes the enabled flag and level threshold. */
  private shouldLog(level: LogLevel): boolean {
    return LOGGING_CONFIG.enabled
      ? LEVEL_PRIORITY[level] >= LEVEL_PRIORITY[LOGGING_CONFIG.level]
      : false
  }

  /**
   * Build the final line: `<prefix> [<ISO timestamp>] <message>`, followed by
   * the pretty-printed JSON of `data` on new lines when `data` is given.
   */
  private render(message: string, data?: Record<string, unknown>): string {
    const head = `${this.prefix} [${new Date().toISOString()}] ${message}`
    const tail = data ? `\n${JSON.stringify(data, null, 2)}` : ''
    return head + tail
  }

  /** Log at debug level (written with console.log). */
  debug(message: string, data?: Record<string, unknown>): void {
    if (this.shouldLog('debug')) {
      console.log(this.render(message, data))
    }
  }

  /** Log at info level (written with console.info). */
  info(message: string, data?: Record<string, unknown>): void {
    if (this.shouldLog('info')) {
      console.info(this.render(message, data))
    }
  }

  /** Log at warn level (written with console.warn). */
  warn(message: string, data?: Record<string, unknown>): void {
    if (this.shouldLog('warn')) {
      console.warn(this.render(message, data))
    }
  }

  /** Log at error level (written with console.error). */
  error(message: string, data?: Record<string, unknown>): void {
    if (this.shouldLog('error')) {
      console.error(this.render(message, data))
    }
  }
}
/** Shared Logger instance created with the default prefix (LOGGING_CONFIG.prefix). */
export const logger = new Logger()

View File

@@ -1,68 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
/**
* Timeout configuration for Chrome API and BrowserOS operations.
* These prevent hung APIs from blocking the mutex indefinitely.
*
* NOTE(review): values are presumably milliseconds, matching the `timeoutMs`
* parameter of `withTimeout` — confirm at the call sites.
*/
export const CHROME_API_TIMEOUTS = {
/** Quick Chrome API calls (tabs, bookmarks, history queries) */
CHROME_API: 15_000,
/** BrowserOS page actions (click, scroll, input, etc.) */
BROWSEROS_ACTION: 10_000,
/** Heavy BrowserOS operations (screenshot, snapshot, accessibility tree) */
BROWSEROS_HEAVY: 60_000,
} as const
/**
 * Raised when a wrapped Chrome API call does not settle within its time
 * budget. Carries the operation name and the budget so callers can report
 * or branch on them.
 */
export class ChromeAPITimeoutError extends Error {
  /** Name of the operation that timed out. */
  public readonly operation: string
  /** Time budget, in milliseconds, that was exceeded. */
  public readonly timeoutMs: number

  constructor(operation: string, timeoutMs: number) {
    super(`Chrome API '${operation}' timed out after ${timeoutMs}ms`)
    this.operation = operation
    this.timeoutMs = timeoutMs
    this.name = 'ChromeAPITimeoutError'
  }
}
/**
 * Race a promise against a deadline.
 *
 * Settles exactly like `promise` when it settles first; otherwise rejects
 * with a ChromeAPITimeoutError once `timeoutMs` has elapsed. The deadline
 * timer is always cleared as soon as the race is decided.
 *
 * IMPORTANT: the underlying work is not cancelled on timeout — the caller
 * merely stops waiting, and the operation may still finish in the background.
 *
 * @param promise - The promise to wait for
 * @param timeoutMs - Time budget in milliseconds
 * @param operation - Operation name used in the timeout error message
 * @returns The value `promise` resolves with, if it resolves in time
 * @throws ChromeAPITimeoutError if the time budget is exceeded
 */
export function withTimeout<T>(
  promise: Promise<T>,
  timeoutMs: number,
  operation: string,
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined

  // Never resolves; only rejects if the deadline passes first.
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(
      () => reject(new ChromeAPITimeoutError(operation, timeoutMs)),
      timeoutMs,
    )
  })

  return Promise.race([promise, deadline]).finally(() => clearTimeout(timer))
}

View File

@@ -1,28 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
// Split a dotted version into numeric parts: "137.0.7207.69" → [137, 0, 7207, 69].
// Parts that do not start with a number become 0.
function parseVersion(version: string): number[] {
  const parts: number[] = []
  for (const segment of version.split('.')) {
    parts.push(Number.parseInt(segment, 10) || 0)
  }
  return parts
}
// Returns true when `current` >= `required`, comparing dotted versions part by
// part from the left; missing trailing parts count as 0 ("1.2" equals "1.2.0").
export function isVersionAtLeast(current: string, required: string): boolean {
  const have = parseVersion(current)
  const want = parseVersion(required)
  const width = Math.max(have.length, want.length)

  for (let idx = 0; idx < width; idx++) {
    const haveNum = have[idx] || 0
    const wantNum = want[idx] || 0
    // The first differing part decides the outcome.
    if (haveNum !== wantNum) return haveNum > wantNum
  }

  // Every part matched: the versions are equal.
  return true
}

View File

@@ -1,297 +0,0 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { WEBSOCKET_CONFIG } from '@/config/constants'
import type { ProtocolRequest, ProtocolResponse } from '@/protocol/types'
import { ConnectionStatus } from '@/protocol/types'
import { logger } from '@/utils/logger'
/**
 * Async callback that resolves the port number to connect to.
 * WebSocketClient awaits it on every connection attempt.
 */
export type PortProvider = () => Promise<number>
/**
 * WebSocket client with automatic reconnection and a ping/pong heartbeat.
 *
 * - `connect()` resolves the port through the PortProvider, opens the socket
 *   and waits (bounded by WEBSOCKET_CONFIG.connectionTimeout) until it is open.
 * - Any close that was not requested through `disconnect()` schedules a
 *   reconnect after WEBSOCKET_CONFIG.reconnectIntervalMs.
 * - While connected, a `{ type: 'ping' }` message is sent every
 *   WEBSOCKET_CONFIG.heartbeatInterval; a missing `pong` closes the socket,
 *   which in turn triggers the reconnect path.
 */
export class WebSocketClient {
  private ws: WebSocket | null = null
  private status: ConnectionStatus = ConnectionStatus.DISCONNECTED
  private reconnectTimer: ReturnType<typeof setTimeout> | null = null
  private heartbeatTimer: ReturnType<typeof setInterval> | null = null
  private heartbeatTimeoutTimer: ReturnType<typeof setTimeout> | null = null
  private getPort: PortProvider
  private lastPongReceived: number = Date.now()
  private pendingPing = false
  // Event handlers
  private messageHandlers = new Set<(msg: ProtocolResponse) => void>()
  private statusHandlers = new Set<(status: ConnectionStatus) => void>()

  /**
   * @param getPort - Callback that resolves the port for each connection attempt
   */
  constructor(getPort: PortProvider) {
    this.getPort = getPort
    logger.info('WebSocketClient initialized')
  }

  // Public API

  /**
   * Open the connection. Does nothing when already connected.
   * Never rejects: failures are logged and handed to the reconnect logic.
   */
  async connect(): Promise<void> {
    if (this.status === ConnectionStatus.CONNECTED) {
      logger.debug('Already connected')
      return
    }
    this._setStatus(ConnectionStatus.CONNECTING)
    try {
      const port = await this.getPort()
      const url = this._buildUrl(port)
      logger.info(`Connecting to ${url}`)
      this.ws = new WebSocket(url)
      this.ws.onopen = this._handleOpen.bind(this)
      this.ws.onmessage = this._handleMessage.bind(this)
      this.ws.onerror = this._handleError.bind(this)
      this.ws.onclose = this._handleClose.bind(this)
      // Wait for connection with timeout
      await this._waitForConnection()
    } catch (error) {
      logger.error(`Connection failed: ${error}`)
      this._handleConnectionFailure()
    }
  }

  /** Close the connection, cancel all timers and suppress reconnection. */
  disconnect(): void {
    logger.info('Disconnecting...')
    this._clearTimers()
    if (this.ws) {
      this.ws.close()
      this.ws = null
    }
    // Set last: _handleClose skips reconnection when the status is DISCONNECTED.
    this._setStatus(ConnectionStatus.DISCONNECTED)
  }

  /**
   * Serialize `message` as JSON and send it.
   *
   * @throws Error when the client is not connected
   */
  send(
    message: ProtocolRequest | ProtocolResponse | Record<string, unknown>,
  ): void {
    this._sendSerialized(message)
  }

  /** Register a handler for every incoming message except heartbeat pongs. */
  onMessage(handler: (msg: ProtocolResponse) => void): void {
    this.messageHandlers.add(handler)
  }

  /** Register a handler that is called whenever the connection status changes. */
  onStatusChange(handler: (status: ConnectionStatus) => void): void {
    this.statusHandlers.add(handler)
  }

  /** Whether the status is CONNECTED. */
  isConnected(): boolean {
    return this.status === ConnectionStatus.CONNECTED
  }

  /** Current connection status. */
  getStatus(): ConnectionStatus {
    return this.status
  }

  // Private methods

  /** Assemble the endpoint URL from WEBSOCKET_CONFIG and the resolved port. */
  private _buildUrl(port: number): string {
    const { protocol, host, path } = WEBSOCKET_CONFIG
    return `${protocol}://${host}:${port}${path}`
  }

  /**
   * Poll the status every 100ms until it becomes CONNECTED (resolve) or
   * ERROR (reject), or until WEBSOCKET_CONFIG.connectionTimeout elapses
   * (reject). The poll loop is stopped in every case; without that, a
   * timed-out attempt would keep polling for as long as the client stays in
   * CONNECTING/RECONNECTING.
   */
  private async _waitForConnection(): Promise<void> {
    return new Promise((resolve, reject) => {
      let pollTimer: ReturnType<typeof setTimeout> | null = null
      const timeout = setTimeout(() => {
        if (pollTimer) {
          clearTimeout(pollTimer)
          pollTimer = null
        }
        reject(new Error('Connection timeout'))
      }, WEBSOCKET_CONFIG.connectionTimeout)
      const checkConnection = () => {
        pollTimer = null
        if (this.status === ConnectionStatus.CONNECTED) {
          clearTimeout(timeout)
          resolve()
        } else if (this.status === ConnectionStatus.ERROR) {
          clearTimeout(timeout)
          reject(new Error('Connection failed'))
        } else {
          pollTimer = setTimeout(checkConnection, 100)
        }
      }
      checkConnection()
    })
  }

  /** Socket opened: reset heartbeat state, publish CONNECTED, start pinging. */
  private _handleOpen(): void {
    logger.info('WebSocket connected')
    this.lastPongReceived = Date.now()
    this.pendingPing = false
    this._setStatus(ConnectionStatus.CONNECTED)
    this._startHeartbeat()
  }

  /**
   * Parse an incoming frame as JSON. `pong` messages are consumed for the
   * heartbeat; everything else is passed to the message handlers. Errors
   * thrown while parsing or inside a handler are logged, not propagated.
   */
  private _handleMessage(event: MessageEvent): void {
    try {
      const message = JSON.parse(event.data)
      // Handle pong response for heartbeat
      if (message.type === 'pong') {
        this.lastPongReceived = Date.now()
        this.pendingPing = false
        logger.debug('Received pong from server')
        return
      }
      logger.debug(`Received: ${JSON.stringify(message).substring(0, 100)}...`)
      // Emit to all message handlers
      for (const handler of this.messageHandlers) {
        handler(message as ProtocolResponse)
      }
    } catch (error) {
      logger.error(`Failed to parse message: ${error}`)
    }
  }

  /** Socket error: log it and publish ERROR. */
  private _handleError(event: Event): void {
    logger.error(`WebSocket error: ${event}`)
    this._setStatus(ConnectionStatus.ERROR)
  }

  /** Socket closed: drop it and reconnect unless the close was requested. */
  private _handleClose(event: CloseEvent): void {
    logger.warn(`WebSocket closed: code=${event.code}, reason=${event.reason}`)
    this._clearTimers()
    this.ws = null
    // Only reconnect if we're not deliberately disconnecting
    if (this.status !== ConnectionStatus.DISCONNECTED) {
      this._reconnect()
    }
  }

  /** A connection attempt failed: publish ERROR and schedule a retry. */
  private _handleConnectionFailure(): void {
    this._setStatus(ConnectionStatus.ERROR)
    this._reconnect()
  }

  /**
   * Schedule a single reconnection attempt after
   * WEBSOCKET_CONFIG.reconnectIntervalMs. No-op while one is already pending.
   */
  private _reconnect(): void {
    if (this.reconnectTimer) {
      return // Already reconnecting
    }
    this._setStatus(ConnectionStatus.RECONNECTING)
    const delay = WEBSOCKET_CONFIG.reconnectIntervalMs
    logger.warn(`Reconnecting in ${Math.round(delay)}ms`)
    this.reconnectTimer = setTimeout(() => {
      this.reconnectTimer = null
      this.connect().catch((err) => {
        logger.error(`Reconnection failed: ${err}`)
      })
    }, delay)
  }

  /**
   * Start the periodic heartbeat. Each tick first checks that a pong arrived
   * within heartbeatInterval + heartbeatTimeout, then sends a ping and arms a
   * per-ping timeout.
   */
  private _startHeartbeat(): void {
    this._clearHeartbeat()
    this.heartbeatTimer = setInterval(() => {
      if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
        return
      }
      // Check if previous ping timed out
      const timeSinceLastPong = Date.now() - this.lastPongReceived
      if (
        timeSinceLastPong >
        WEBSOCKET_CONFIG.heartbeatInterval + WEBSOCKET_CONFIG.heartbeatTimeout
      ) {
        logger.error(
          `Heartbeat timeout: no pong received for ${timeSinceLastPong}ms`,
        )
        this._handleHeartbeatTimeout()
        return
      }
      // Send ping
      try {
        this._sendSerialized({ type: 'ping' })
        this.pendingPing = true
        logger.debug('Sent heartbeat ping')
        // Set timeout for this specific ping
        this._clearHeartbeatTimeout()
        this.heartbeatTimeoutTimer = setTimeout(() => {
          if (this.pendingPing) {
            logger.error(
              `Ping timeout: no pong received within ${WEBSOCKET_CONFIG.heartbeatTimeout}ms`,
            )
            this._handleHeartbeatTimeout()
          }
        }, WEBSOCKET_CONFIG.heartbeatTimeout)
      } catch (error) {
        logger.error(`Failed to send ping: ${error}`)
        this._handleHeartbeatTimeout()
      }
    }, WEBSOCKET_CONFIG.heartbeatInterval)
  }

  /** Heartbeat failed: close the socket so the close handler reconnects. */
  private _handleHeartbeatTimeout(): void {
    logger.warn('Heartbeat failed, forcing reconnection')
    if (this.ws) {
      this.ws.close()
    }
  }

  /** Stop the heartbeat interval and any outstanding ping timeout. */
  private _clearHeartbeat(): void {
    if (this.heartbeatTimer) {
      clearInterval(this.heartbeatTimer)
      this.heartbeatTimer = null
    }
    this._clearHeartbeatTimeout()
  }

  /** Cancel the timeout armed for the most recent ping. */
  private _clearHeartbeatTimeout(): void {
    if (this.heartbeatTimeoutTimer) {
      clearTimeout(this.heartbeatTimeoutTimer)
      this.heartbeatTimeoutTimer = null
    }
  }

  /** Cancel heartbeat timers and any pending reconnect. */
  private _clearTimers(): void {
    this._clearHeartbeat()
    if (this.reconnectTimer) {
      clearTimeout(this.reconnectTimer)
      this.reconnectTimer = null
    }
  }

  /** Update the status and notify status handlers; no-op when unchanged. */
  private _setStatus(status: ConnectionStatus): void {
    if (this.status === status) return
    this.status = status
    logger.info(`Status changed: ${status}`)
    // Emit to all status handlers
    for (const handler of this.statusHandlers) {
      handler(status)
    }
  }

  /**
   * JSON-encode `message` and write it to the socket.
   *
   * @throws Error when the status is not CONNECTED or no socket exists
   */
  private _sendSerialized(
    message: ProtocolRequest | ProtocolResponse | Record<string, unknown>,
  ): void {
    if (this.status !== ConnectionStatus.CONNECTED) {
      throw new Error('WebSocket not connected')
    }
    if (!this.ws) {
      throw new Error('WebSocket instance is null')
    }
    const messageStr = JSON.stringify(message)
    logger.debug(`Sending: ${messageStr.substring(0, 100)}...`)
    this.ws.send(messageStr)
  }
}

View File

@@ -1,257 +0,0 @@
{
"tests": [
{
"name": "Check BrowserOS Availability",
"action": "checkBrowserOS",
"payload": {},
"description": "Verify chrome.browserOS is available and list all APIs"
},
{
"name": "Get Active Tab",
"action": "getActiveTab",
"payload": {},
"description": "Get the currently active tab information"
},
{
"name": "Get All Tabs",
"action": "getTabs",
"payload": {},
"description": "Get all open tabs in the browser"
},
{
"name": "Get Page Load Status",
"action": "getPageLoadStatus",
"payload": {
"tabId": "{{activeTabId}}"
},
"description": "Check if the active tab has finished loading"
},
{
"name": "Capture Screenshot",
"action": "captureScreenshot",
"payload": {
"tabId": "{{activeTabId}}",
"size": "small"
},
"description": "Capture a screenshot of the active tab"
},
{
"name": "Get Interactive Snapshot",
"action": "getInteractiveSnapshot",
"payload": {
"tabId": "{{activeTabId}}"
},
"description": "Get interactive elements snapshot from active tab"
},
{
"name": "Execute JavaScript - Get Title",
"action": "executeJavaScript",
"payload": {
"tabId": "{{activeTabId}}",
"code": "document.title"
},
"description": "Execute JavaScript to get page title"
},
{
"name": "Execute JavaScript - Get URL",
"action": "executeJavaScript",
"payload": {
"tabId": "{{activeTabId}}",
"code": "window.location.href"
},
"description": "Execute JavaScript to get current URL"
},
{
"name": "Scroll Down",
"action": "scrollDown",
"payload": {
"tabId": "{{activeTabId}}"
},
"description": "Scroll the active tab down by one viewport"
},
{
"name": "Scroll Up",
"action": "scrollUp",
"payload": {
"tabId": "{{activeTabId}}"
},
"description": "Scroll the active tab up by one viewport"
},
{
"name": "Get Snapshot (Text)",
"action": "getSnapshot",
"payload": {
"tabId": "{{activeTabId}}",
"type": "text",
"options": {
"context": "visible"
}
},
"description": "Get text snapshot of the active tab"
},
{
"name": "Open New Tab",
"action": "openTab",
"payload": {
"url": "https://www.example.com",
"active": true
},
"description": "Open a new tab with example.com"
},
{
"name": "Navigate Current Tab",
"action": "navigate",
"payload": {
"url": "https://www.wikipedia.org"
},
"description": "Navigate the active tab to Wikipedia"
},
{
"name": "Switch Tab",
"action": "switchTab",
"payload": {
"tabId": "{{activeTabId}}"
},
"description": "Switch to the active tab (should already be active)"
},
{
"name": "Get Recent Bookmarks",
"action": "getBookmarks",
"payload": {
"recent": true,
"limit": 10
},
"description": "Get 10 most recent bookmarks"
},
{
"name": "Search Bookmarks",
"action": "getBookmarks",
"payload": {
"query": "google",
"limit": 5
},
"description": "Search for bookmarks containing 'google'"
},
{
"name": "Get Recent History",
"action": "getRecentHistory",
"payload": {
"maxResults": 10,
"hoursBack": 24
},
"description": "Get last 10 history items from past 24 hours"
},
{
"name": "Search History",
"action": "searchHistory",
"payload": {
"query": "github",
"maxResults": 5
},
"description": "Search history for 'github'"
}
],
"tabManagementTests": [
{
"name": "Close Tab (Manual - requires tab ID)",
"action": "closeTab",
"payload": {
"tabId": "MANUAL_INPUT_REQUIRED"
},
"description": "Close a specific tab by ID (get ID from getTabs first)"
}
],
"bookmarkTests": [
{
"name": "Create Bookmark (Manual)",
"action": "createBookmark",
"payload": {
"title": "Example Site",
"url": "https://www.example.com"
},
"description": "Create a bookmark for example.com"
},
{
"name": "Remove Bookmark (Manual - requires bookmark ID)",
"action": "removeBookmark",
"payload": {
"id": "MANUAL_INPUT_REQUIRED"
},
"description": "Remove a bookmark by ID (get ID from getBookmarks first)"
}
],
"manualTests": [
{
"name": "Click Element (Requires nodeId)",
"action": "click",
"payload": {
"tabId": "{{activeTabId}}",
"nodeId": "MANUAL_INPUT_REQUIRED"
},
"description": "Click an element by nodeId (get nodeId from getInteractiveSnapshot first)"
},
{
"name": "Input Text (Requires nodeId)",
"action": "inputText",
"payload": {
"tabId": "{{activeTabId}}",
"nodeId": "MANUAL_INPUT_REQUIRED",
"text": "Test input"
},
"description": "Type text into an input element"
},
{
"name": "Clear Input (Requires nodeId)",
"action": "clear",
"payload": {
"tabId": "{{activeTabId}}",
"nodeId": "MANUAL_INPUT_REQUIRED"
},
"description": "Clear an input element"
},
{
"name": "Scroll to Node (Requires nodeId)",
"action": "scrollToNode",
"payload": {
"tabId": "{{activeTabId}}",
"nodeId": "MANUAL_INPUT_REQUIRED"
},
"description": "Scroll to a specific element"
},
{
"name": "Click at Coordinates",
"action": "clickCoordinates",
"payload": {
"tabId": "{{activeTabId}}",
"x": 100,
"y": 100
},
"description": "Click at specific screen coordinates"
},
{
"name": "Type at Coordinates",
"action": "typeAtCoordinates",
"payload": {
"tabId": "{{activeTabId}}",
"x": 100,
"y": 100,
"text": "Test text"
},
"description": "Type at specific screen coordinates"
},
{
"name": "Send Keys",
"action": "sendKeys",
"payload": {
"tabId": "{{activeTabId}}",
"keys": "Hello"
},
"description": "Send keyboard keys to the active tab"
}
],
"_notes": {
"activeTabId": "This placeholder will be replaced with the actual active tab ID at runtime",
"tests": "These tests can run automatically without manual input",
"manualTests": "These tests require manual nodeId input or specific coordinates"
}
}

View File

@@ -1,23 +0,0 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "ES2020",
"moduleResolution": "bundler",
"lib": ["ES2020", "DOM"],
"outDir": "./dist",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"resolveJsonModule": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"types": ["chrome", "node"],
"baseUrl": ".",
"paths": {
"@/*": ["src/*"]
}
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}

View File

@@ -1,83 +0,0 @@
const path = require('node:path')
const webpack = require('webpack')
const TerserPlugin = require('terser-webpack-plugin')
const CopyPlugin = require('copy-webpack-plugin')
module.exports = (_env, argv) => {
const isProduction = argv.mode === 'production'
return {
mode: isProduction ? 'production' : 'development',
entry: {
background: './src/background/index.ts',
},
output: {
path: path.resolve(__dirname, 'dist'),
filename: '[name].js',
clean: true,
},
resolve: {
extensions: ['.ts', '.js'],
alias: {
'@': path.resolve(__dirname, 'src'),
},
},
module: {
rules: [
{
test: /\.ts$/,
use: {
loader: 'ts-loader',
options: {
onlyCompileBundledFiles: true,
compilerOptions: {
declaration: false,
declarationMap: false,
},
},
},
exclude: [/node_modules/, /\.(test|spec)\.(ts|tsx)$/],
},
],
},
plugins: [
new webpack.optimize.LimitChunkCountPlugin({
maxChunks: 1,
}),
new CopyPlugin({
patterns: [
{ from: 'manifest.json', to: '.' },
{ from: 'assets', to: 'assets' },
],
}),
],
devtool: isProduction ? false : 'source-map',
optimization: {
splitChunks: false,
runtimeChunk: false,
minimize: isProduction,
minimizer: isProduction
? [
new TerserPlugin({
extractComments: false,
terserOptions: {
format: {
comments: false,
},
compress: {
// FIXME: nikhil - remove this later after few releases
drop_console: false,
drop_debugger: true,
},
},
}),
]
: [],
},
performance: {
hints: isProduction ? 'warning' : false,
maxEntrypointSize: 512000,
maxAssetSize: 512000,
},
}
}

View File

@@ -179,7 +179,7 @@ The `apiKey` field supports two formats:
}
```
Each worker gets its own Chrome instance. Worker N uses `base_port + N` for CDP, server, and extension ports.
Each worker gets its own Chrome instance. Worker N uses `base_port + N` for CDP and server ports. `base_extension_port` is still reserved as a legacy BrowserOS launch argument for compatibility with Chromium builds that still pass it.
### Execution settings

View File

@@ -13,14 +13,14 @@ const MCP_URL = `${SERVER_URL}/mcp`
const NUM_TURNS = 60
const SCREENSHOT_EVERY_N_TURNS = 1
async function checkExtension(): Promise<boolean> {
async function checkBrowserReady(): Promise<boolean> {
try {
const res = await fetch(`${SERVER_URL}/extension-status`, {
const res = await fetch(`${SERVER_URL}/health`, {
signal: AbortSignal.timeout(5000),
})
if (!res.ok) return false
const data = (await res.json()) as { extensionConnected?: boolean }
return data.extensionConnected === true
const data = (await res.json()) as { cdpConnected?: boolean }
return data.cdpConnected === true
} catch {
return false
}
@@ -156,7 +156,7 @@ async function main() {
let screenshotFail = 0
let toolSuccess = 0
let toolFail = 0
let extensionDisconnects = 0
let browserDisconnects = 0
const startTime = Date.now()
@@ -201,18 +201,18 @@ async function main() {
}
}
// Check extension status
const extConnected = await checkExtension()
if (!extConnected) {
extensionDisconnects++
console.log(` Turn ${turn}: ⚠️ Extension disconnected!`)
// Check browser status
const browserReady = await checkBrowserReady()
if (!browserReady) {
browserDisconnects++
console.log(` Turn ${turn}: ⚠️ Browser became unavailable!`)
}
// Progress
if (turn % 10 === 0) {
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)
console.log(
`Turn ${turn}/${NUM_TURNS} - Screenshots: ${screenshotSuccess}/${turn}, Tools: ${toolSuccess}/${turn}, Disconnects: ${extensionDisconnects}, Elapsed: ${elapsed}s`,
`Turn ${turn}/${NUM_TURNS} - Screenshots: ${screenshotSuccess}/${turn}, Tools: ${toolSuccess}/${turn}, Disconnects: ${browserDisconnects}, Elapsed: ${elapsed}s`,
)
}
@@ -237,9 +237,9 @@ async function main() {
console.log(
`Tool calls: ${toolSuccess}/${NUM_TURNS} (${((toolSuccess / NUM_TURNS) * 100).toFixed(1)}%)`,
)
console.log(`Extension disconnects: ${extensionDisconnects}`)
console.log(`Browser disconnects: ${browserDisconnects}`)
if (screenshotFail > 0 || toolFail > 0 || extensionDisconnects > 0) {
if (screenshotFail > 0 || toolFail > 0 || browserDisconnects > 0) {
console.log('\n⚠ Issues detected during long run!')
} else {
console.log('\n✅ All operations completed successfully!')

View File

@@ -29,14 +29,14 @@ async function checkHealth(): Promise<boolean> {
}
}
async function checkExtension(): Promise<boolean> {
async function checkBrowserReady(): Promise<boolean> {
try {
const res = await fetch(`${SERVER_URL}/extension-status`, {
const res = await fetch(`${SERVER_URL}/health`, {
signal: AbortSignal.timeout(5000),
})
if (!res.ok) return false
const data = (await res.json()) as { extensionConnected?: boolean }
return data.extensionConnected === true
const data = (await res.json()) as { cdpConnected?: boolean }
return data.cdpConnected === true
} catch {
return false
}
@@ -124,9 +124,9 @@ async function main() {
if (!healthy) throw new Error('Server not healthy')
})
await runTest('Extension status', async () => {
const connected = await checkExtension()
if (!connected) throw new Error('Extension not connected')
await runTest('Browser status', async () => {
const connected = await checkBrowserReady()
if (!connected) throw new Error('Browser not ready')
})
// Phase 2: List tools
@@ -208,10 +208,10 @@ async function main() {
console.log(` Screenshot ${i}: ❌ ${res.error} (${res.duration}ms)`)
}
// Check extension status between screenshots
const extConnected = await checkExtension()
// Check browser status between screenshots
const extConnected = await checkBrowserReady()
if (!extConnected) {
console.log(` ⚠️ Extension disconnected after screenshot ${i}!`)
console.log(` ⚠️ Browser became unavailable after screenshot ${i}!`)
}
// Small delay between screenshots
@@ -270,10 +270,10 @@ async function main() {
})
}
// Final extension check
await runTest('Final extension status', async () => {
const connected = await checkExtension()
if (!connected) throw new Error('Extension not connected')
// Final browser readiness check
await runTest('Final browser status', async () => {
const connected = await checkBrowserReady()
if (!connected) throw new Error('Browser not ready')
})
// Summary

View File

@@ -14,7 +14,6 @@ import { type Subprocess, spawn, spawnSync } from 'bun'
const EVAL_PORTS = {
cdp: 9005,
server: 9105, // http_mcp in config.dev.json
extension: 9305,
} as const
const MONOREPO_ROOT = join(dirname(fileURLToPath(import.meta.url)), '../../..')
@@ -61,7 +60,7 @@ async function killBrowserOSApp(): Promise<void> {
async function launchBrowserOSApp(): Promise<boolean> {
log(
'BROWSEROS',
`Launching BrowserOS (server disabled, CDP=${EVAL_PORTS.cdp}, Extension=${EVAL_PORTS.extension})...`,
`Launching BrowserOS (server disabled, CDP=${EVAL_PORTS.cdp})...`,
)
spawnSync({
cmd: [
@@ -71,7 +70,6 @@ async function launchBrowserOSApp(): Promise<boolean> {
'--args',
'--disable-browseros-server',
`--browseros-cdp-port=${EVAL_PORTS.cdp}`,
`--browseros-extension-port=${EVAL_PORTS.extension}`,
],
})
for (let i = 0; i < 30; i++) {
@@ -123,19 +121,19 @@ async function waitForServerHealth(
return false
}
async function waitForExtension(
async function waitForBrowserReady(
port: number,
maxAttempts = 60,
): Promise<boolean> {
let connectedCount = 0
for (let i = 0; i < maxAttempts; i++) {
try {
const res = await fetch(`http://127.0.0.1:${port}/extension-status`, {
const res = await fetch(`http://127.0.0.1:${port}/health`, {
signal: AbortSignal.timeout(2000),
})
if (res.ok) {
const data = (await res.json()) as { extensionConnected?: boolean }
if (data.extensionConnected) {
const data = (await res.json()) as { cdpConnected?: boolean }
if (data.cdpConnected) {
connectedCount++
if (connectedCount >= 3) return true
} else {
@@ -150,14 +148,14 @@ async function waitForExtension(
return false
}
async function checkExtensionConnected(port: number): Promise<boolean> {
async function checkBrowserReady(port: number): Promise<boolean> {
try {
const res = await fetch(`http://127.0.0.1:${port}/extension-status`, {
const res = await fetch(`http://127.0.0.1:${port}/health`, {
signal: AbortSignal.timeout(3000),
})
if (res.ok) {
const data = (await res.json()) as { extensionConnected?: boolean }
return data.extensionConnected === true
const data = (await res.json()) as { cdpConnected?: boolean }
return data.cdpConnected === true
}
} catch {
/* failed */
@@ -174,9 +172,7 @@ let serverProc: Subprocess | null = null
async function startServer(): Promise<Subprocess> {
log('SERVER', 'Cleaning up ports...')
killPort(EVAL_PORTS.server)
killPort(EVAL_PORTS.extension)
await waitForPortFree(EVAL_PORTS.server, 30)
await waitForPortFree(EVAL_PORTS.extension, 30)
log('SERVER', 'Starting server process...')
const proc = spawn({
@@ -185,8 +181,6 @@ async function startServer(): Promise<Subprocess> {
'apps/server/src/index.ts',
'--server-port',
String(EVAL_PORTS.server),
'--extension-port',
String(EVAL_PORTS.extension),
'--cdp-port',
String(EVAL_PORTS.cdp),
],
@@ -250,9 +244,9 @@ async function scenario1_AppNotRunningAtStart(): Promise<void> {
log('RESULT', 'SUCCESS - App is now running, can proceed with server start')
}
async function scenario2_ExtensionNotConnecting(): Promise<void> {
async function scenario2_BrowserNotReady(): Promise<void> {
console.log(`\n${'='.repeat(70)}`)
console.log('SCENARIO 2: Extension Does Not Connect Within 30 Seconds')
console.log('SCENARIO 2: Browser Does Not Become Ready Within 30 Seconds')
console.log('='.repeat(70))
console.log(
'Expected: Wait 30s → Restart BrowserOS app → Retry → Success or fail after 3 attempts\n',
@@ -280,11 +274,11 @@ async function scenario2_ExtensionNotConnecting(): Promise<void> {
}
log('HEALTH', 'Server health OK')
log('WAIT', 'Waiting for extension to connect (30s timeout)...')
const extConnected = await waitForExtension(EVAL_PORTS.server, 60) // 60 * 500ms = 30s
log('WAIT', 'Waiting for browser readiness (30s timeout)...')
const browserReady = await waitForBrowserReady(EVAL_PORTS.server, 60)
if (!extConnected) {
log('TIMEOUT', 'Extension did not connect within 30 seconds')
if (!browserReady) {
log('TIMEOUT', 'Browser did not become ready within 30 seconds')
await stopServer(proc)
if (!browserOSRestartAttempted) {
@@ -302,10 +296,10 @@ async function scenario2_ExtensionNotConnecting(): Promise<void> {
}
}
throw new Error('Extension did not connect')
throw new Error('Browser did not become ready')
}
log('CONNECTED', 'Extension connected!')
log('CONNECTED', 'Browser ready!')
await stopServer(proc)
log('RESULT', 'SUCCESS - Would proceed with task execution')
return
@@ -342,14 +336,14 @@ async function scenario3_ServerCrashesMidTask(): Promise<void> {
return
}
const extConnected = await waitForExtension(EVAL_PORTS.server, 60)
if (!extConnected) {
log('SETUP', 'Extension failed to connect')
const browserReady = await waitForBrowserReady(EVAL_PORTS.server, 60)
if (!browserReady) {
log('SETUP', 'Browser did not become ready')
await stopServer(proc)
return
}
log('READY', 'Server and extension ready')
log('READY', 'Server and browser ready')
log('SIMULATE', 'Simulating server crash by killing the process...')
// Kill the server to simulate crash
@@ -361,9 +355,9 @@ async function scenario3_ServerCrashesMidTask(): Promise<void> {
const stillHealthy = await waitForServerHealth(EVAL_PORTS.server, 5)
log('CHECK', `Server health: ${stillHealthy ? 'OK' : 'FAILED'}`)
log('CHECK', 'Checking extension status...')
const stillConnected = await checkExtensionConnected(EVAL_PORTS.server)
log('CHECK', `Extension connected: ${stillConnected}`)
log('CHECK', 'Checking browser readiness...')
const stillConnected = await checkBrowserReady(EVAL_PORTS.server)
log('CHECK', `Browser ready: ${stillConnected}`)
if (!stillHealthy || !stillConnected) {
log('DETECTED', '→ Infrastructure failure detected!')
@@ -373,7 +367,6 @@ async function scenario3_ServerCrashesMidTask(): Promise<void> {
)
killPort(EVAL_PORTS.server)
killPort(EVAL_PORTS.extension)
log('CLEANUP', 'Ports cleaned')
log('RESULT', 'Task would FAIL, but next task gets clean environment')
@@ -394,7 +387,6 @@ async function scenario4_ToolTimeout(): Promise<void> {
log('ERROR', `Received error: "${errorMessage}"`)
const isInfraError =
errorMessage.includes('Extension') ||
errorMessage.includes('BrowserOS') ||
errorMessage.includes('server') ||
errorMessage.includes('not connected') ||
@@ -415,9 +407,9 @@ async function scenario4_ToolTimeout(): Promise<void> {
}
}
async function scenario5_ExtensionDisconnectsMidTask(): Promise<void> {
async function scenario5_BrowserUnavailableMidTask(): Promise<void> {
console.log(`\n${'='.repeat(70)}`)
console.log('SCENARIO 5: Extension Disconnects Mid-Task (App Crashes)')
console.log('SCENARIO 5: Browser Becomes Unavailable Mid-Task (App Crashes)')
console.log('='.repeat(70))
console.log(
'Expected: Tool call fails → "not connected" error → Kill app → Restart for next task\n',
@@ -432,20 +424,20 @@ async function scenario5_ExtensionDisconnectsMidTask(): Promise<void> {
log('WAIT', 'Waiting for server to be ready...')
await waitForServerHealth(EVAL_PORTS.server, 30)
await waitForExtension(EVAL_PORTS.server, 60)
log('READY', 'Server and extension ready')
await waitForBrowserReady(EVAL_PORTS.server, 60)
log('READY', 'Server and browser ready')
log('SIMULATE', 'Simulating BrowserOS crash by killing the app...')
await killBrowserOSApp()
await sleep(2000)
// Check extension status
log('CHECK', 'Checking extension status after app crash...')
const stillConnected = await checkExtensionConnected(EVAL_PORTS.server)
log('CHECK', `Extension connected: ${stillConnected}`)
// Check browser status
log('CHECK', 'Checking browser readiness after app crash...')
const stillConnected = await checkBrowserReady(EVAL_PORTS.server)
log('CHECK', `Browser ready: ${stillConnected}`)
if (!stillConnected) {
log('DETECTED', '→ Extension disconnected!')
log('DETECTED', '→ Browser became unavailable!')
const errorMessage = 'BrowserOS helper service not connected'
log('ERROR', `Tool call would fail with: "${errorMessage}"`)
@@ -457,7 +449,6 @@ async function scenario5_ExtensionDisconnectsMidTask(): Promise<void> {
log('RECOVERY', '→ Cleaning up for next task...')
await stopServer(proc)
killPort(EVAL_PORTS.server)
killPort(EVAL_PORTS.extension)
log('RECOVERY', '→ Next task would check if BrowserOS is running...')
const appRunning = isBrowserOSAppRunning()
@@ -517,12 +508,12 @@ async function scenario7_ConsecutiveFailures(): Promise<void> {
// Simulate infrastructure check before task
log('FLOW', '→ Start server')
log('FLOW', '→ Wait for health')
log('FLOW', '→ Wait for extension')
log('FLOW', '→ Wait for browser readiness')
// Simulate task failure
const failureReason =
taskId === 'task-1'
? 'Extension did not connect'
? 'Browser did not become ready'
: taskId === 'task-2'
? 'Tool timed out after 65000ms'
: 'BrowserOS helper service not connected'
@@ -530,14 +521,12 @@ async function scenario7_ConsecutiveFailures(): Promise<void> {
log('ERROR', `Task failed: ${failureReason}`)
const isInfraError =
failureReason.includes('Extension') ||
failureReason.includes('timeout') ||
failureReason.includes('not connected')
if (isInfraError) {
log('CLEANUP', '→ Detected infra error, cleaning ports')
log('CLEANUP', '→ killPort(9110)')
log('CLEANUP', '→ killPort(9310)')
}
log('CLEANUP', '→ Stop server')
@@ -558,7 +547,6 @@ async function main() {
console.log('Failure Scenario Test Suite')
console.log('='.repeat(70))
console.log(`Server Port: ${EVAL_PORTS.server}`)
console.log(`Extension Port: ${EVAL_PORTS.extension}`)
console.log(`CDP Port: ${EVAL_PORTS.cdp}`)
console.log()
@@ -570,8 +558,8 @@ async function main() {
},
{
num: 2,
name: 'Extension Does Not Connect (30s timeout)',
fn: scenario2_ExtensionNotConnecting,
name: 'Browser Does Not Become Ready (30s timeout)',
fn: scenario2_BrowserNotReady,
},
{
num: 3,
@@ -585,8 +573,8 @@ async function main() {
},
{
num: 5,
name: 'Extension Disconnects Mid-Task (App Crash)',
fn: scenario5_ExtensionDisconnectsMidTask,
name: 'Browser Becomes Unavailable Mid-Task (App Crash)',
fn: scenario5_BrowserUnavailableMidTask,
},
{
num: 6,
@@ -627,7 +615,6 @@ async function main() {
} catch {}
}
killPort(EVAL_PORTS.server)
killPort(EVAL_PORTS.extension)
process.exit(0)
}
process.on('SIGINT', cleanup)

View File

@@ -5,7 +5,7 @@
* Tests:
* 1. BrowserOS app detection
* 2. Server start/stop
* 3. Extension connection with verification
* 3. Browser readiness with verification
* 4. Window create/close
* 5. Screenshot capture
* 6. Multiple tasks in sequence with server restart
@@ -21,7 +21,6 @@ import { type Subprocess, spawn, spawnSync } from 'bun'
const EVAL_PORTS = {
cdp: 9005,
server: 9105, // http_mcp in config.dev.json
extension: 9305,
} as const
const MONOREPO_ROOT = join(dirname(fileURLToPath(import.meta.url)), '../../..')
const MCP_URL = `http://127.0.0.1:${EVAL_PORTS.server}/mcp`
@@ -60,7 +59,7 @@ async function _killBrowserOSApp(): Promise<void> {
async function _launchBrowserOSApp(): Promise<boolean> {
console.log(
` Launching BrowserOS (server disabled, CDP=${EVAL_PORTS.cdp}, Extension=${EVAL_PORTS.extension})...`,
` Launching BrowserOS (server disabled, CDP=${EVAL_PORTS.cdp})...`,
)
spawnSync({
cmd: [
@@ -72,7 +71,6 @@ async function _launchBrowserOSApp(): Promise<boolean> {
`--remote-debugging-port=${EVAL_PORTS.cdp}`,
`--browseros-cdp-port=${EVAL_PORTS.cdp}`,
`--browseros-mcp-port=${EVAL_PORTS.server}`,
`--browseros-extension-port=${EVAL_PORTS.extension}`,
],
})
for (let i = 0; i < 30; i++) {
@@ -119,22 +117,19 @@ async function waitForServerHealth(
return false
}
async function waitForExtension(
async function waitForBrowserReady(
serverPort: number,
maxAttempts = 90,
): Promise<boolean> {
let connectedCount = 0
for (let i = 0; i < maxAttempts; i++) {
try {
const response = await fetch(
`http://127.0.0.1:${serverPort}/extension-status`,
{
signal: AbortSignal.timeout(2000),
},
)
const response = await fetch(`http://127.0.0.1:${serverPort}/health`, {
signal: AbortSignal.timeout(2000),
})
if (response.ok) {
const data = (await response.json()) as { extensionConnected?: boolean }
if (data.extensionConnected) {
const data = (await response.json()) as { cdpConnected?: boolean }
if (data.cdpConnected) {
connectedCount++
if (connectedCount >= 3) return true
} else {
@@ -151,9 +146,7 @@ async function waitForExtension(
async function startServer(): Promise<Subprocess> {
killPort(EVAL_PORTS.server)
killPort(EVAL_PORTS.extension)
await waitForPortFree(EVAL_PORTS.server, 30)
await waitForPortFree(EVAL_PORTS.extension, 30)
const serverProc = spawn({
cmd: [
@@ -161,8 +154,6 @@ async function startServer(): Promise<Subprocess> {
'apps/server/src/index.ts',
'--server-port',
String(EVAL_PORTS.server),
'--extension-port',
String(EVAL_PORTS.extension),
'--cdp-port',
String(EVAL_PORTS.cdp),
],
@@ -259,14 +250,14 @@ async function testServerStartStop(): Promise<boolean> {
}
console.log(' ✅ Server healthy')
console.log(' Waiting for extension...')
const extConnected = await waitForExtension(EVAL_PORTS.server, 60)
if (!extConnected) {
console.log(' ❌ Extension did not connect')
console.log(' Waiting for browser readiness...')
const browserReady = await waitForBrowserReady(EVAL_PORTS.server, 60)
if (!browserReady) {
console.log(' ❌ Browser did not become ready')
await stopServer(proc)
return false
}
console.log(' ✅ Extension connected')
console.log(' ✅ Browser ready')
console.log(' Stopping server...')
await stopServer(proc)
@@ -288,9 +279,9 @@ async function testWindowLifecycle(): Promise<boolean> {
return false
}
const extConnected = await waitForExtension(EVAL_PORTS.server, 60)
if (!extConnected) {
console.log(' ❌ Extension did not connect')
const browserReady = await waitForBrowserReady(EVAL_PORTS.server, 60)
if (!browserReady) {
console.log(' ❌ Browser did not become ready')
await stopServer(proc)
return false
}
@@ -371,9 +362,9 @@ async function testMultipleTasksWithRestart(): Promise<boolean> {
continue
}
const extConnected = await waitForExtension(EVAL_PORTS.server, 60)
if (!extConnected) {
console.log(` ❌ Task ${task.id}: Extension not connected`)
const browserReady = await waitForBrowserReady(EVAL_PORTS.server, 60)
if (!browserReady) {
console.log(` ❌ Task ${task.id}: Browser not ready`)
await stopServer(proc)
continue
}
@@ -416,8 +407,8 @@ async function testMultipleTasksWithRestart(): Promise<boolean> {
return successCount === tasks.length
}
async function testExtensionReconnect(): Promise<boolean> {
console.log('\n=== Test 5: Extension Stability (30 seconds) ===')
async function testBrowserStability(): Promise<boolean> {
console.log('\n=== Test 5: Browser Stability (30 seconds) ===')
console.log(' Starting server...')
const proc = await startServer()
@@ -429,14 +420,14 @@ async function testExtensionReconnect(): Promise<boolean> {
return false
}
const extConnected = await waitForExtension(EVAL_PORTS.server, 60)
if (!extConnected) {
console.log(' ❌ Extension did not connect')
const browserReady = await waitForBrowserReady(EVAL_PORTS.server, 60)
if (!browserReady) {
console.log(' ❌ Browser did not become ready')
await stopServer(proc)
return false
}
console.log(' Monitoring extension connection for 30 seconds...')
console.log(' Monitoring browser readiness for 30 seconds...')
let disconnects = 0
const checkInterval = 2000
const totalChecks = 30000 / checkInterval
@@ -444,21 +435,21 @@ async function testExtensionReconnect(): Promise<boolean> {
for (let i = 0; i < totalChecks; i++) {
try {
const response = await fetch(
`http://127.0.0.1:${EVAL_PORTS.server}/extension-status`,
`http://127.0.0.1:${EVAL_PORTS.server}/health`,
{
signal: AbortSignal.timeout(2000),
},
)
const data = (await response.json()) as { extensionConnected?: boolean }
if (!data.extensionConnected) {
const data = (await response.json()) as { cdpConnected?: boolean }
if (!data.cdpConnected) {
disconnects++
console.log(
` ⚠️ Extension disconnected at check ${i + 1}/${totalChecks}`,
` ⚠️ Browser became unavailable at check ${i + 1}/${totalChecks}`,
)
}
} catch {
disconnects++
console.log(` ⚠️ Failed to check extension at ${i + 1}/${totalChecks}`)
console.log(` ⚠️ Failed to check browser at ${i + 1}/${totalChecks}`)
}
await new Promise((r) => setTimeout(r, checkInterval))
}
@@ -466,11 +457,11 @@ async function testExtensionReconnect(): Promise<boolean> {
await stopServer(proc)
if (disconnects > 0) {
console.log(`Extension had ${disconnects} disconnections`)
console.log(`Browser had ${disconnects} readiness failures`)
return false
}
console.log(' ✅ Extension stayed connected for 30 seconds')
console.log(' ✅ Browser stayed ready for 30 seconds')
return true
}
@@ -483,7 +474,6 @@ async function main() {
console.log('Eval Lifecycle Test Suite')
console.log('='.repeat(60))
console.log(`Server Port: ${EVAL_PORTS.server}`)
console.log(`Extension Port: ${EVAL_PORTS.extension}`)
console.log(`CDP Port: ${EVAL_PORTS.cdp}`)
const results: { name: string; passed: boolean }[] = []
@@ -516,10 +506,10 @@ async function main() {
passed: await testMultipleTasksWithRestart(),
})
// Test 5: Extension Stability
// Test 5: Browser Stability
results.push({
name: 'Extension Stability',
passed: await testExtensionReconnect(),
name: 'Browser Stability',
passed: await testBrowserStability(),
})
// Summary

View File

@@ -30,21 +30,6 @@ import { Executor, type ExecutorCallbacks } from './executor'
import { OrchestratorAgent } from './orchestrator-agent'
import type { ExecutorFactory, ExecutorResult } from './types'
/**
 * Shape of the placeholder controller handed to `Browser` by the eval harness.
 * The eval drives the browser through CDP, so nothing here talks to a real
 * controller.
 */
interface ControllerStub {
  start(): Promise<void>
  stop(): Promise<void>
  isConnected(): boolean
  send(action: string, payload?: Record<string, unknown>): Promise<unknown>
}

/**
 * Inert controller: lifecycle calls resolve immediately, the connection probe
 * always reports "not connected", and every `send` resolves with an empty object.
 */
const CONTROLLER_STUB: ControllerStub = {
  async start() {},
  async stop() {},
  isConnected() {
    return false
  },
  async send() {
    return {}
  },
}
function extractCdpPort(config: EvalConfig): number {
const serverUrl = config.browseros.server_url
const match = serverUrl.match(/:(\d+)$/)
@@ -159,7 +144,7 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
const cdpPort = extractCdpPort(config)
const cdp = new CdpBackend({ port: cdpPort })
await cdp.connect()
const browser = new Browser(cdp, CONTROLLER_STUB)
const browser = new Browser(cdp)
capture.screenshot.setBrowser(browser)
const captchaWaiter = config.captcha

View File

@@ -14,13 +14,6 @@ import { resolveProviderConfig } from '../utils/resolve-provider-config'
import { withEvalTimeout } from '../utils/with-eval-timeout'
import type { AgentContext, AgentEvaluator, AgentResult } from './types'
/**
 * Inert controller passed as the second argument to `new Browser(...)` by this
 * evaluator. Lifecycle calls resolve immediately, `isConnected` always reports
 * `false`, and `send` resolves with an empty object.
 *
 * Deliberately NOT cast to `any`: the object is left structurally typed so the
 * compiler still checks it against whatever controller type `Browser` expects.
 */
const CONTROLLER_STUB = {
  start: async (): Promise<void> => {},
  stop: async (): Promise<void> => {},
  isConnected: (): boolean => false,
  send: async (): Promise<unknown> => ({}),
}
function extractCdpPort(config: EvalConfig): number {
const serverUrl = config.browseros.server_url
const match = serverUrl.match(/:(\d+)$/)
@@ -62,7 +55,7 @@ export class SingleAgentEvaluator implements AgentEvaluator {
const cdp = new CdpBackend({ port: cdpPort })
await cdp.connect()
const browser = new Browser(cdp, CONTROLLER_STUB)
const browser = new Browser(cdp)
capture.screenshot.setBrowser(browser)
// Build browser context so the agent knows the correct starting page ID

View File

@@ -12,7 +12,7 @@ export interface AgentContext {
// Page resolved once at task start (fresh browser has exactly one page)
initialPageId: number
// Browser window info (only for controller-based agents, not used by CDP-based single-agent)
// Browser window info for agents that operate on explicit window/tab ids
windowId?: number
tabId?: number

View File

@@ -5,11 +5,10 @@
* Mirrors scripts/dev/start.ts --manual mode with per-worker isolation:
*
* 1. Kill ports
* 2. Build extensions (once, shared across workers)
* 3. Launch Chrome directly with per-worker user-data-dir and ports
* 4. Wait for CDP
* 5. Start server with port env vars
* 6. Wait for server health
* 2. Launch Chrome directly with per-worker user-data-dir and ports
* 3. Wait for CDP
* 4. Start server with port env vars
* 5. Wait for server health
*
* Each worker gets isolated ports: base + workerIndex offset.
*/
@@ -40,7 +39,6 @@ const BROWSEROS_BINARY =
process.env.BROWSEROS_BINARY ||
'/Applications/BrowserOS.app/Contents/MacOS/BrowserOS'
const CONTROLLER_EXT_DIR = join(MONOREPO_ROOT, 'apps/controller-ext/dist')
const CAPTCHA_EXT_DIR = join(
dirname(fileURLToPath(import.meta.url)),
'../../extensions/nopecha',
@@ -80,24 +78,6 @@ export class BrowserOSAppManager {
return this.ports
}
/**
 * One-time extension build; call it once before any worker is started.
 *
 * Runs `bun run build:ext` from the monorepo root with stdout/stderr inherited,
 * which builds controller-ext — the same step start.ts performs via
 * buildExtension('controller-ext', 'build:ext').
 *
 * @throws Error when the build process exits with a non-zero code.
 */
static buildExtensions(): void {
  console.log(`[BROWSEROS] Building controller extension...`)

  const buildCommand = ['bun', 'run', 'build:ext']
  const { exitCode } = spawnSync({
    cmd: buildCommand,
    cwd: MONOREPO_ROOT,
    stdout: 'inherit',
    stderr: 'inherit',
  })

  // Anything other than a clean exit aborts the run before workers launch.
  if (exitCode !== 0) throw new Error('Failed to build controller extension')

  console.log(`[BROWSEROS] Controller extension built`)
}
/**
* Restart: kill existing, then start fresh
*/
@@ -135,7 +115,7 @@ export class BrowserOSAppManager {
* --disable-browseros-extensions (we load them explicitly if needed)
* --remote-debugging-port, --browseros-mcp-port, --browseros-extension-port
* --user-data-dir (unique per worker)
* --load-extension (optional, controller-ext)
* --load-extension (optional, unpacked helper extensions)
*/
private async startAll(): Promise<void> {
const { cdp, server, extension } = this.ports
@@ -164,10 +144,7 @@ export class BrowserOSAppManager {
]
const extensions: string[] = []
if (this.loadExtensions && existsSync(CONTROLLER_EXT_DIR)) {
extensions.push(CONTROLLER_EXT_DIR)
}
if (existsSync(CAPTCHA_EXT_DIR)) {
if (this.loadExtensions && existsSync(CAPTCHA_EXT_DIR)) {
extensions.push(CAPTCHA_EXT_DIR)
}
if (extensions.length > 0) {

View File

@@ -87,11 +87,7 @@ export class ParallelExecutor {
const cleanup = this.setupSignalHandlers()
// Build extensions once if needed (shared across workers)
const loadExtensions = this.config.config.browseros.load_extensions ?? false
if (loadExtensions) {
BrowserOSAppManager.buildExtensions()
}
// Patch NopeCHA API key before launching any workers
const captchaConfig = this.config.config.captcha

View File

@@ -29,23 +29,22 @@ MCP server and AI agent loop powering BrowserOS browser automation. This is the
│ │ └── MCP client for external tool servers │ │
│ └─────────────────────────────────────────────────────────────┘ │
│ │
│ ┌────────────────────┐ ┌────────────────────────────────────┐ │
│ │ CDP Tools Controller Tools │ │
│ │ (screenshots, │ │ (tabs, bookmarks, history, │ │
│ │ DOM, network, navigation, tab groups)
│ console, input) │ │ │
│ └────────────────────┘ └────────────────────────────────────┘ │
│ ┌─────────────────────────────────────────────────────────────┐
│ │ CDP-backed browser tools
│ │ (tabs, bookmarks, history, navigation, tab groups,
│ │ screenshots, DOM, network, console, input)
└─────────────────────────────────────────────────────────────┘
└──────────────────────────────────────────────────────────────────────┘
│ Chrome DevTools Protocol │ WebSocket
┌─────────────────────┐ ┌─────────────────────────────────
Chromium CDP │ │ Controller Extension
│ (port 9000) │ (port 9300)
DOM, network, │ │ chrome.tabs, chrome.history,
input, screenshots │ │ chrome.bookmarks
└─────────────────────┘ └─────────────────────────────────
│ Chrome DevTools Protocol
┌─────────────────────┐
│ Chromium CDP
│ (port 9000) │
│ │
│ DOM, network,
│ input, screenshots
└─────────────────────┘
```
## MCP Tools
@@ -137,7 +136,7 @@ apps/server/
### Prerequisites
- [Bun](https://bun.sh) runtime
- A running BrowserOS instance (for CDP and controller connections)
- A running BrowserOS instance (for CDP connectivity)
### Setup
@@ -178,4 +177,4 @@ bun scripts/build/server.ts --target=all --no-upload
|------|-------------|---------|
| 9100 | `BROWSEROS_SERVER_PORT` | HTTP server (MCP, chat, health) |
| 9000 | `BROWSEROS_CDP_PORT` | Chromium CDP (server connects as client) |
| 9300 | `BROWSEROS_EXTENSION_PORT` | WebSocket for controller extension |
| 9300 | `BROWSEROS_EXTENSION_PORT` | Legacy BrowserOS launch arg kept for compatibility |

View File

@@ -13,7 +13,11 @@ interface HealthDeps {
export function createHealthRoute(deps: HealthDeps = {}) {
return new Hono().get('/', (c) => {
const cdpConnected = deps.browser?.isCdpConnected() ?? true
return c.json({ status: 'ok', cdpConnected })
const cdpConnected = deps.browser?.isCdpConnected()
return c.json(
cdpConnected === undefined
? { status: 'ok' }
: { status: 'ok', cdpConnected },
)
})
}

View File

@@ -5,19 +5,19 @@
*/
import { Hono } from 'hono'
import type { ControllerBackend } from '../../browser/backends/controller'
import type { Browser } from '../../browser/browser'
interface StatusDeps {
controller: ControllerBackend
browser?: Browser
}
export function createStatusRoute(deps: StatusDeps) {
const { controller } = deps
return new Hono().get('/', (c) =>
c.json({
status: 'ok',
extensionConnected: controller.isConnected(),
}),
)
/**
 * Builds the status sub-app: a single `GET /` handler that always answers
 * `{ status: 'ok' }` and adds `cdpConnected` only when a browser was supplied.
 *
 * @param deps Optional dependencies; when `deps.browser` is absent the
 *   `cdpConnected` field is omitted from the response instead of being guessed.
 * @returns A Hono app exposing the status endpoint.
 */
export function createStatusRoute(deps: StatusDeps = {}) {
  const app = new Hono()
  return app.get('/', (c) => {
    // Evaluated per request so the reply reflects the current CDP state.
    const cdpConnected = deps.browser?.isCdpConnected()
    const body =
      cdpConnected === undefined
        ? { status: 'ok' }
        : { status: 'ok', cdpConnected }
    return c.json(body)
  })
}

View File

@@ -74,7 +74,6 @@ export async function createHttpServer(config: HttpServerConfig) {
resourcesDir,
version,
browser,
controller,
registry,
} = config
@@ -120,7 +119,7 @@ export async function createHttpServer(config: HttpServerConfig) {
},
}),
)
.route('/status', createStatusRoute({ controller }))
.route('/status', createStatusRoute({ browser }))
.route('/soul', createSoulRoutes())
.route('/memory', createMemoryRoutes())
.route('/skills', createSkillsRoutes())

View File

@@ -14,7 +14,6 @@ import {
} from '@browseros/shared/schemas/browser-context'
import { LLMConfigSchema } from '@browseros/shared/schemas/llm'
import { z } from 'zod'
import type { ControllerBackend } from '../browser/backends/controller'
import type { Browser } from '../browser/browser'
import type { ToolRegistry } from '../tools/tool-registry'
@@ -93,7 +92,6 @@ export interface HttpServerConfig {
version: string
browser: Browser
controller: ControllerBackend
registry: ToolRegistry
browserosId?: string

View File

@@ -1,250 +0,0 @@
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import type { WebSocket } from 'ws'
import { WebSocketServer } from 'ws'
import { logger } from '../../lib/logger'
import type { ControllerBackend as IControllerBackend } from './types'
/** Bookkeeping for one in-flight request that is still waiting for its reply. */
interface PendingRequest {
// Settles the caller's promise with the data carried by a successful reply.
resolve: (value: unknown) => void
// Fails the caller's promise (error reply, timeout, disconnect, or shutdown).
reject: (error: Error) => void
// Timer that fails the request if no reply arrives; cleared whenever the request settles.
timeout: NodeJS.Timeout
}
export class ControllerBackend implements IControllerBackend {
private wss: WebSocketServer | null = null
private port: number
private clients = new Map<string, WebSocket>()
private primaryClientId: string | null = null
private requestCounter = 0
private pendingRequests = new Map<string, PendingRequest>()
constructor(config: { port: number }) {
this.port = config.port
}
async start(): Promise<void> {
return new Promise((resolve, reject) => {
this.wss = new WebSocketServer({
port: this.port,
host: '127.0.0.1',
})
const onListening = () => {
this.wss?.off('error', onError)
logger.info(
`Controller WebSocket server listening on ws://127.0.0.1:${this.port}`,
)
resolve()
}
const onError = (error: Error) => {
this.wss?.off('listening', onListening)
reject(error)
}
this.wss.once('listening', onListening)
this.wss.once('error', onError)
this.wss.on('connection', (ws: WebSocket) => {
const clientId = this.registerClient(ws)
logger.info('Extension connected', { clientId })
ws.on('message', (data: Buffer) => {
try {
const message = data.toString()
const parsed = JSON.parse(message)
if (parsed.type === 'ping') {
ws.send(JSON.stringify({ type: 'pong' }))
return
}
if (parsed.type === 'focused') {
this.handleFocusEvent(clientId)
return
}
if (
parsed.type === 'register_windows' ||
parsed.type === 'window_created' ||
parsed.type === 'window_removed'
) {
// Window ownership messages — ignored for now (multi-profile deferred)
return
}
this.handleResponse(parsed)
} catch (error) {
logger.error(`Error parsing message from ${clientId}: ${error}`)
}
})
ws.on('close', () => {
logger.info('Extension disconnected', { clientId })
this.handleClientDisconnect(clientId)
})
ws.on('error', (error: Error) => {
logger.error(`WebSocket error for ${clientId}: ${error.message}`)
})
})
this.wss.on('error', (error: Error) => {
logger.error(`WebSocket server error: ${error.message}`)
})
})
}
async stop(): Promise<void> {
return new Promise((resolve) => {
for (const [id, pending] of this.pendingRequests.entries()) {
clearTimeout(pending.timeout)
pending.reject(new Error('ControllerBackend stopping'))
this.pendingRequests.delete(id)
}
for (const ws of this.clients.values()) {
try {
ws.close()
} catch {
// ignore
}
}
this.clients.clear()
this.primaryClientId = null
if (this.wss) {
this.wss.close(() => {
logger.info('Controller WebSocket server closed')
resolve()
})
} else {
resolve()
}
})
}
isConnected(): boolean {
return this.primaryClientId !== null
}
async send(
action: string,
payload?: Record<string, unknown>,
): Promise<unknown> {
if (!this.isConnected()) {
throw new Error('BrowserOS helper service not connected')
}
const client = this.primaryClientId
? this.clients.get(this.primaryClientId)
: null
if (!client) {
throw new Error('BrowserOS helper service not connected')
}
const id = `${Date.now()}-${++this.requestCounter}`
const timeoutMs = TIMEOUTS.CONTROLLER_BRIDGE
return new Promise((resolve, reject) => {
const timeout = setTimeout(() => {
this.pendingRequests.delete(id)
reject(new Error(`Request ${action} timed out after ${timeoutMs}ms`))
}, timeoutMs)
this.pendingRequests.set(id, { resolve, reject, timeout })
try {
const message = JSON.stringify({
id,
action,
payload: payload ?? {},
})
client.send(message)
} catch (error) {
clearTimeout(timeout)
this.pendingRequests.delete(id)
reject(error)
}
})
}
private handleResponse(response: {
id: string
ok: boolean
data?: unknown
error?: string
}): void {
const pending = this.pendingRequests.get(response.id)
if (!pending) {
logger.warn(`Received response for unknown request ID: ${response.id}`)
return
}
clearTimeout(pending.timeout)
this.pendingRequests.delete(response.id)
if (response.ok) {
pending.resolve(response.data)
} else {
pending.reject(new Error(response.error || 'Unknown error'))
}
}
private registerClient(ws: WebSocket): string {
const clientId = `client-${Date.now()}-${Math.floor(Math.random() * 1000000)}`
this.clients.set(clientId, ws)
if (!this.primaryClientId) {
this.primaryClientId = clientId
logger.info('Primary controller assigned', { clientId })
} else {
logger.info('Controller connected in standby mode', {
clientId,
primaryClientId: this.primaryClientId,
})
}
return clientId
}
private handleClientDisconnect(clientId: string): void {
const wasPrimary = this.primaryClientId === clientId
this.clients.delete(clientId)
if (wasPrimary) {
this.primaryClientId = null
for (const [id, pending] of this.pendingRequests.entries()) {
clearTimeout(pending.timeout)
pending.reject(new Error('Primary connection closed'))
this.pendingRequests.delete(id)
}
this.promoteNextPrimary()
}
}
/**
 * Makes the first client in the clients map (insertion order) the primary.
 * Logs a warning and leaves the primary untouched when no clients remain.
 */
private promoteNextPrimary(): void {
  const candidate = this.clients.keys().next()
  if (!candidate.done) {
    this.primaryClientId = candidate.value
    logger.info('Promoted controller to primary', {
      clientId: this.primaryClientId,
    })
    return
  }
  logger.warn('No controller connections available to promote')
}
/**
 * Reassigns the primary role to the client that reported a focus event.
 * Does nothing when that client is already the primary.
 *
 * NOTE(review): `clientId` is not checked against the clients map here;
 * presumably the caller only passes registered IDs — confirm.
 */
private handleFocusEvent(clientId: string): void {
  const previousPrimary = this.primaryClientId
  if (previousPrimary === clientId) {
    return
  }

  this.primaryClientId = clientId
  logger.info('Primary controller reassigned due to focus event', {
    clientId,
    previousPrimary,
  })
}
}

View File

@@ -12,13 +12,6 @@ export interface CdpBackend extends ProtocolApi {
): () => void
}
/**
 * Lifecycle and messaging surface of a controller backend.
 *
 * NOTE(review): the member descriptions below are inferred from names and
 * signatures only — confirm against the concrete implementation.
 */
export interface ControllerBackend {
/** Starts the backend; the returned promise settles when startup finishes. */
start(): Promise<void>
/** Stops the backend; the returned promise settles when shutdown finishes. */
stop(): Promise<void>
/** Reports whether the backend currently considers itself connected. */
isConnected(): boolean
/**
 * Sends a named action with an optional payload.
 * @returns a promise for the (untyped) result of the action
 */
send(action: string, payload?: Record<string, unknown>): Promise<unknown>
}
export interface CdpTarget {
id: string
type: string

View File

@@ -1,6 +1,6 @@
import type { ProtocolApi } from '@browseros/cdp-protocol/protocol-api'
import { logger } from '../lib/logger'
import type { CdpBackend, ControllerBackend } from './backends/types'
import type { CdpBackend } from './backends/types'
import type { BookmarkNode } from './bookmarks'
import * as bookmarks from './bookmarks'
import {
@@ -87,16 +87,13 @@ const EXCLUDED_URL_PREFIXES = [
export class Browser {
private cdp: CdpBackend
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: kept for later removal
private controller: ControllerBackend
private consoleCollector: ConsoleCollector
private pages = new Map<number, PageInfo>()
private sessions = new Map<string, string>()
private nextPageId = 1
constructor(cdp: CdpBackend, controller: ControllerBackend) {
constructor(cdp: CdpBackend) {
this.cdp = cdp
this.controller = controller
this.consoleCollector = new ConsoleCollector(cdp)
this.setupEventHandlers()
}

View File

@@ -20,7 +20,7 @@ export const ServerConfigSchema = z.object({
cdpPort: portSchema.nullable(),
serverPort: portSchema,
agentPort: portSchema,
extensionPort: portSchema,
extensionPort: portSchema.nullable(),
resourcesDir: z.string(),
executionDir: z.string(),
mcpAllowRemote: z.boolean(),
@@ -120,7 +120,7 @@ function parseCliArgs(argv: string[]): ConfigResult<ParsedCliArgs> {
)
.option(
'--extension-port <port>',
'Extension WebSocket port',
'[DEPRECATED] No-op, kept for backwards compatibility',
parsePortArg,
)
.option('--resources-dir <path>', 'Resources directory path')
@@ -151,12 +151,6 @@ function parseCliArgs(argv: string[]): ConfigResult<ParsedCliArgs> {
const opts = program.opts()
if (opts.disableMcpServer) {
console.warn(
'Warning: --disable-mcp-server is deprecated and has no effect',
)
}
if (opts.httpMcpPort !== undefined) {
console.warn('Warning: --http-mcp-port is deprecated. Use --server-port.')
}
@@ -167,6 +161,10 @@ function parseCliArgs(argv: string[]): ConfigResult<ParsedCliArgs> {
)
}
if (opts.extensionPort !== undefined) {
console.warn('Warning: --extension-port is deprecated and has no effect.')
}
const cwd = process.cwd()
return {
@@ -302,6 +300,7 @@ function validateInlinedEnv(): ConfigResult<void> {
function getDefaults(cwd: string): PartialConfig {
return {
cdpPort: null,
extensionPort: null,
resourcesDir: cwd,
executionDir: cwd,
mcpAllowRemote: false,

View File

@@ -14,7 +14,6 @@ import path from 'node:path'
import { EXIT_CODES } from '@browseros/shared/constants/exit-codes'
import { createHttpServer } from './api/server'
import { CdpBackend } from './browser/backends/cdp'
import { ControllerBackend } from './browser/backends/controller'
import { Browser } from './browser/browser'
import type { ServerConfig } from './config'
import { INLINED_ENV } from './env'
@@ -57,20 +56,6 @@ export class Application {
await this.initCoreServices()
const controller = new ControllerBackend({
port: this.config.extensionPort,
})
let controllerServerStarted = false
try {
logger.debug(
`Starting WebSocket server on port ${this.config.extensionPort}`,
)
await controller.start()
controllerServerStarted = true
} catch (error) {
this.handleControllerStartupError(this.config.extensionPort, error)
}
if (!this.config.cdpPort) {
logger.error('CDP port is required (--cdp-port)')
process.exit(EXIT_CODES.GENERAL_ERROR)
@@ -85,7 +70,7 @@ export class Application {
return this.handleStartupError('CDP', this.config.cdpPort, error)
}
const browser = new Browser(cdp, controller)
const browser = new Browser(cdp)
logger.info(`Loaded ${registry.names().length} unified tools`)
@@ -95,7 +80,6 @@ export class Application {
host: '0.0.0.0',
version: VERSION,
browser,
controller,
registry,
browserosId: identity.getBrowserOSId(),
executionDir: this.config.executionDir,
@@ -131,7 +115,7 @@ export class Application {
`Health endpoint: http://127.0.0.1:${this.config.serverPort}/health`,
)
this.logStartupSummary(controllerServerStarted)
this.logStartupSummary()
startSkillSync()
metrics.log('http_server.started', { version: VERSION })
@@ -244,31 +228,9 @@ export class Application {
process.exit(EXIT_CODES.GENERAL_ERROR)
}
/**
 * Logs a failed controller WebSocket startup without aborting the process.
 *
 * A general warning is always logged. Port-in-use failures get a second,
 * more specific warning; every other failure is reported to Sentry.
 */
private handleControllerStartupError(port: number, error: unknown): void {
  let errorMsg: string
  if (error instanceof Error) {
    errorMsg = error.message
  } else {
    errorMsg = String(error)
  }

  logger.warn(
    'Controller WebSocket server unavailable, continuing without controller bridge',
    { port, error: errorMsg },
  )

  // The two outcomes are mutually exclusive, so evaluate the check once.
  if (isPortInUseError(error)) {
    logger.warn(
      'Controller WebSocket port is already in use, continuing without controller bridge',
      { port },
    )
    return
  }
  Sentry.captureException(error)
}
private logStartupSummary(controllerServerStarted: boolean): void {
private logStartupSummary(): void {
logger.info('')
logger.info('Services running:')
logger.info(
controllerServerStarted
? ` Controller Server: ws://127.0.0.1:${this.config.extensionPort}`
: ' Controller Server: unavailable',
)
logger.info(` HTTP Server: http://127.0.0.1:${this.config.serverPort}`)
logger.info('')
}

View File

@@ -76,8 +76,6 @@ export async function spawnServer(config: ServerConfig): Promise<ServerState> {
config.cdpPort.toString(),
'--server-port',
config.serverPort.toString(),
'--extension-port',
config.extensionPort.toString(),
],
{
stdio: ['ignore', 'pipe', 'pipe'],

View File

@@ -19,41 +19,10 @@ export interface TestEnvironmentConfig {
cdpPort: number
serverPort: number
extensionPort: number
skipExtension?: boolean
}
let runtimePlan: TestRuntimePlan | null = null
/**
 * Asks the local server's `/status` endpoint whether the extension is
 * connected.
 *
 * @param port - port on 127.0.0.1 to query
 * @returns the `extensionConnected` field of the JSON body on an OK
 *   response; `false` on a non-OK response or any error (the request is
 *   aborted after 1000 ms)
 */
async function isExtensionConnected(port: number): Promise<boolean> {
  const statusUrl = `http://127.0.0.1:${port}/status`
  try {
    const response = await fetch(statusUrl, {
      signal: AbortSignal.timeout(1000),
    })
    if (!response.ok) {
      return false
    }
    const body = (await response.json()) as { extensionConnected: boolean }
    return body.extensionConnected
  } catch {
    // Not connected yet
    return false
  }
}
/**
 * Polls `isExtensionConnected` until it reports a connection.
 *
 * Each failed attempt is followed by a 500 ms pause, including the last one,
 * so the default of 60 attempts waits roughly 30 s before giving up.
 *
 * @param port - port handed to `isExtensionConnected`
 * @param maxAttempts - number of polls before failing
 * @throws Error when no attempt reports a connection
 */
async function waitForExtensionConnection(
  port: number,
  // Extension startup can be slow on a cold BrowserOS profile.
  // Keep this aligned with typical per-test timeouts (30s).
  maxAttempts = 60,
): Promise<void> {
  const pause = () => new Promise((resolve) => setTimeout(resolve, 500))

  let remaining = maxAttempts
  while (remaining > 0) {
    const connected = await isExtensionConnected(port)
    if (connected) {
      return
    }
    await pause()
    remaining -= 1
  }

  throw new Error(`Extension failed to connect on port ${port} within timeout`)
}
function configsMatch(
a: TestEnvironmentConfig,
b: TestEnvironmentConfig,
@@ -69,7 +38,6 @@ function configsMatch(
* Ensures the full BrowserOS test environment is ready:
* 1. Server running and healthy
* 2. Browser running with CDP available
* 3. Extension connected to server
*
* Reuses existing processes if already running with same config.
*/
@@ -84,7 +52,6 @@ export async function ensureBrowserOS(
cdpPort: options?.cdpPort ?? runtimePlan.ports.cdp,
serverPort: options?.serverPort ?? runtimePlan.ports.server,
extensionPort: options?.extensionPort ?? runtimePlan.ports.extension,
skipExtension: options?.skipExtension ?? false,
}
// Fast path: already running with same config
@@ -96,26 +63,8 @@ export async function ensureBrowserOS(
configsMatch(serverState.config, config) &&
configsMatch(browserState.config, config)
) {
if (config.skipExtension) {
console.log('Reusing existing test environment')
return config
}
if (await isExtensionConnected(config.serverPort)) {
console.log('Reusing existing test environment')
return config
}
// Same server+browser are already running; we just need the extension.
// Avoid restarting processes (which can flake by killing the test runner).
console.log('Reusing existing test environment (waiting for extension)...')
try {
await waitForExtensionConnection(config.serverPort)
console.log('Extension connected')
return config
} catch {
// Fall through to full setup below.
}
console.log('Reusing existing test environment')
return config
}
// Config changed or not running: full setup
@@ -139,15 +88,6 @@ export async function ensureBrowserOS(
// 3. Start server once CDP is available.
await spawnServer(config)
// 4. Wait for extension to connect (unless skipped for CDP-only tests)
if (!config.skipExtension) {
console.log('Waiting for extension to connect...')
await waitForExtensionConnection(config.serverPort)
console.log('Extension connected')
} else {
console.log('Skipping extension connection (CDP-only mode)')
}
console.log('=== Test environment ready ===\n')
return config
}

View File

@@ -1,7 +1,6 @@
import { existsSync } from 'node:fs'
import { Mutex } from 'async-mutex'
import { CdpBackend } from '../../src/browser/backends/cdp'
import type { ControllerBackend } from '../../src/browser/backends/types'
import { Browser } from '../../src/browser/browser'
import type { ToolDefinition } from '../../src/tools/framework'
import { executeTool } from '../../src/tools/framework'
@@ -15,15 +14,6 @@ let cachedCdp: CdpBackend | null = null
let cachedBrowser: Browser | null = null
let runtimePlan: TestRuntimePlan | null = null
/**
 * Inert controller for tests: start/stop do nothing, it never reports a
 * connection, and every `send` rejects.
 */
const stubController: ControllerBackend = {
  async start() {},
  async stop() {},
  isConnected() {
    return false
  },
  async send() {
    throw new Error('Controller not available in test mode')
  },
}
async function getOrCreateBrowser(): Promise<Browser> {
if (cachedBrowser && cachedCdp?.isConnected()) return cachedBrowser
@@ -53,7 +43,7 @@ async function getOrCreateBrowser(): Promise<Browser> {
cachedCdp = new CdpBackend({ port: runtimePlan.ports.cdp })
await cachedCdp.connect()
cachedBrowser = new Browser(cachedCdp, stubController)
cachedBrowser = new Browser(cachedCdp)
return cachedBrowser
}

View File

@@ -0,0 +1,43 @@
/**
* @license
* Copyright 2025 BrowserOS
*/
import { describe, it } from 'bun:test'
import assert from 'node:assert'
import { createStatusRoute } from '../../../src/api/routes/status'
// Covers the status route with and without a browser dependency.
describe('createStatusRoute', () => {
  it('returns status ok when no browser is provided', async () => {
    const res = await createStatusRoute().request('/')

    assert.strictEqual(res.status, 200)
    assert.deepStrictEqual(await res.json(), { status: 'ok' })
  })

  it('reads CDP connectivity on each request', async () => {
    // Mutable flag read by the fake browser, so the route must re-query it.
    let connected = false
    const browser = { isCdpConnected: () => connected } as never
    const route = createStatusRoute({ browser })

    // Same route instance, flag flipped between requests.
    for (const state of [false, true]) {
      connected = state
      const res = await route.request('/')
      assert.deepStrictEqual(await res.json(), {
        status: 'ok',
        cdpConnected: state,
      })
    }
  })
})

Some files were not shown because too many files have changed in this diff Show More