Compare commits

..

2 Commits

Author SHA1 Message Date
Nikhil Sonti
ed926fb5d5 fix: update remaining lowercase browseros-cli references in release workflow 2026-03-27 13:24:59 -07:00
Nikhil Sonti
70ec6076f1 fix: standardize release names to "BrowserOS <Product> - vX.Y.Z" format
Update workflow release titles for Extension, Agent SDK, and CLI to use
consistent branding. Existing GitHub releases also renamed via gh CLI.
2026-03-27 13:12:55 -07:00
189 changed files with 9993 additions and 4588 deletions

2
.gitattributes vendored
View File

@@ -9,6 +9,4 @@ packages/browseros/chromium_patches/**/*.py linguist-generated
scripts/*.py linguist-generated
# Mark build directories as generated
build/* linguist-generated
# Mark eval/test framework as vendored so it's excluded from language stats
packages/browseros-agent/apps/eval/** linguist-vendored
docs/videos/** filter=lfs diff=lfs merge=lfs -text

View File

@@ -38,13 +38,40 @@ jobs:
bun-version: "1.3.6"
- name: Run tests
run: make test
run: go test ./... -v
- name: Run vet
run: make vet
run: go vet ./...
- name: Build all platforms
run: make release VERSION=${{ inputs.version }} POSTHOG_API_KEY=${{ secrets.POSTHOG_API_KEY }}
run: |
VERSION="${{ inputs.version }}"
LDFLAGS="-s -w -X main.version=${VERSION}"
DIST="dist"
mkdir -p "$DIST"
for pair in darwin/amd64 darwin/arm64 linux/amd64 linux/arm64 windows/amd64 windows/arm64; do
OS="${pair%/*}"
ARCH="${pair#*/}"
BIN="browseros-cli"
EXT=""
if [ "$OS" = "windows" ]; then EXT=".exe"; fi
echo "Building ${OS}/${ARCH}..."
GOOS=$OS GOARCH=$ARCH CGO_ENABLED=0 go build -trimpath -ldflags "$LDFLAGS" -o "${DIST}/${BIN}${EXT}" .
ARCHIVE="browseros-cli_${VERSION}_${OS}_${ARCH}"
if [ "$OS" = "windows" ]; then
(cd "$DIST" && zip "${ARCHIVE}.zip" "${BIN}${EXT}")
else
(cd "$DIST" && tar czf "${ARCHIVE}.tar.gz" "${BIN}")
fi
rm "${DIST}/${BIN}${EXT}"
done
(cd "$DIST" && sha256sum *.tar.gz *.zip > checksums.txt)
echo "=== Built artifacts ==="
ls -lh "$DIST"
- name: Install dependencies
run: bun install
@@ -75,7 +102,7 @@ jobs:
PREV_TAG=$(git tag -l "browseros-cli-v*" --sort=-v:refname | grep -v "^${TAG}$" | head -n 1)
if [ -z "$PREV_TAG" ]; then
echo "Initial release of browseros-cli." > "$CHANGELOG_FILE"
echo "Initial release of BrowserOS CLI." > "$CHANGELOG_FILE"
else
COMMITS=$(git log "$PREV_TAG"..HEAD --pretty=format:"%H" -- "$CLI_PATH")
@@ -103,13 +130,6 @@ jobs:
## Install `browseros-cli`
### npm / npx
```bash
npx browseros-cli --help
npm install -g browseros-cli
```
### macOS / Linux
```bash
@@ -135,7 +155,7 @@ jobs:
git config user.email "github-actions[bot]@users.noreply.github.com"
if ! git rev-parse "$TAG" >/dev/null 2>&1; then
git tag -a "$TAG" -m "browseros-cli v${{ inputs.version }}"
git tag -a "$TAG" -m "BrowserOS CLI v${{ inputs.version }}"
git push origin "$TAG"
fi
@@ -145,17 +165,3 @@ jobs:
--notes-file /tmp/release-notes.md \
${CLI_DIST}/*
working-directory: ${{ github.workspace }}
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: "20"
registry-url: "https://registry.npmjs.org"
- name: Publish to npm
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: |
make npm-version VERSION=${{ inputs.version }}
cd npm
npm publish --access public

View File

@@ -1,147 +0,0 @@
name: Release BrowserOS Server
on:
workflow_dispatch:
inputs:
version:
description: "Release version (e.g. 0.0.80)"
required: true
type: string
concurrency:
group: release-server
cancel-in-progress: false
jobs:
release:
if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
environment: release-core
permissions:
contents: write
defaults:
run:
working-directory: packages/browseros-agent
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- uses: oven-sh/setup-bun@v2
with:
bun-version: "1.3.6"
- name: Install dependencies
run: bun ci
- name: Prepare production env file
run: cp apps/server/.env.production.example apps/server/.env.production
- name: Validate version
id: version
env:
REQUESTED_VERSION: ${{ inputs.version }}
run: |
PACKAGE_VERSION=$(node -p "require('./apps/server/package.json').version")
echo "package_version=$PACKAGE_VERSION" >> "$GITHUB_OUTPUT"
echo "release_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
if [ "$PACKAGE_VERSION" != "$REQUESTED_VERSION" ]; then
echo "Requested version $REQUESTED_VERSION does not match apps/server/package.json ($PACKAGE_VERSION)"
exit 1
fi
- name: Build release artifacts
run: bun run build:server:ci
- name: Verify release artifacts
run: |
mapfile -t ZIP_FILES < <(find dist/prod/server -maxdepth 1 -type f -name 'browseros-server-resources-*.zip' | sort)
if [ "${#ZIP_FILES[@]}" -eq 0 ]; then
echo "No server release zip files were produced"
exit 1
fi
printf 'Found release artifacts:\n%s\n' "${ZIP_FILES[@]}"
- name: Generate release notes
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PACKAGE_VERSION: ${{ steps.version.outputs.package_version }}
run: |
SERVER_APP_PATH="packages/browseros-agent/apps/server"
SERVER_BUILD_DIR="packages/browseros-agent/scripts/build/server"
SERVER_BUILD_ENTRY="packages/browseros-agent/scripts/build/server.ts"
SERVER_RESOURCE_MANIFEST="packages/browseros-agent/scripts/build/config/server-prod-resources.json"
SERVER_WORKSPACE_PKG="packages/browseros-agent/package.json"
CURRENT_TAG="browseros-server-v$PACKAGE_VERSION"
PREV_TAG=$(git tag -l "browseros-server-v*" --sort=-v:refname | grep -v "^${CURRENT_TAG}$" | head -n 1)
if [ -z "$PREV_TAG" ]; then
echo "Initial release of browseros-server." > /tmp/release-notes.md
else
COMMITS=$(git log "$PREV_TAG"..HEAD --pretty=format:"%H" -- \
"$SERVER_APP_PATH" \
"$SERVER_BUILD_DIR" \
"$SERVER_BUILD_ENTRY" \
"$SERVER_RESOURCE_MANIFEST" \
"$SERVER_WORKSPACE_PKG")
if [ -z "$COMMITS" ]; then
echo "No notable changes." > /tmp/release-notes.md
else
echo "## What's Changed" > /tmp/release-notes.md
echo "" >> /tmp/release-notes.md
while IFS= read -r SHA; do
SUBJECT=$(git log -1 --pretty=format:"%s" "$SHA")
PR_NUM=$(gh api "/repos/${{ github.repository }}/commits/${SHA}/pulls" --jq '.[0].number // empty' 2>/dev/null)
if [ -n "$PR_NUM" ] && ! echo "$SUBJECT" | grep -qF "(#${PR_NUM})"; then
echo "- ${SUBJECT} (#${PR_NUM})" >> /tmp/release-notes.md
else
echo "- ${SUBJECT}" >> /tmp/release-notes.md
fi
done <<< "$COMMITS"
fi
fi
working-directory: ${{ github.workspace }}
- name: Create GitHub release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PACKAGE_VERSION: ${{ steps.version.outputs.package_version }}
RELEASE_SHA: ${{ steps.version.outputs.release_sha }}
run: |
TAG="browseros-server-v$PACKAGE_VERSION"
TITLE="BrowserOS Server - v$PACKAGE_VERSION"
mapfile -t ZIP_FILES < <(find packages/browseros-agent/dist/prod/server -maxdepth 1 -type f -name 'browseros-server-resources-*.zip' | sort)
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
if git rev-parse "$TAG" >/dev/null 2>&1; then
echo "Tag $TAG already exists, skipping tag creation"
else
git tag -a "$TAG" -m "browseros-server v$PACKAGE_VERSION" "$RELEASE_SHA"
fi
if git ls-remote --tags origin "$TAG" | grep -q "$TAG"; then
echo "Tag $TAG already on remote, skipping push"
else
git push origin "$TAG"
fi
if gh release view "$TAG" >/dev/null 2>&1; then
echo "Release $TAG already exists, updating"
gh release edit "$TAG" --title "$TITLE" --notes-file /tmp/release-notes.md
gh release upload "$TAG" "${ZIP_FILES[@]}" --clobber
else
gh release create "$TAG" \
--title "$TITLE" \
--notes-file /tmp/release-notes.md \
"${ZIP_FILES[@]}"
fi
working-directory: ${{ github.workspace }}

View File

@@ -192,7 +192,7 @@ We'd love your help making BrowserOS better! See our [Contributing Guide](CONTRI
BrowserOS is open source under the [AGPL-3.0 license](LICENSE).
Copyright &copy; 2026 Felafax, Inc.
Copyright &copy; 2025 Felafax, Inc.
## Stargazers

View File

@@ -32,7 +32,7 @@ Use **kebab-case** for all file and folder names:
| Multi-word files | kebab-case | `gemini-agent.ts`, `mcp-context.ts` |
| Single-word files | lowercase | `types.ts`, `browser.ts`, `index.ts` |
| Test files | `.test.ts` suffix | `mcp-context.test.ts` |
| Folders | kebab-case | `rate-limiter/`, `browser-tools/` |
| Folders | kebab-case | `controller-server/`, `rate-limiter/` |
Classes remain PascalCase in code, but live in kebab-case files:
```typescript
@@ -97,16 +97,21 @@ The main MCP server that exposes browser automation tools via HTTP/SSE.
**Key components:**
- `src/tools/` - MCP tool definitions, split into:
- `cdp-based/` - Tools using Chrome DevTools Protocol (navigation, DOM interaction, network, console, emulation, input, etc.)
- `cdp-based/` - Tools using Chrome DevTools Protocol (network, console, emulation, input, etc.)
- `controller-based/` - Tools using the browser extension (navigation, clicks, screenshots, tabs, history, bookmarks)
- `src/controller-server/` - WebSocket server that bridges to the browser extension
- `ControllerBridge` handles WebSocket connections with extension clients
- `ControllerContext` wraps the bridge for tool handlers
- `src/common/` - Shared utilities (McpContext, PageCollector, browser connection, identity, db)
- `src/agent/` - AI agent functionality (Gemini adapter, rate limiting, session management)
- `src/http/` - Hono HTTP server with MCP, health, and provider routes
**Tool types:**
- CDP tools require a direct CDP connection (`--cdp-port`)
- Controller tools work via the browser extension over WebSocket
### Shared (`packages/shared`)
Shared constants, types, and configuration used across packages. Avoids magic numbers.
Shared constants, types, and configuration used by both server and extension. Avoids magic numbers.
**Structure:**
- `src/constants/` - Configuration values (ports, timeouts, limits, urls, paths)
@@ -114,12 +119,22 @@ Shared constants, types, and configuration used across packages. Avoids magic nu
**Exports:** `@browseros/shared/constants/*`, `@browseros/shared/types/*`
### Controller Extension (`apps/controller-ext`)
Chrome extension that receives commands from the server via WebSocket.
**Entry point:** `src/background/index.ts` → `BrowserOSController`
**Structure:**
- `src/actions/` - Action handlers organized by domain (browser/, tab/, bookmark/, history/)
- `src/adapters/` - Chrome API adapters (TabAdapter, BookmarkAdapter, HistoryAdapter)
- `src/websocket/` - WebSocket client that connects to the server
### Communication Flow
```
AI Agent/MCP Client → HTTP Server (Hono) → Tool Handler
CDP → BrowserOS / Chrome APIs
CDP (direct) ←── or ──→ WebSocket → Extension → Chrome APIs
```
## Creating Packages

View File

@@ -10,6 +10,7 @@ apps/
agent/ # Agent UI (Chrome extension)
cli/ # Go CLI for controlling BrowserOS from the terminal
eval/ # Evaluation framework for benchmarking agents
controller-ext/ # BrowserOS Controller (Chrome extension for chrome.* APIs)
packages/
agent-sdk/ # Node.js SDK (@browseros-ai/agent-sdk)
@@ -23,6 +24,7 @@ packages/
| `apps/agent` | Agent UI — Chrome extension for the chat interface |
| `apps/cli` | Go CLI — control BrowserOS from the terminal or AI coding agents |
| `apps/eval` | Benchmark framework — WebVoyager, Mind2Web evaluation |
| `apps/controller-ext` | BrowserOS Controller — bridges `chrome.*` APIs to the server via WebSocket |
| `packages/agent-sdk` | Node.js SDK for browser automation with natural language |
| `packages/cdp-protocol` | Auto-generated CDP type bindings used by the server |
| `packages/shared` | Shared constants used across packages |
@@ -31,6 +33,7 @@ packages/
- `apps/server`: Bun server which contains the agent loop and tools.
- `apps/agent`: Agent UI (Chrome extension).
- `apps/controller-ext`: BrowserOS Controller - a Chrome extension that bridges `chrome.*` APIs to the server. Controller tools within the server communicate with this extension via WebSocket.
```
┌──────────────────────────────────────────────────────────────────────────┐
@@ -48,19 +51,19 @@ packages/
│ /health ─── Health check │
│ │
│ Tools: │
── CDP-backed browser tools (tabs, navigation, input, screenshots, │
bookmarks, history, console, DOM, tab groups, windows, ...)
── CDP Tools (console, network, input, screenshot, ...)
└── Controller Tools (tabs, navigation, clicks, bookmarks, history)
└──────────────────────────────────────────────────────────────────────────┘
CDP (client)
─────────────────────┐
Chromium CDP
(cdpPort: 9000) │
│ │
Server connects
│ TO this as client
─────────────────────┘
│ CDP (client)WebSocket (server)
┌─────────────────────┐ ┌─────────────────────────────────────┐
Chromium CDP BrowserOS Controller Extension
(cdpPort: 9000) (extensionPort: 9300)
│ Server connects Bridges chrome.tabs, chrome.history
│ TO this as client │ │ chrome.bookmarks to the server
└─────────────────────┘ └─────────────────────────────────────┘
```
### Ports
@@ -69,7 +72,7 @@ packages/
|------|--------------|---------|
| 9100 | `BROWSEROS_SERVER_PORT` | HTTP server - MCP endpoints, agent chat, health |
| 9000 | `BROWSEROS_CDP_PORT` | Chromium CDP server (BrowserOS Server connects as client) |
| 9300 | `BROWSEROS_EXTENSION_PORT` | Legacy BrowserOS launch arg kept for compatibility; not used by the server |
| 9300 | `BROWSEROS_EXTENSION_PORT` | WebSocket server for controller extension |
## Development
@@ -93,8 +96,9 @@ process-compose up
The `process-compose up` command runs the following in order:
1. `bun install` — installs dependencies
2. `bun --cwd apps/agent codegen` — generates agent code
3. `bun --cwd apps/server start` and `bun --cwd apps/agent dev` — starts server and agent in parallel
2. `bun --cwd apps/controller-ext build` — builds the controller extension
3. `bun --cwd apps/agent codegen` — generates agent code
4. `bun --cwd apps/server start` and `bun --cwd apps/agent dev` — starts server and agent in parallel
### Environment Variables
@@ -110,7 +114,7 @@ Runtime uses `.env.development`, while production artifact builds use `.env.prod
|----------|---------|-------------|
| `BROWSEROS_SERVER_PORT` | 9100 | HTTP server port (MCP, chat, health) |
| `BROWSEROS_CDP_PORT` | 9000 | Chromium CDP port (server connects as client) |
| `BROWSEROS_EXTENSION_PORT` | 9300 | Legacy BrowserOS launch arg kept for compatibility |
| `BROWSEROS_EXTENSION_PORT` | 9300 | WebSocket port for controller extension |
| `BROWSEROS_CONFIG_URL` | - | Remote config endpoint for rate limits |
| `BROWSEROS_INSTALL_ID` | - | Unique installation identifier (analytics) |
| `BROWSEROS_CLIENT_ID` | - | Client identifier (analytics) |
@@ -142,7 +146,7 @@ Copy from `apps/server/.env.production.example` before running `build:server`.
|----------|---------|-------------|
| `BROWSEROS_SERVER_PORT` | 9100 | Passed to BrowserOS via CLI args |
| `BROWSEROS_CDP_PORT` | 9000 | Passed to BrowserOS via CLI args |
| `BROWSEROS_EXTENSION_PORT` | 9300 | Legacy BrowserOS CLI arg still passed for compatibility |
| `BROWSEROS_EXTENSION_PORT` | 9300 | Passed to BrowserOS via CLI args |
| `VITE_BROWSEROS_SERVER_PORT` | 9100 | Agent UI connects to server (must match `BROWSEROS_SERVER_PORT`) |
| `BROWSEROS_BINARY` | - | Path to BrowserOS binary |
| `USE_BROWSEROS_BINARY` | true | Use BrowserOS instead of default Chrome |
@@ -159,13 +163,15 @@ bun run start:server # Start the server
bun run start:agent # Start agent extension (dev mode)
# Build
bun run build # Build server and agent
bun run build # Build server, agent, and controller extension
bun run build:server # Build production server resource artifacts and upload zips to R2
bun run build:agent # Build agent extension
bun run build:ext # Build controller extension
# Test
bun run test # Run standard tests
bun run test:cdp # Run CDP-based tests
bun run test:controller # Run controller-based tests
bun run test:integration # Run integration tests
# Quality

View File

@@ -1,18 +1,5 @@
# BrowserOS Agent Extension
## v0.0.98 (2026-03-27)
## What's Changed
- chore: update agent version (#608)
- chore: fix version number for extension (#606)
- fix: improve chat history freshness and reduce query payload (#598)
- feat: isolate new-tab agent navigation from origin tab (#593)
- docs: overhaul READMEs across all major packages (#594)
- fix(ui): resolve MCP promo banner dismiss button overlapping with text (#581)
- docs: update agent extension changelog for v0.0.52 (#573)
## v0.0.52 (2026-03-26)
Initial release

View File

@@ -1,26 +1,17 @@
import { zodResolver } from '@hookform/resolvers/zod'
import Fuse from 'fuse.js'
import {
Check,
CheckCircle2,
ChevronDown,
ExternalLink,
Loader2,
SearchIcon,
XCircle,
} from 'lucide-react'
import { type FC, useEffect, useMemo, useState } from 'react'
import { type FC, useEffect, useRef, useState } from 'react'
import { useForm } from 'react-hook-form'
import { z } from 'zod/v3'
import { Button } from '@/components/ui/button'
import { Checkbox } from '@/components/ui/checkbox'
import {
Command,
CommandEmpty,
CommandGroup,
CommandInput,
CommandItem,
CommandList,
} from '@/components/ui/command'
import {
Dialog,
DialogContent,
@@ -39,11 +30,6 @@ import {
FormMessage,
} from '@/components/ui/form'
import { Input } from '@/components/ui/input'
import {
Popover,
PopoverContent,
PopoverTrigger,
} from '@/components/ui/popover'
import {
Select,
SelectContent,
@@ -56,10 +42,8 @@ import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
import { useCapabilities } from '@/lib/browseros/useCapabilities'
import {
AI_PROVIDER_ADDED_EVENT,
AI_PROVIDER_UPDATED_EVENT,
KIMI_API_KEY_CONFIGURED_EVENT,
KIMI_API_KEY_GUIDE_CLICKED_EVENT,
MODEL_SELECTED_EVENT,
} from '@/lib/constants/analyticsEvents'
import { useKimiLaunch } from '@/lib/feature-flags/useKimiLaunch'
import {
@@ -71,7 +55,11 @@ import { type TestResult, testProvider } from '@/lib/llm-providers/testProvider'
import type { LlmProviderConfig, ProviderType } from '@/lib/llm-providers/types'
import { track } from '@/lib/metrics/track'
import { cn } from '@/lib/utils'
import { getModelContextLength, getModelsForProvider } from './models'
import {
getModelContextLength,
getModelsForProvider,
type ModelInfo,
} from './models'
const providerTypeEnum = z.enum([
'moonshot',
@@ -194,6 +182,100 @@ function formatContextWindow(tokens: number): string {
return `${tokens}`
}
/**
 * Inline, searchable list for picking a model ID.
 *
 * The search box is focused on mount. Typing narrows the list with a
 * case-insensitive substring match on `modelId`. Clicking a row reports that
 * model through `onSelect`; pressing Enter reports the typed text through
 * `onCustomSubmit`, so IDs that are not in `models` can still be used.
 * Pressing Escape, or pressing the mouse outside the list, calls `onClose`.
 *
 * @param models - Known models to offer; each row shows the ID and its context window.
 * @param selectedModelId - ID of the currently chosen model, highlighted in the list.
 * @param onSelect - Called with the `modelId` of the row that was clicked.
 * @param onCustomSubmit - Called with the trimmed search text when Enter is pressed.
 * @param onClose - Called when the picker should be dismissed without a choice.
 */
function ModelPickerList({
  models,
  selectedModelId,
  onSelect,
  onCustomSubmit,
  onClose,
}: {
  models: ModelInfo[]
  selectedModelId: string
  onSelect: (modelId: string) => void
  onCustomSubmit: (modelId: string) => void
  onClose: () => void
}) {
  const [search, setSearch] = useState('')
  const inputRef = useRef<HTMLInputElement>(null)
  const containerRef = useRef<HTMLDivElement>(null)

  // Put the cursor in the search box as soon as the list is mounted.
  useEffect(() => {
    inputRef.current?.focus()
  }, [])

  // Dismiss when a mousedown lands outside the picker's container.
  useEffect(() => {
    const handleClickOutside = (e: MouseEvent) => {
      if (
        containerRef.current &&
        !containerRef.current.contains(e.target as Node)
      ) {
        onClose()
      }
    }
    document.addEventListener('mousedown', handleClickOutside)
    return () => document.removeEventListener('mousedown', handleClickOutside)
  }, [onClose])

  // Trim once and use the result everywhere: surrounding whitespace must not
  // hide matches, and must never end up inside a saved model ID.
  const customModelId = search.trim()
  const query = customModelId.toLowerCase()
  const filtered = query
    ? models.filter((m) => m.modelId.toLowerCase().includes(query))
    : models

  const handleKeyDown = (e: React.KeyboardEvent) => {
    if (e.key === 'Enter' && search) {
      // Keep Enter from bubbling into an implicit form submission.
      e.preventDefault()
      // Whitespace-only input is not a usable model ID, so it is ignored.
      if (customModelId) {
        onCustomSubmit(customModelId)
      }
    }
    if (e.key === 'Escape') {
      onClose()
    }
  }

  return (
    <div ref={containerRef} className="rounded-md border">
      <div className="flex items-center gap-2 border-b px-3">
        <SearchIcon className="h-4 w-4 shrink-0 text-muted-foreground opacity-50" />
        <input
          ref={inputRef}
          type="text"
          value={search}
          onChange={(e) => setSearch(e.target.value)}
          onKeyDown={handleKeyDown}
          placeholder="Search or type a custom model ID..."
          className="flex h-9 w-full bg-transparent py-2 text-sm outline-none placeholder:text-muted-foreground"
        />
      </div>
      <div className="max-h-[200px] overflow-y-auto">
        {filtered.length > 0 ? (
          filtered.map((model) => {
            const isSelected = selectedModelId === model.modelId
            return (
              <button
                key={model.modelId}
                type="button"
                onClick={() => onSelect(model.modelId)}
                className={cn(
                  'flex w-full items-center justify-between px-3 py-2 text-left text-sm transition-colors hover:bg-accent',
                  isSelected && 'bg-accent font-medium',
                )}
              >
                <span className="truncate">{model.modelId}</span>
                <span className="ml-2 shrink-0 rounded-md bg-muted px-1.5 py-0.5 font-mono text-[10px] text-muted-foreground">
                  {formatContextWindow(model.contextLength)}
                </span>
              </button>
            )
          })
        ) : (
          <div className="px-3 py-6 text-center text-muted-foreground text-sm">
            No models match. Press Enter to use &quot;{customModelId}&quot;
          </div>
        )}
      </div>
    </div>
  )
}
/**
* Props for NewProviderDialog
* @public
@@ -221,8 +303,7 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
}) => {
const [isTesting, setIsTesting] = useState(false)
const [testResult, setTestResult] = useState<TestResult | null>(null)
const [modelPickerOpen, setModelPickerOpen] = useState(false)
const [modelSearch, setModelSearch] = useState('')
const [modelListOpen, setModelListOpen] = useState(false)
const { supports } = useCapabilities()
const { baseUrl: agentServerUrl } = useAgentServerUrl()
const kimiLaunch = useKimiLaunch()
@@ -295,20 +376,6 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
const modelInfoList = getModelsForProvider(watchedType as ProviderType)
const modelFuse = useMemo(
() =>
new Fuse(modelInfoList, {
keys: ['modelId'],
threshold: 0.4,
distance: 100,
}),
[modelInfoList],
)
const filteredModels = modelSearch
? modelFuse.search(modelSearch).map((r) => r.item)
: modelInfoList
// Handle provider type change (user-initiated via Select)
const handleTypeChange = (newType: ProviderType) => {
form.setValue('type', newType)
@@ -404,11 +471,6 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
provider_type: values.type,
model: values.modelId,
})
} else {
track(AI_PROVIDER_UPDATED_EVENT, {
provider_type: values.type,
model: values.modelId,
})
}
if (values.type === 'moonshot') {
track(KIMI_API_KEY_CONFIGURED_EVENT, {
@@ -862,95 +924,36 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
{...field}
/>
</FormControl>
) : (
<Popover
open={modelPickerOpen}
onOpenChange={(isOpen) => {
setModelPickerOpen(isOpen)
if (!isOpen) setModelSearch('')
) : modelListOpen ? (
<ModelPickerList
models={modelInfoList}
selectedModelId={field.value}
onSelect={(modelId) => {
form.setValue('modelId', modelId)
setModelListOpen(false)
}}
onCustomSubmit={(modelId) => {
form.setValue('modelId', modelId)
setModelListOpen(false)
}}
onClose={() => setModelListOpen(false)}
/>
) : (
<button
type="button"
onClick={() => setModelListOpen(true)}
className={cn(
'flex h-9 w-full items-center justify-between rounded-md border border-input bg-transparent px-3 py-1 text-sm shadow-xs',
field.value
? 'text-foreground'
: 'text-muted-foreground',
)}
>
<PopoverTrigger asChild>
<button
type="button"
className={cn(
'flex h-9 w-full items-center justify-between rounded-md border border-input bg-transparent px-3 py-1 text-sm shadow-xs',
field.value
? 'text-foreground'
: 'text-muted-foreground',
)}
>
<span className="truncate">
{field.value || 'Select a model...'}
</span>
<ChevronDown className="ml-2 h-4 w-4 shrink-0 opacity-50" />
</button>
</PopoverTrigger>
<PopoverContent
className="w-[var(--radix-popover-trigger-width)] p-0"
align="start"
>
<Command shouldFilter={false}>
<CommandInput
placeholder="Search models..."
value={modelSearch}
onValueChange={setModelSearch}
onKeyDown={(e) => {
if (
e.key === 'Enter' &&
modelSearch &&
filteredModels.length === 0
) {
e.preventDefault()
form.setValue('modelId', modelSearch)
track(MODEL_SELECTED_EVENT, {
provider_type: watchedType,
model_id: modelSearch,
is_custom_model: true,
})
setModelPickerOpen(false)
setModelSearch('')
}
}}
/>
<CommandList>
<CommandEmpty>
No models found. Press Enter to use &quot;
{modelSearch}&quot;
</CommandEmpty>
<CommandGroup>
{filteredModels.map((model) => (
<CommandItem
key={model.modelId}
value={model.modelId}
onSelect={() => {
form.setValue('modelId', model.modelId)
track(MODEL_SELECTED_EVENT, {
provider_type: watchedType,
model_id: model.modelId,
context_window: model.contextLength,
is_custom_model: false,
})
setModelPickerOpen(false)
setModelSearch('')
}}
>
<span className="flex-1 truncate">
{model.modelId}
</span>
<span className="ml-2 shrink-0 rounded-md bg-muted px-1.5 py-0.5 font-mono text-[10px] text-muted-foreground">
{formatContextWindow(model.contextLength)}
</span>
{field.value === model.modelId && (
<Check className="ml-2 h-4 w-4 shrink-0" />
)}
</CommandItem>
))}
</CommandGroup>
</CommandList>
</Command>
</PopoverContent>
</Popover>
<span className="truncate">
{field.value || 'Select a model...'}
</span>
<ChevronDown className="ml-2 h-4 w-4 shrink-0 opacity-50" />
</button>
)}
<FormMessage />
</FormItem>

View File

@@ -561,11 +561,9 @@ export const useChatSession = (options?: ChatSessionOptions) => {
}, [])
const handleSelectProvider = (provider: Provider) => {
const fullProvider = llmProviders.find((p) => p.id === provider.id)
track(PROVIDER_SELECTED_EVENT, {
provider_id: provider.id,
provider_type: provider.type,
model_id: fullProvider?.modelId,
})
setDefaultProvider(provider.id)
}

View File

@@ -29,12 +29,6 @@ export const CONVERSATION_RESET_EVENT = 'ui.conversation.reset'
/** @public */
export const AI_PROVIDER_ADDED_EVENT = 'settings.ai_provider.added'
/** @public */
export const AI_PROVIDER_UPDATED_EVENT = 'settings.ai_provider.updated'
/** @public */
export const MODEL_SELECTED_EVENT = 'settings.model.selected'
/** @public */
export const CHATGPT_PRO_OAUTH_STARTED_EVENT =
'settings.chatgpt_pro.oauth_started'

View File

@@ -2,7 +2,7 @@
"name": "@browseros/agent",
"description": "manifest.json description",
"private": true,
"version": "0.0.98",
"version": "0.0.52",
"type": "module",
"scripts": {
"dev": "test -d generated/graphql || bun run codegen; mkdir -p /tmp/browseros-dev; bun --env-file=.env.development wxt",
@@ -67,7 +67,6 @@
"embla-carousel-react": "^8.6.0",
"es-toolkit": "^1.42.0",
"eventsource-parser": "^3.0.6",
"fuse.js": "^7.1.0",
"graphql": "^16.12.0",
"hono": "^4.12.3",
"idb-keyval": "^6.2.2",

View File

@@ -1,13 +1,19 @@
import { dirname, join } from 'node:path'
import { fileURLToPath } from 'node:url'
import { defineWebExtConfig } from 'wxt'
// biome-ignore lint/style/noProcessEnv: config file needs env access
const env = process.env
const MONOREPO_ROOT = join(dirname(fileURLToPath(import.meta.url)), '../..')
const CONTROLLER_EXT_DIR = join(MONOREPO_ROOT, 'apps/controller-ext/dist')
const chromiumArgs = [
'--use-mock-keychain',
'--show-component-extension-options',
'--disable-browseros-server',
'--disable-browseros-extensions',
`--load-extension=${CONTROLLER_EXT_DIR}`,
]
if (env.BROWSEROS_CDP_PORT) {

View File

@@ -0,0 +1,50 @@
version: 2
project_name: browseros-cli
monorepo:
tag_prefix: browseros-cli-
builds:
- main: .
binary: browseros-cli
env:
- CGO_ENABLED=0
flags:
- -trimpath
ldflags:
- -s -w -X main.version={{ .Version }} -X browseros-cli/analytics.posthogAPIKey={{ .Env.POSTHOG_API_KEY }}
targets:
- darwin_amd64
- darwin_arm64
- linux_amd64
- linux_arm64
- windows_amd64
- windows_arm64
archives:
- format: tar.gz
format_overrides:
- goos: windows
format: zip
name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
files:
- "none*"
checksum:
name_template: checksums.txt
changelog:
sort: asc
filters:
exclude:
- "^docs:"
- "^test:"
- "^ci:"
release:
github:
owner: browseros-ai
name: BrowserOS
prerelease: auto
name_template: "browseros-cli v{{ .Version }}"

View File

@@ -2,26 +2,18 @@ BINARY := browseros-cli
SOURCES := $(shell find . -name '*.go')
VERSION ?= dev
POSTHOG_API_KEY ?=
DIST := dist
LDFLAGS := -X main.version=$(VERSION) -X browseros-cli/analytics.posthogAPIKey=$(POSTHOG_API_KEY)
HOST_OS := $(shell go env GOOS)
HOST_ARCH := $(shell go env GOARCH)
HOST_EXT := $(if $(filter windows,$(HOST_OS)),.exe,)
HOST_BINARY = $(DIST)/$(BINARY)_$(HOST_OS)_$(HOST_ARCH)$(HOST_EXT)
$(BINARY): $(SOURCES)
go build -ldflags "$(LDFLAGS)" -o $(BINARY) .
PLATFORMS := darwin/amd64 darwin/arm64 linux/amd64 linux/arm64 windows/amd64 windows/arm64
.PHONY: install clean vet test release
.PHONY: install clean vet test
install:
go install -ldflags "$(LDFLAGS)" .
clean:
rm -f $(BINARY)
rm -rf $(DIST)
vet:
go vet ./...
@@ -29,41 +21,8 @@ vet:
test:
go test -tags integration -v -timeout 120s ./...
release-dry:
goreleaser release --snapshot --clean
release:
@if [ "$(VERSION)" = "dev" ]; then echo "Error: VERSION required (e.g. make release VERSION=0.1.0)" >&2; exit 1; fi
@rm -rf $(DIST) && mkdir -p $(DIST)
@for pair in $(PLATFORMS); do \
OS=$${pair%/*}; \
ARCH=$${pair#*/}; \
EXT=""; \
if [ "$$OS" = "windows" ]; then EXT=".exe"; fi; \
echo "Building $$OS/$$ARCH..."; \
GOOS=$$OS GOARCH=$$ARCH CGO_ENABLED=0 go build -trimpath \
-ldflags "-s -w $(LDFLAGS)" \
-o "$(DIST)/$(BINARY)$$EXT" .; \
ARCHIVE="$(BINARY)_$(VERSION)_$${OS}_$${ARCH}"; \
if [ "$$OS" = "windows" ]; then \
(cd $(DIST) && zip "$${ARCHIVE}.zip" "$(BINARY)$$EXT"); \
else \
(cd $(DIST) && tar czf "$${ARCHIVE}.tar.gz" "$(BINARY)"); \
fi; \
mv "$(DIST)/$(BINARY)$$EXT" "$(DIST)/$(BINARY)_$${OS}_$${ARCH}$$EXT"; \
done
@ACTUAL_VERSION=$$($(HOST_BINARY) --version | awk '{print $$3}'); \
if [ "$$ACTUAL_VERSION" != "$(VERSION)" ]; then \
echo "Error: expected $(HOST_BINARY) to report version $(VERSION), got $$ACTUAL_VERSION" >&2; \
exit 1; \
fi
@cd $(DIST) && (command -v sha256sum >/dev/null 2>&1 && sha256sum *.tar.gz *.zip || shasum -a 256 *.tar.gz *.zip) > checksums.txt
@echo "=== Built artifacts ==="
@ls -lh $(DIST)
.PHONY: npm-version npm-publish
npm-version:
@if [ "$(VERSION)" = "dev" ]; then echo "Error: VERSION required" >&2; exit 1; fi
@node -e "const p=require('./npm/package.json');p.version='$(VERSION)';require('fs').writeFileSync('./npm/package.json',JSON.stringify(p,null,2)+'\n')"
@echo "npm/package.json version set to $(VERSION)"
npm-publish: npm-version
cd npm && npm publish
goreleaser release --clean

View File

@@ -54,16 +54,6 @@ browseros-cli init # interactive — prompts for URL
Config is saved to `~/.config/browseros-cli/config.yaml`. The CLI also auto-discovers the server from `~/.browseros/server.json` (written by BrowserOS on startup).
### CLI updates
The CLI checks for a newer BrowserOS CLI release in the background about once per day and will suggest an update on a later run when one is available.
```bash
browseros-cli update # check and apply the latest CLI release
browseros-cli update --check # check only
browseros-cli update --yes # apply without prompting
```
## Usage
```bash

View File

@@ -49,7 +49,7 @@ func init() {
statusCmd := &cobra.Command{
Use: "status",
Annotations: map[string]string{"group": "Setup:"},
Short: "Check BrowserOS runtime status",
Short: "Check extension connection status",
Args: cobra.NoArgs,
Run: func(cmd *cobra.Command, args []string) {
c := newClient()
@@ -64,12 +64,12 @@ func init() {
green := color.New(color.FgGreen).SprintFunc()
red := color.New(color.FgRed).SprintFunc()
cdp := data["cdpConnected"]
cdpStr := red("disconnected")
if b, ok := cdp.(bool); ok && b {
cdpStr = green("connected")
ext := data["extensionConnected"]
extStr := red("disconnected")
if b, ok := ext.(bool); ok && b {
extStr = green("connected")
}
fmt.Printf("Browser: %s\n", cdpStr)
fmt.Printf("Extension: %s\n", extStr)
},
}

View File

@@ -25,17 +25,13 @@ func init() {
Long: `Set up the CLI by providing the MCP server URL from BrowserOS.
Open BrowserOS → Settings → BrowserOS MCP to find your Server URL.
The URL looks like: http://127.0.0.1:9000/mcp
The URL looks like: http://127.0.0.1:9004/mcp
The port varies per installation, so this step is required on first use.
Run again if your port changes.
You can provide the full URL or just the port number:
browseros-cli init http://127.0.0.1:9000/mcp
browseros-cli init 9000
Three modes:
browseros-cli init <url> Non-interactive (full URL or port number)
browseros-cli init <url> Non-interactive, use the provided URL
browseros-cli init --auto Auto-discover from ~/.browseros/server.json
browseros-cli init Interactive prompt`,
Annotations: map[string]string{"group": "Setup:"},
@@ -69,14 +65,13 @@ Three modes:
bold.Println("BrowserOS CLI Setup")
fmt.Println()
fmt.Println("Open BrowserOS → Settings → BrowserOS MCP")
fmt.Println("Copy the Server URL or port number shown there.")
fmt.Println("Copy the Server URL shown there.")
fmt.Println()
dim.Println("Examples: http://127.0.0.1:9000/mcp")
dim.Println(" 9000")
dim.Println("It looks like: http://127.0.0.1:9004/mcp")
fmt.Println()
reader := bufio.NewReader(os.Stdin)
fmt.Print("Server URL or port: ")
fmt.Print("Server URL: ")
line, err := reader.ReadString('\n')
if err != nil {
output.Error("failed to read input", 1)

View File

@@ -1,7 +1,6 @@
package cmd
import (
"context"
"encoding/json"
"fmt"
"os"
@@ -14,7 +13,6 @@ import (
"browseros-cli/config"
"browseros-cli/mcp"
"browseros-cli/output"
"browseros-cli/update"
"github.com/fatih/color"
"github.com/spf13/cobra"
@@ -30,11 +28,8 @@ var (
version = "dev"
)
// automaticUpdateDrainTimeout is the maximum time drainAutomaticUpdateCheck
// waits for the background update check to finish before giving up.
const automaticUpdateDrainTimeout = 150 * time.Millisecond
// SetVersion records the CLI version in the package-level version variable
// and mirrors it onto rootCmd.Version so both stay in sync.
func SetVersion(v string) {
version = v
rootCmd.Version = v
}
var (
@@ -119,24 +114,11 @@ var rootCmd = &cobra.Command{
}
func Execute() {
automaticUpdater := newAutomaticUpdateManager(os.Args[1:])
automaticNotice := ""
var automaticCheckDone <-chan struct{}
if automaticUpdater != nil {
automaticNotice = automaticUpdater.CachedNotice()
automaticCheckDone = automaticUpdater.StartBackgroundCheck(context.Background())
}
analytics.Init(version)
start := time.Now()
err := rootCmd.Execute()
if automaticNotice != "" && err == nil {
fmt.Fprintln(os.Stderr, automaticNotice)
}
drainAutomaticUpdateCheck(automaticCheckDone)
analytics.Track(commandName(os.Args[1:]), err == nil, time.Since(start))
analytics.Close()
@@ -201,93 +183,6 @@ func envBool(key string) bool {
return v == "1" || v == "true"
}
// newAutomaticUpdateManager returns an update.Manager configured for
// automatic (background) checks, or nil when the given command-line
// arguments indicate that automatic updates should be skipped.
//
// The --json and --debug flags are detected from the raw arguments via
// requestedBoolFlag, falling back to the current jsonOut / debug values.
func newAutomaticUpdateManager(args []string) *update.Manager {
	if !shouldSkipAutomaticUpdates(args) {
		opts := update.Options{
			CurrentVersion: version,
			JSONOutput:     requestedBoolFlag(args, "--json", jsonOut),
			Debug:          requestedBoolFlag(args, "--debug", debug),
			Automatic:      true,
		}
		return update.NewManager(opts)
	}
	return nil
}
// shouldSkipAutomaticUpdates reports whether the automatic update check
// should be suppressed for this invocation: when help or --version was
// requested, or when the primary command is help, completion, or one of the
// update command names (update, self-update, upgrade).
func shouldSkipAutomaticUpdates(args []string) bool {
	if hasHelpFlag(args) {
		return true
	}
	if requestedBoolFlag(args, "--version", false) {
		return true
	}

	name := primaryCommand(args)
	return name == "help" ||
		name == "completion" ||
		name == "update" ||
		name == "self-update" ||
		name == "upgrade"
}
// hasHelpFlag reports whether the arguments request help, either through
// --help (including the --help=<bool> form handled by requestedBoolFlag) or
// through a literal -h argument.
func hasHelpFlag(args []string) bool {
	found := requestedBoolFlag(args, "--help", false)
	// Only scan for the short form when the long form was not present.
	for i := 0; !found && i < len(args); i++ {
		found = args[i] == "-h"
	}
	return found
}
// primaryCommand returns the first argument that does not start with "-",
// or "" when every argument looks like a flag (or there are none).
//
// NOTE(review): a value passed to a flag as a separate argument
// (e.g. "--flag value") is indistinguishable from a command here.
func primaryCommand(args []string) string {
	for i := range args {
		if !strings.HasPrefix(args[i], "-") {
			return args[i]
		}
	}
	return ""
}
// requestedBoolFlag reports whether a boolean flag is enabled.
//
// It returns true immediately when current is already true. Otherwise it
// scans args in order and decides on the first match: a bare flagName means
// true, and flagName=<value> is true only when <value> parses as a true
// boolean (an unparsable value counts as false). No match yields false.
func requestedBoolFlag(args []string, flagName string, current bool) bool {
	if current {
		return true
	}

	assignment := flagName + "="
	for _, candidate := range args {
		switch {
		case candidate == flagName:
			return true
		case strings.HasPrefix(candidate, assignment):
			parsed, parseErr := strconv.ParseBool(candidate[len(assignment):])
			return parseErr == nil && parsed
		}
	}
	return false
}
// drainAutomaticUpdateCheck waits for the background update check signalled
// by done, bounded by automaticUpdateDrainTimeout.
func drainAutomaticUpdateCheck(done <-chan struct{}) {
drainAutomaticUpdateCheckWithTimeout(done, automaticUpdateDrainTimeout)
}
func drainAutomaticUpdateCheckWithTimeout(done <-chan struct{}, timeout time.Duration) {
if done == nil {
return
}
timer := time.NewTimer(timeout)
defer timer.Stop()
select {
case <-done:
case <-timer.C:
}
}
func defaultServerURL() string {
// 1. Explicit env var always wins
if env := normalizeServerURL(os.Getenv("BROWSEROS_URL")); env != "" {
@@ -339,27 +234,10 @@ func loadBrowserosServerURL() string {
// normalizeServerURL converts user input into a base server URL: surrounding
// whitespace is trimmed, a bare port number is expanded to
// http://127.0.0.1:<port>, and a trailing "/mcp" followed by a trailing "/"
// is stripped.
func normalizeServerURL(raw string) string {
	trimmed := strings.TrimSpace(raw)
	if isPortOnly(trimmed) {
		trimmed = "http://127.0.0.1:" + trimmed
	}
	withoutEndpoint := strings.TrimSuffix(trimmed, "/mcp")
	return strings.TrimSuffix(withoutEndpoint, "/")
}
// isPortOnly reports whether s is a non-empty string made up solely of the
// ASCII digits 0-9. It does not check that the number is a valid port.
func isPortOnly(s string) bool {
	for _, r := range s {
		if !('0' <= r && r <= '9') {
			return false
		}
	}
	// An empty string has no offending runes but is still not a port.
	return s != ""
}
func validateServerURL(raw string) (string, error) {
baseURL := normalizeServerURL(raw)
if baseURL != "" {

View File

@@ -1,27 +1,6 @@
package cmd
import (
"testing"
"time"
)
// TestSetVersionUpdatesRootCommand verifies that SetVersion updates both the
// package-level version variable and rootCmd.Version.
func TestSetVersionUpdatesRootCommand(t *testing.T) {
originalVersion := version
originalRootVersion := rootCmd.Version
// Restore the globals once the test finishes.
t.Cleanup(func() {
version = originalVersion
rootCmd.Version = originalRootVersion
})
SetVersion("1.2.3")
if version != "1.2.3" {
t.Fatalf("version = %q, want %q", version, "1.2.3")
}
if rootCmd.Version != "1.2.3" {
t.Fatalf("rootCmd.Version = %q, want %q", rootCmd.Version, "1.2.3")
}
}
import "testing"
func TestCommandName(t *testing.T) {
tests := []struct {
@@ -44,103 +23,3 @@ func TestCommandName(t *testing.T) {
})
}
}
// TestPrimaryCommand checks that primaryCommand returns the first non-flag
// argument, skipping leading flags and ignoring later subcommand words.
func TestPrimaryCommand(t *testing.T) {
tests := []struct {
name string
args []string
want string
}{
{"empty", nil, ""},
{"root flag then command", []string{"--json", "update"}, "update"},
{"subcommand", []string{"bookmark", "update"}, "bookmark"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := primaryCommand(tt.args); got != tt.want {
t.Fatalf("primaryCommand(%v) = %q, want %q", tt.args, got, tt.want)
}
})
}
}
// TestRequestedBoolFlag covers the bare flag form and the explicit
// --flag=true / --flag=false assignment forms.
func TestRequestedBoolFlag(t *testing.T) {
if !requestedBoolFlag([]string{"--json"}, "--json", false) {
t.Fatal("requestedBoolFlag() = false, want true")
}
if !requestedBoolFlag([]string{"--debug=true"}, "--debug", false) {
t.Fatal("requestedBoolFlag() with assignment = false, want true")
}
if requestedBoolFlag([]string{"--debug=false"}, "--debug", false) {
t.Fatal("requestedBoolFlag() with false assignment = true, want false")
}
}
// TestShouldSkipAutomaticUpdates checks that help, version and the top-level
// update command skip the automatic check, while an "update" word that is
// only a subcommand of another command (bookmark update) does not.
func TestShouldSkipAutomaticUpdates(t *testing.T) {
tests := []struct {
name string
args []string
want bool
}{
{"short help flag", []string{"-h"}, true},
{"help flag", []string{"--help"}, true},
{"version flag", []string{"--version"}, true},
{"update command", []string{"update"}, true},
{"bookmark update subcommand", []string{"bookmark", "update"}, false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := shouldSkipAutomaticUpdates(tt.args); got != tt.want {
t.Fatalf("shouldSkipAutomaticUpdates(%v) = %t, want %t", tt.args, got, tt.want)
}
})
}
}
// TestDrainAutomaticUpdateCheckWithTimeoutWaitsForCompletion verifies that
// the drain keeps blocking while done is open and returns promptly once done
// is closed, well before the one-second timeout.
func TestDrainAutomaticUpdateCheckWithTimeoutWaitsForCompletion(t *testing.T) {
done := make(chan struct{})
returned := make(chan struct{})
go func() {
drainAutomaticUpdateCheckWithTimeout(done, time.Second)
close(returned)
}()
// The drain must still be blocked shortly after starting.
select {
case <-returned:
t.Fatal("drainAutomaticUpdateCheckWithTimeout() returned before check completed")
case <-time.After(10 * time.Millisecond):
}
close(done)
// Closing done should release the drain quickly.
select {
case <-returned:
case <-time.After(100 * time.Millisecond):
t.Fatal("drainAutomaticUpdateCheckWithTimeout() did not return after check completed")
}
}
// TestDrainAutomaticUpdateCheckWithTimeoutStopsWaiting verifies that the
// drain gives up after its timeout when done is never closed.
func TestDrainAutomaticUpdateCheckWithTimeoutStopsWaiting(t *testing.T) {
done := make(chan struct{})
returned := make(chan struct{})
go func() {
drainAutomaticUpdateCheckWithTimeout(done, 20*time.Millisecond)
close(returned)
}()
// Before the 20ms timeout the drain should still be waiting.
select {
case <-returned:
t.Fatal("drainAutomaticUpdateCheckWithTimeout() returned before timeout elapsed")
case <-time.After(5 * time.Millisecond):
}
// After the timeout it must return even though done stays open.
select {
case <-returned:
case <-time.After(100 * time.Millisecond):
t.Fatal("drainAutomaticUpdateCheckWithTimeout() did not return after timeout")
}
}

View File

@@ -1,179 +0,0 @@
package cmd
import (
"bufio"
"context"
"fmt"
"io"
"os"
"strings"
"browseros-cli/output"
"browseros-cli/update"
"github.com/spf13/cobra"
)
// updateManager is the subset of update-manager behavior that
// runUpdateCommand needs: checking for a release and applying one.
type updateManager interface {
// CheckNow performs an update check and returns its result.
CheckNow(context.Context) (*update.CheckResult, error)
// Apply installs the release described by the given check result.
Apply(context.Context, *update.CheckResult) error
}
// updateOutcome describes what a single run of the update command did.
type updateOutcome struct {
// result is the check result returned by the manager's CheckNow.
result *update.CheckResult
// applied is set once the manager's Apply has succeeded.
applied bool
// canceled is set when the user declined the confirmation prompt.
canceled bool
}
// init registers the "update" command (aliases: self-update, upgrade) on
// rootCmd, together with its --check and --yes flags.
func init() {
cmd := &cobra.Command{
Use: "update",
Aliases: []string{"self-update", "upgrade"},
Annotations: map[string]string{"group": "Setup:"},
Short: "Check for and apply CLI updates",
Args: cobra.NoArgs,
Run: func(cmd *cobra.Command, args []string) {
checkOnly, _ := cmd.Flags().GetBool("check")
yes, _ := cmd.Flags().GetBool("yes")
// Automatic is false: this is an explicit, user-requested update.
manager := update.NewManager(update.Options{
CurrentVersion: version,
JSONOutput: jsonOut,
Debug: debug,
Automatic: false,
})
// The confirmation prompt is read from stdin and written to stderr.
outcome, err := runUpdateCommand(
cmd.Context(),
manager,
checkOnly,
yes,
stdinIsInteractive(os.Stdin),
os.Stdin,
os.Stderr,
)
if err != nil {
output.Error(err.Error(), 1)
}
printUpdateOutcome(outcome)
},
}
cmd.Flags().Bool("check", false, "Check for updates without applying them")
cmd.Flags().Bool("yes", false, "Apply update without prompting")
rootCmd.AddCommand(cmd)
}
// runUpdateCommand drives one update run against manager.
//
// It always performs a check first. It stops after the check when checkOnly
// is set or no update is available. Otherwise, unless yes is set, it asks for
// confirmation on stderr/stdin; without an interactive stdin this is an
// error, and a declined prompt yields an outcome marked canceled. When the
// update goes ahead, manager.Apply is called and the outcome is marked
// applied.
//
// Any error from the check, the prompt, or Apply is returned with a nil
// outcome.
func runUpdateCommand(
	ctx context.Context,
	manager updateManager,
	checkOnly bool,
	yes bool,
	interactive bool,
	stdin io.Reader,
	stderr io.Writer,
) (*updateOutcome, error) {
	result, err := manager.CheckNow(ctx)
	if err != nil {
		return nil, err
	}

	outcome := &updateOutcome{result: result}
	if checkOnly || !result.UpdateAvailable {
		return outcome, nil
	}

	needsPrompt := !yes
	if needsPrompt && !interactive {
		return nil, fmt.Errorf("update requires confirmation; rerun with --yes")
	}
	if needsPrompt {
		confirmed, promptErr := confirmUpdate(stdin, stderr, result)
		switch {
		case promptErr != nil:
			return nil, promptErr
		case !confirmed:
			outcome.canceled = true
			return outcome, nil
		}
	}

	if applyErr := manager.Apply(ctx, result); applyErr != nil {
		return nil, applyErr
	}
	outcome.applied = true
	return outcome, nil
}
// printUpdateOutcome reports the result of an update run on stdout.
//
// In JSON mode the payload built by updateOutcomePayload is emitted as-is.
// Otherwise a single human-readable line is printed: the installed version
// after a successful apply, a cancellation notice, an "update available"
// notice, or an "up to date" message.
//
// A missing check result is handled before any field of it is read, so an
// outcome without a result prints at most the cancellation notice instead of
// dereferencing a nil pointer.
func printUpdateOutcome(outcome *updateOutcome) {
	if jsonOut {
		output.JSONRaw(updateOutcomePayload(outcome))
		return
	}

	result := outcome.result
	switch {
	case result == nil:
		// Nothing version-related can be printed without a check result.
		if outcome.canceled && !outcome.applied {
			fmt.Println("Update canceled.")
		}
	case outcome.applied:
		fmt.Printf("Updated browseros-cli to v%s\n", result.LatestVersion)
	case outcome.canceled:
		fmt.Println("Update canceled.")
	case result.UpdateAvailable:
		fmt.Println(update.FormatNotice(result.CurrentVersion, result.LatestVersion))
	default:
		fmt.Printf("browseros-cli is up to date (v%s)\n", result.CurrentVersion)
	}
}
// updateOutcomePayload builds the JSON-mode representation of an outcome.
//
// "applied" is always present; "canceled" only when true. When a check
// result exists, the current/latest versions and the updateAvailable flag
// are added, plus an "asset" object (filename, url, archiveFormat) when the
// result carries an asset.
func updateOutcomePayload(outcome *updateOutcome) map[string]any {
	payload := make(map[string]any)
	payload["applied"] = outcome.applied
	if outcome.canceled {
		payload["canceled"] = true
	}

	if res := outcome.result; res != nil {
		payload["currentVersion"] = res.CurrentVersion
		payload["latestVersion"] = res.LatestVersion
		payload["updateAvailable"] = res.UpdateAvailable
		if asset := res.Asset; asset != nil {
			payload["asset"] = map[string]any{
				"filename":      asset.Filename,
				"url":           asset.URL,
				"archiveFormat": asset.ArchiveFormat,
			}
		}
	}
	return payload
}
// confirmUpdate writes a yes/no prompt to stderr and reads one line of
// reply from stdin. Only "y" or "yes" (case-insensitive, surrounding
// whitespace ignored) confirm; anything else, including an empty reply at
// end of input, declines. Write errors and read errors other than io.EOF
// are returned.
func confirmUpdate(
	stdin io.Reader,
	stderr io.Writer,
	result *update.CheckResult,
) (bool, error) {
	_, writeErr := fmt.Fprintf(
		stderr,
		"Install browseros-cli v%s over v%s? [y/N]: ",
		result.LatestVersion,
		result.CurrentVersion,
	)
	if writeErr != nil {
		return false, writeErr
	}

	reply, readErr := bufio.NewReader(stdin).ReadString('\n')
	if readErr != nil && readErr != io.EOF {
		return false, readErr
	}

	switch strings.ToLower(strings.TrimSpace(reply)) {
	case "y", "yes":
		return true, nil
	default:
		return false, nil
	}
}
func stdinIsInteractive(file *os.File) bool {
info, err := file.Stat()
if err != nil {
return false
}
return info.Mode()&os.ModeCharDevice != 0
}

View File

@@ -1,176 +0,0 @@
package cmd
import (
"bytes"
"context"
"errors"
"net/http"
"net/http/httptest"
"runtime"
"testing"
"browseros-cli/update"
)
// TestRunUpdateCommandCheckOnly verifies that check-only mode reports an
// available update without applying it.
func TestRunUpdateCommandCheckOnly(t *testing.T) {
// Point XDG_CONFIG_HOME at a temp dir for the duration of the test.
configRoot := t.TempDir()
t.Setenv("XDG_CONFIG_HOME", configRoot)
manager := newTestUpdateManager(t)
outcome, err := runUpdateCommand(
context.Background(),
manager,
true,
false,
false,
bytes.NewBufferString(""),
&bytes.Buffer{},
)
if err != nil {
t.Fatalf("runUpdateCommand() error = %v", err)
}
if outcome.applied {
t.Fatal("runUpdateCommand() applied = true, want false")
}
if !outcome.result.UpdateAvailable {
t.Fatal("runUpdateCommand() UpdateAvailable = false, want true")
}
}
// TestRunUpdateCommandRequiresYesWithoutTTY verifies that applying an update
// without --yes fails when stdin is not interactive.
func TestRunUpdateCommandRequiresYesWithoutTTY(t *testing.T) {
configRoot := t.TempDir()
t.Setenv("XDG_CONFIG_HOME", configRoot)
// checkOnly=false, yes=false, interactive=false.
_, err := runUpdateCommand(
context.Background(),
newTestUpdateManager(t),
false,
false,
false,
bytes.NewBufferString(""),
&bytes.Buffer{},
)
if err == nil {
t.Fatal("runUpdateCommand() error = nil, want confirmation error")
}
}
// TestRunUpdateCommandCancel verifies that answering "n" at the prompt marks
// the outcome as canceled and that the prompt was written to stderr.
func TestRunUpdateCommandCancel(t *testing.T) {
configRoot := t.TempDir()
t.Setenv("XDG_CONFIG_HOME", configRoot)
stderr := &bytes.Buffer{}
// interactive=true with "n" as the simulated user reply.
outcome, err := runUpdateCommand(
context.Background(),
newTestUpdateManager(t),
false,
false,
true,
bytes.NewBufferString("n\n"),
stderr,
)
if err != nil {
t.Fatalf("runUpdateCommand() error = %v", err)
}
if !outcome.canceled {
t.Fatal("runUpdateCommand() canceled = false, want true")
}
if stderr.Len() == 0 {
t.Fatal("confirm prompt was not written to stderr")
}
}
// TestRunUpdateCommandYesAppliesWithoutPrompt verifies that --yes applies the
// update exactly once and never writes a confirmation prompt, even when
// stdin is not interactive.
func TestRunUpdateCommandYesAppliesWithoutPrompt(t *testing.T) {
// A fake manager is used so Apply can be counted without touching disk.
manager := &fakeUpdateManager{
result: &update.CheckResult{
CurrentVersion: "1.0.0",
LatestVersion: "9.9.9",
UpdateAvailable: true,
Asset: &update.Asset{
Filename: "browseros-cli_9.9.9_test.tar.gz",
URL: "https://cdn.example.com/cli/v9.9.9/browseros-cli_9.9.9_test.tar.gz",
ArchiveFormat: "tar.gz",
SHA256: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
},
},
}
stderr := &bytes.Buffer{}
outcome, err := runUpdateCommand(
context.Background(),
manager,
false,
true,
false,
bytes.NewBufferString(""),
stderr,
)
if err != nil {
t.Fatalf("runUpdateCommand() error = %v", err)
}
if !outcome.applied {
t.Fatal("runUpdateCommand() applied = false, want true")
}
if manager.applyCalls != 1 {
t.Fatalf("Apply() calls = %d, want 1", manager.applyCalls)
}
if stderr.Len() != 0 {
t.Fatal("prompt was written despite --yes")
}
}
// fakeUpdateManager is a test double providing CheckNow and Apply with
// canned results and a counter for Apply calls.
type fakeUpdateManager struct {
// result is returned by CheckNow when checkErr is nil.
result *update.CheckResult
// checkErr, when set, is returned by CheckNow instead of result.
checkErr error
// applyErr is returned by Apply.
applyErr error
// applyCalls counts how many times Apply was invoked.
applyCalls int
}
// CheckNow returns checkErr if set, an error if no result was configured,
// and otherwise the configured result.
func (m *fakeUpdateManager) CheckNow(context.Context) (*update.CheckResult, error) {
if m.checkErr != nil {
return nil, m.checkErr
}
if m.result == nil {
return nil, errors.New("missing check result")
}
return m.result, nil
}
// Apply records the call and returns the configured applyErr.
func (m *fakeUpdateManager) Apply(context.Context, *update.CheckResult) error {
m.applyCalls++
return m.applyErr
}
// newTestUpdateManager returns an update.Manager at version 1.0.0 whose
// manifest URL points at a local httptest server. The server always answers
// with a manifest advertising version 9.9.9 and a single asset keyed by the
// current platform. The server is closed automatically via t.Cleanup.
func newTestUpdateManager(t *testing.T) *update.Manager {
t.Helper()
// The manifest's asset map is keyed by the platform key for this GOOS/GOARCH.
key, err := update.PlatformKey(runtime.GOOS, runtime.GOARCH)
if err != nil {
t.Fatalf("PlatformKey() error = %v", err)
}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{
"version":"9.9.9",
"published_at":"2026-03-27T19:00:00Z",
"tag":"browseros-cli-v9.9.9",
"assets":{
"` + key + `":{
"filename":"browseros-cli_9.9.9_test.tar.gz",
"url":"https://cdn.example.com/cli/v9.9.9/browseros-cli_9.9.9_test.tar.gz",
"archive_format":"tar.gz",
"sha256":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
}
}
}`))
}))
t.Cleanup(server.Close)
return update.NewManager(update.Options{
CurrentVersion: "1.0.0",
ManifestURL: server.URL,
Automatic: false,
HTTPClient: server.Client(),
})
}

View File

@@ -4,16 +4,13 @@ go 1.25.7
require (
github.com/fatih/color v1.18.0
github.com/minio/selfupdate v0.6.0
github.com/modelcontextprotocol/go-sdk v1.4.0
github.com/posthog/posthog-go v1.11.2
github.com/spf13/cobra v1.10.2
golang.org/x/mod v0.34.0
gopkg.in/yaml.v3 v3.0.1
)
require (
aead.dev/minisign v0.2.0 // indirect
github.com/goccy/go-json v0.10.5 // indirect
github.com/google/jsonschema-go v0.4.2 // indirect
github.com/google/uuid v1.6.0 // indirect
@@ -25,7 +22,6 @@ require (
github.com/segmentio/encoding v0.5.3 // indirect
github.com/spf13/pflag v1.0.9 // indirect
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b // indirect
golang.org/x/oauth2 v0.34.0 // indirect
golang.org/x/sys v0.40.0 // indirect
)

View File

@@ -1,5 +1,3 @@
aead.dev/minisign v0.2.0 h1:kAWrq/hBRu4AARY6AlciO83xhNnW9UaC8YipS2uhLPk=
aead.dev/minisign v0.2.0/go.mod h1:zdq6LdSd9TbuSxchxwhpA9zEb9YXcVGoE8JakuiGaIQ=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -24,8 +22,6 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/minio/selfupdate v0.6.0 h1:i76PgT0K5xO9+hjzKcacQtO7+MjJ4JKA8Ak8XQ9DDwU=
github.com/minio/selfupdate v0.6.0/go.mod h1:bO02GTIPCMQFTEvE5h4DjYB58bCoZ35XLeBf0buTDdM=
github.com/modelcontextprotocol/go-sdk v1.4.0 h1:u0kr8lbJc1oBcawK7Df+/ajNMpIDFE41OEPxdeTLOn8=
github.com/modelcontextprotocol/go-sdk v1.4.0/go.mod h1:Nxc2n+n/GdCebUaqCOhTetptS17SXXNu9IfNTaLDi1E=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -46,33 +42,14 @@ github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b h1:QAqMVf3pSa6eeTsuklijukjXBlj7Es2QQplab+/RbQ4=
golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI=
golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210228012217-479acdf4ea46/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc=
golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

View File

@@ -1,2 +0,0 @@
.binary/
node_modules/

View File

@@ -1,81 +0,0 @@
# browseros-cli
Command-line interface for controlling BrowserOS -- launch and automate the browser from the terminal.
## Installation
**Zero install (recommended):**
```bash
npx browseros-cli --help
```
**Global install:**
```bash
npm install -g browseros-cli
```
**Shell script fallback:**
```bash
curl -fsSL https://cdn.browseros.com/cli/install.sh | bash
```
## Quick Start
```bash
# Download BrowserOS
browseros-cli install
# Start BrowserOS
browseros-cli launch
# Auto-configure MCP settings for your AI tools
browseros-cli init --auto
# Verify everything is working
browseros-cli health
```
## Usage
### Navigation
```bash
browseros-cli navigate "https://example.com"
```
### Observation
```bash
browseros-cli snapshot # Get the accessibility tree of the current page
browseros-cli console-logs # View browser console output
```
### Screenshots
```bash
browseros-cli screenshot # Capture the current page
```
### Input
```bash
browseros-cli click 42 # Click an element by its node ID
browseros-cli fill 85 "query" # Type text into an input field
```
### Agent Mode
```bash
browseros-cli agent "Search for flights to Tokyo"
```
## Documentation
Full documentation is available at [browseros.com](https://browseros.com).
## License
MIT

View File

@@ -1,32 +0,0 @@
#!/usr/bin/env node
// npm launcher for the native browseros-cli binary.
//
// Looks for the platform binary under ../.binary, runs the postinstall
// downloader on demand when it is missing, then forwards all command-line
// arguments to the binary and exits with its status code.
const { execFileSync, spawnSync } = require('node:child_process')
const path = require('node:path')
const fs = require('node:fs')

const BINARY_DIR = path.join(__dirname, '..', '.binary')
const EXT = process.platform === 'win32' ? '.exe' : ''
const BIN_PATH = path.join(BINARY_DIR, `browseros-cli${EXT}`)

if (!fs.existsSync(BIN_PATH)) {
  console.error('browseros-cli: binary not found, downloading...')
  try {
    // BROWSEROS_NPM_FORCE makes the installer run even when CI is set.
    execFileSync(
      process.execPath,
      [path.join(__dirname, '..', 'scripts', 'postinstall.js')],
      { stdio: 'inherit', env: { ...process.env, BROWSEROS_NPM_FORCE: '1' } },
    )
  } catch {
    console.error(
      'browseros-cli: failed to download binary. Try reinstalling:\n npm install -g browseros-cli',
    )
    process.exit(1)
  }
}

const result = spawnSync(BIN_PATH, process.argv.slice(2), {
  stdio: 'inherit',
  env: { ...process.env, BROWSEROS_INSTALL_METHOD: 'npm' },
})

// spawnSync does not throw when the binary cannot be started; it reports the
// failure on result.error. Surface it instead of exiting silently.
if (result.error) {
  console.error(
    `browseros-cli: failed to run ${BIN_PATH}: ${result.error.message}`,
  )
  process.exit(1)
}

// status is null when the child was terminated by a signal.
process.exit(result.status ?? 1)

View File

@@ -1,45 +0,0 @@
{
"name": "browseros-cli",
"version": "0.2.0",
"description": "Command-line interface for controlling BrowserOS — launch and automate the browser from the terminal",
"bin": {
"browseros-cli": "bin/browseros-cli.js"
},
"scripts": {
"postinstall": "node scripts/postinstall.js"
},
"keywords": [
"browseros",
"cli",
"browser",
"automation",
"mcp",
"ai-agent",
"model-context-protocol"
],
"repository": {
"type": "git",
"url": "https://github.com/browseros-ai/BrowserOS",
"directory": "packages/browseros-agent/apps/cli/npm"
},
"homepage": "https://browseros.com",
"bugs": "https://github.com/browseros-ai/BrowserOS/issues",
"license": "MIT",
"os": [
"darwin",
"linux",
"win32"
],
"cpu": [
"x64",
"arm64"
],
"engines": {
"node": ">=18"
},
"files": [
"bin/",
"scripts/",
"README.md"
]
}

View File

@@ -1,142 +0,0 @@
const https = require('node:https')
const http = require('node:http')
const fs = require('node:fs')
const path = require('node:path')
const { execSync } = require('node:child_process')
const { createHash } = require('node:crypto')
// The binary version to download always matches this npm package's version.
const VERSION = require('../package.json').version
// Release assets are fetched from the GitHub release tagged browseros-cli-v<VERSION>.
const GITHUB_RELEASE_BASE = `https://github.com/browseros-ai/BrowserOS/releases/download/browseros-cli-v${VERSION}`
// The extracted binary is stored in a .binary directory next to scripts/.
const BINARY_DIR = path.join(__dirname, '..', '.binary')
const EXT = process.platform === 'win32' ? '.exe' : ''
const BINARY_PATH = path.join(BINARY_DIR, `browseros-cli${EXT}`)
// Skip the download when CI is set, unless BROWSEROS_NPM_FORCE is set.
if (process.env.CI && !process.env.BROWSEROS_NPM_FORCE) {
process.exit(0)
}
// Translate Node's platform/arch names into the names used in archive file names.
const PLATFORM_MAP = { darwin: 'darwin', linux: 'linux', win32: 'windows' }
const ARCH_MAP = { x64: 'amd64', arm64: 'arm64' }
const platform = PLATFORM_MAP[process.platform]
const arch = ARCH_MAP[process.arch]
if (!platform || !arch) {
console.error(
`browseros-cli: unsupported platform ${process.platform}/${process.arch}`,
)
process.exit(1)
}
// Windows archives are .zip; every other platform uses .tar.gz.
const isWindows = platform === 'windows'
const archiveExt = isWindows ? 'zip' : 'tar.gz'
const archiveName = `browseros-cli_${VERSION}_${platform}_${arch}.${archiveExt}`
const archiveURL = `${GITHUB_RELEASE_BASE}/${archiveName}`
const checksumURL = `${GITHUB_RELEASE_BASE}/checksums.txt`
// Upper bound on redirects followed by download().
const MAX_REDIRECTS = 5
/**
 * Fetches a URL with GET and resolves with the full response body.
 *
 * Redirects (3xx with a Location header) are followed up to MAX_REDIRECTS
 * times. A Location header may be relative, so it is resolved against the
 * URL that produced it before the next request is made.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @param {number} [redirects=0] - Number of redirects followed so far.
 * @returns {Promise<Buffer>} The response body.
 * Rejects on too many redirects, a non-200 final status, an invalid redirect
 * target, or a network error.
 */
function download(url, redirects = 0) {
  return new Promise((resolve, reject) => {
    if (redirects > MAX_REDIRECTS) {
      return reject(new Error(`Too many redirects for ${url}`))
    }
    const client = url.startsWith('https') ? https : http
    client
      .get(url, { headers: { 'User-Agent': 'browseros-cli-npm' } }, (res) => {
        if (
          res.statusCode >= 300 &&
          res.statusCode < 400 &&
          res.headers.location
        ) {
          // Discard the redirect body so the socket is released.
          res.resume()
          let target
          try {
            target = new URL(res.headers.location, url).toString()
          } catch (err) {
            return reject(err)
          }
          return download(target, redirects + 1).then(resolve, reject)
        }
        if (res.statusCode !== 200) {
          // Discard the error body so the socket is released.
          res.resume()
          return reject(new Error(`HTTP ${res.statusCode} for ${url}`))
        }
        const chunks = []
        res.on('data', (chunk) => chunks.push(chunk))
        res.on('end', () => resolve(Buffer.concat(chunks)))
        res.on('error', reject)
      })
      .on('error', reject)
  })
}
// Downloads the release archive for this platform, verifies it against
// checksums.txt when possible, extracts it into BINARY_DIR, and makes the
// binary executable on non-Windows platforms.
async function main() {
console.log(
`browseros-cli: downloading v${VERSION} for ${platform}/${arch}...`,
)
// A failed checksum download is tolerated (null) and only produces a warning below.
const [archiveBuffer, checksumBuffer] = await Promise.all([
download(archiveURL),
download(checksumURL).catch(() => null),
])
if (checksumBuffer) {
const checksumText = checksumBuffer.toString('utf-8')
// Find the checksums.txt line that mentions this archive.
const expectedLine = checksumText
.split('\n')
.find((l) => l.includes(archiveName))
if (expectedLine) {
// The hash is the first whitespace-separated field on the line.
const expected = expectedLine.split(/\s+/)[0]
const actual = createHash('sha256').update(archiveBuffer).digest('hex')
if (actual !== expected) {
console.error(
`browseros-cli: checksum mismatch!\n expected: ${expected}\n got: ${actual}`,
)
process.exit(1)
}
console.log('browseros-cli: checksum verified.')
} else {
console.warn(
'browseros-cli: warning: checksum entry not found in checksums.txt, skipping verification.',
)
}
} else {
console.warn(
'browseros-cli: warning: could not fetch checksums.txt, skipping verification.',
)
}
// Write the archive into BINARY_DIR, extract it there, then delete the archive.
fs.mkdirSync(BINARY_DIR, { recursive: true })
const tmpArchive = path.join(BINARY_DIR, archiveName)
fs.writeFileSync(tmpArchive, archiveBuffer)
if (isWindows) {
execSync(
`powershell -Command "Expand-Archive -Force -Path '${tmpArchive}' -DestinationPath '${BINARY_DIR}'"`,
{ stdio: 'inherit' },
)
} else {
execSync(`tar -xzf "${tmpArchive}" -C "${BINARY_DIR}"`, {
stdio: 'inherit',
})
}
fs.unlinkSync(tmpArchive)
// The archive is expected to contain the binary at the top level.
if (!fs.existsSync(BINARY_PATH)) {
console.error(
`browseros-cli: binary not found after extraction at ${BINARY_PATH}`,
)
process.exit(1)
}
if (!isWindows) {
fs.chmodSync(BINARY_PATH, 0o755)
}
console.log(`browseros-cli: installed v${VERSION} successfully.`)
}
// Any failure prints a manual-install hint and exits non-zero.
main().catch((err) => {
console.error(`browseros-cli: installation failed: ${err.message}`)
console.error(
'You can install manually: curl -fsSL https://cdn.browseros.com/cli/install.sh | bash',
)
process.exit(1)
})

View File

@@ -1,49 +0,0 @@
package update
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"
"github.com/minio/selfupdate"
)
// CheckPermissions delegates to selfupdate.Options.CheckPermissions for
// targetPath and returns its error.
// NOTE(review): presumably this verifies the target can be replaced in
// place — confirm against the minio/selfupdate documentation.
func CheckPermissions(targetPath string) error {
options := selfupdate.Options{TargetPath: targetPath}
return options.CheckPermissions()
}
// VerifyChecksum checks that the SHA-256 digest of data equals the
// hex-encoded checksum expectedHex. It returns an error when the checksum is
// missing or not valid hex, or when the digests differ; the mismatch error
// includes both digests.
func VerifyChecksum(data []byte, expectedHex string) error {
	want, err := decodeChecksum(expectedHex)
	if err != nil {
		return err
	}

	digest := sha256.Sum256(data)
	got := digest[:]
	if bytes.Equal(got, want) {
		return nil
	}
	return fmt.Errorf(
		"checksum mismatch: expected %s, got %s",
		hex.EncodeToString(want),
		hex.EncodeToString(got),
	)
}
// ApplyBinary installs binary at targetPath via selfupdate.Apply. When
// selfupdate reports that rolling back a failed update also failed, that
// rollback error is returned wrapped; otherwise the Apply error (nil on
// success) is returned unchanged.
func ApplyBinary(binary []byte, targetPath string) error {
	applyErr := selfupdate.Apply(
		bytes.NewReader(binary),
		selfupdate.Options{TargetPath: targetPath},
	)
	if rbErr := selfupdate.RollbackError(applyErr); rbErr != nil {
		return fmt.Errorf("update failed and rollback failed: %w", rbErr)
	}
	return applyErr
}
// decodeChecksum trims surrounding whitespace from checksumHex and decodes
// it from hex. An empty (or whitespace-only) input is reported as a missing
// checksum; invalid hex yields the decoder's error.
func decodeChecksum(checksumHex string) ([]byte, error) {
	if trimmed := strings.TrimSpace(checksumHex); trimmed != "" {
		return hex.DecodeString(trimmed)
	}
	return nil, fmt.Errorf("missing checksum")
}

View File

@@ -1,138 +0,0 @@
package update
import (
"archive/tar"
"archive/zip"
"bytes"
"compress/gzip"
"context"
"fmt"
"io"
"net/http"
)
// maxAssetSize caps the size of a downloaded update archive at 64 MiB.
const maxAssetSize = 64 << 20
// maxBinarySize caps the size of the binary extracted from an archive at 256 MiB.
const maxBinarySize = 256 << 20
// DownloadAsset fetches asset.URL with a GET request bound to ctx, using the
// supplied client, and returns the response body. Any status other than
// 200 OK is an error, and the body is size-limited by readAssetBytes.
func DownloadAsset(ctx context.Context, client *http.Client, asset Asset) ([]byte, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, asset.URL, nil)
	if err != nil {
		return nil, err
	}

	response, err := client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return nil, fmt.Errorf("update download returned HTTP %d", status)
	}
	return readAssetBytes(response.Body)
}
// readAssetBytes reads everything from reader, rejecting payloads larger
// than maxAssetSize.
func readAssetBytes(reader io.Reader) ([]byte, error) {
	// Read one byte past the cap so an oversized payload can be told apart
	// from one that is exactly at the limit.
	payload, err := io.ReadAll(io.LimitReader(reader, maxAssetSize+1))
	switch {
	case err != nil:
		return nil, err
	case len(payload) > maxAssetSize:
		return nil, fmt.Errorf("update asset exceeds %d bytes", maxAssetSize)
	}
	return payload, nil
}
// ExtractBinary returns the single binary contained in archive. format
// selects the decoder: "tar.gz" or "zip". Any other format is an error.
func ExtractBinary(archive []byte, format string) ([]byte, error) {
	if format == "tar.gz" {
		return extractTarGzBinary(archive)
	}
	if format == "zip" {
		return extractZipBinary(archive)
	}
	return nil, fmt.Errorf("unsupported archive format %q", format)
}
// extractTarGzBinary gunzips archive and hands the resulting tar stream to
// readTarBinary to pull out the binary.
func extractTarGzBinary(archive []byte) ([]byte, error) {
	gz, err := gzip.NewReader(bytes.NewReader(archive))
	if err != nil {
		return nil, err
	}
	defer gz.Close()

	return readTarBinary(tar.NewReader(gz))
}
// readTarBinary returns the contents of the single regular file in the tar
// stream. Non-regular entries are skipped. It is an error for the archive to
// contain more than one regular file, no regular file, or a file larger than
// maxBinarySize.
func readTarBinary(reader *tar.Reader) ([]byte, error) {
// binary stays nil until the first regular file has been read.
var binary []byte
for {
header, err := reader.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
if header.Typeflag != tar.TypeReg {
continue
}
// A second regular file means the archive is not a single-binary archive.
if binary != nil {
return nil, fmt.Errorf("archive contains multiple files; expected exactly one binary")
}
// Read one byte past the cap so oversize files can be detected.
binary, err = io.ReadAll(io.LimitReader(reader, maxBinarySize+1))
if err != nil {
return nil, err
}
if len(binary) > maxBinarySize {
return nil, fmt.Errorf("extracted binary exceeds %d bytes", maxBinarySize)
}
}
if binary == nil {
return nil, fmt.Errorf("archive does not contain a file")
}
return binary, nil
}
// extractZipBinary returns the contents of the single file in a zip archive.
// Directory entries are skipped. It is an error for the archive to contain
// more than one file, no file, or a file larger than maxBinarySize.
func extractZipBinary(archive []byte) ([]byte, error) {
reader, err := zip.NewReader(bytes.NewReader(archive), int64(len(archive)))
if err != nil {
return nil, err
}
// binary stays nil until the first file has been read.
var binary []byte
for _, file := range reader.File {
if file.FileInfo().IsDir() {
continue
}
// A second file means the archive is not a single-binary archive.
if binary != nil {
return nil, fmt.Errorf("archive contains multiple files; expected exactly one binary")
}
rc, err := file.Open()
if err != nil {
return nil, err
}
// Read one byte past the cap so oversize files can be detected.
binary, err = io.ReadAll(io.LimitReader(rc, maxBinarySize+1))
rc.Close()
if err != nil {
return nil, err
}
if len(binary) > maxBinarySize {
return nil, fmt.Errorf("extracted binary exceeds %d bytes", maxBinarySize)
}
}
if binary == nil {
return nil, fmt.Errorf("archive does not contain a file")
}
return binary, nil
}

View File

@@ -1,168 +0,0 @@
package update
import (
"archive/tar"
"archive/zip"
"bytes"
"compress/gzip"
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"testing"
)
// TestExtractBinaryTarGz verifies that a single-file tar.gz archive yields
// that file's contents.
func TestExtractBinaryTarGz(t *testing.T) {
archive := createTarGz(t, map[string]string{"browseros-cli": "new-binary"})
binary, err := ExtractBinary(archive, "tar.gz")
if err != nil {
t.Fatalf("ExtractBinary() error = %v", err)
}
if string(binary) != "new-binary" {
t.Fatalf("ExtractBinary() = %q, want %q", string(binary), "new-binary")
}
}
// TestExtractBinaryZip verifies that a single-file zip archive yields that
// file's contents.
func TestExtractBinaryZip(t *testing.T) {
archive := createZip(t, map[string]string{"browseros-cli.exe": "new-binary"})
binary, err := ExtractBinary(archive, "zip")
if err != nil {
t.Fatalf("ExtractBinary() error = %v", err)
}
if string(binary) != "new-binary" {
t.Fatalf("ExtractBinary() = %q, want %q", string(binary), "new-binary")
}
}
func TestExtractBinaryTarGzRejectsMultipleFiles(t *testing.T) {
archive := createTarGz(t, map[string]string{
"browseros-cli": "new-binary",
"browseros-cli.sig": "signature",
})
_, err := ExtractBinary(archive, "tar.gz")
if err == nil {
t.Fatal("ExtractBinary() error = nil, want multiple files error")
}
if err.Error() != "archive contains multiple files; expected exactly one binary" {
t.Fatalf("ExtractBinary() error = %q", err)
}
}
// TestVerifyChecksumValid checks that VerifyChecksum accepts the hex-encoded
// SHA-256 digest of the data it is given.
func TestVerifyChecksumValid(t *testing.T) {
	payload := []byte("some-data")
	digest := sha256.Sum256(payload)
	checksum := hex.EncodeToString(digest[:])

	err := VerifyChecksum(payload, checksum)
	if err != nil {
		t.Fatalf("VerifyChecksum() error = %v", err)
	}
}
// TestVerifyChecksumMismatch checks that VerifyChecksum reports an error when
// the expected digest does not match the data.
func TestVerifyChecksumMismatch(t *testing.T) {
	const wrongDigest = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	payload := []byte("some-data")

	err := VerifyChecksum(payload, wrongDigest)
	if err == nil {
		t.Fatal("VerifyChecksum() error = nil, want mismatch error")
	}
}
// TestApplyBinary checks that ApplyBinary replaces the contents of an existing
// file with the new binary bytes.
func TestApplyBinary(t *testing.T) {
	const replacement = "new-binary"
	target := filepath.Join(t.TempDir(), "browseros-cli")

	// Seed the target so ApplyBinary has an existing file to replace.
	if err := os.WriteFile(target, []byte("old-binary"), 0755); err != nil {
		t.Fatalf("WriteFile() error = %v", err)
	}

	if err := ApplyBinary([]byte(replacement), target); err != nil {
		t.Fatalf("ApplyBinary() error = %v", err)
	}

	got, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("ReadFile() error = %v", err)
	}
	if string(got) != replacement {
		t.Fatalf("updated binary = %q, want %q", string(got), replacement)
	}
}
// TestVerifyThenApplyIntegration chains checksum verification, extraction and
// replacement of an on-disk binary, in that order.
func TestVerifyThenApplyIntegration(t *testing.T) {
	const want = "updated-binary"
	archive := createTarGz(t, map[string]string{"browseros-cli": want})

	// Step 1: the archive must match its own digest.
	digest := sha256.Sum256(archive)
	if err := VerifyChecksum(archive, hex.EncodeToString(digest[:])); err != nil {
		t.Fatalf("VerifyChecksum(archive) error = %v", err)
	}

	// Step 2: pull the binary out of the archive.
	extracted, err := ExtractBinary(archive, "tar.gz")
	if err != nil {
		t.Fatalf("ExtractBinary() error = %v", err)
	}

	// Step 3: replace a pre-existing file with the extracted binary.
	target := filepath.Join(t.TempDir(), "browseros-cli")
	if err := os.WriteFile(target, []byte("old"), 0755); err != nil {
		t.Fatalf("WriteFile() error = %v", err)
	}
	if err := ApplyBinary(extracted, target); err != nil {
		t.Fatalf("ApplyBinary() error = %v", err)
	}

	onDisk, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("ReadFile() error = %v", err)
	}
	if string(onDisk) != want {
		t.Fatalf("binary = %q, want %q", string(onDisk), want)
	}
}
func createTarGz(t *testing.T, files map[string]string) []byte {
t.Helper()
var buffer bytes.Buffer
gzipWriter := gzip.NewWriter(&buffer)
tarWriter := tar.NewWriter(gzipWriter)
for name, body := range files {
data := []byte(body)
if err := tarWriter.WriteHeader(&tar.Header{
Name: name,
Mode: 0755,
Size: int64(len(data)),
}); err != nil {
t.Fatalf("WriteHeader() error = %v", err)
}
if _, err := tarWriter.Write(data); err != nil {
t.Fatalf("Write() error = %v", err)
}
}
if err := tarWriter.Close(); err != nil {
t.Fatalf("Close() error = %v", err)
}
if err := gzipWriter.Close(); err != nil {
t.Fatalf("Close() error = %v", err)
}
return buffer.Bytes()
}
func createZip(t *testing.T, files map[string]string) []byte {
t.Helper()
var buffer bytes.Buffer
zipWriter := zip.NewWriter(&buffer)
for name, body := range files {
fileWriter, err := zipWriter.Create(name)
if err != nil {
t.Fatalf("Create() error = %v", err)
}
if _, err := fileWriter.Write([]byte(body)); err != nil {
t.Fatalf("Write() error = %v", err)
}
}
if err := zipWriter.Close(); err != nil {
t.Fatalf("Close() error = %v", err)
}
return buffer.Bytes()
}

View File

@@ -1,273 +0,0 @@
package update
import (
"context"
"fmt"
"net/http"
"os"
"runtime"
"time"
)
// Defaults applied by NewManager and the environment variable names consulted
// by the update manager.
const (
// DefaultManifestURL is used when Options.ManifestURL is empty.
DefaultManifestURL = "https://cdn.browseros.com/cli/latest/manifest.json"
// DefaultCheckTTL is used when Options.CheckTTL is zero; ShouldCheck passes
// the TTL to State.IsStale.
DefaultCheckTTL = 24 * time.Hour
// DefaultHTTPTimeout is used when Options.HTTPTimeout is zero; it bounds the
// manifest fetch in CheckNow.
DefaultHTTPTimeout = 2 * time.Second
// DefaultDownloadTimeout is used when Options.DownloadTimeout is zero; it
// bounds the asset download in Apply.
DefaultDownloadTimeout = 5 * time.Minute
// SkipCheckEnv names the environment variable that, when set to any
// non-empty value, disables automatic update checks.
SkipCheckEnv = "BROWSEROS_SKIP_UPDATE_CHECK"
// InstallMethodEnv names the environment variable describing how the CLI was
// installed ("npm", "brew" or "homebrew" are recognised).
InstallMethodEnv = "BROWSEROS_INSTALL_METHOD"
)
// Options configures a Manager. Zero values for ManifestURL, CheckTTL,
// HTTPTimeout, DownloadTimeout, Now and HTTPClient are replaced with defaults
// by NewManager.
type Options struct {
// CurrentVersion is the running build's version; it must be a release
// version for checks to be allowed.
CurrentVersion string
// ManifestURL is the location the update manifest is fetched from.
ManifestURL string
// CheckTTL is the minimum age of the last check before a new automatic
// check is considered due.
CheckTTL time.Duration
// HTTPTimeout bounds the manifest fetch performed by CheckNow.
HTTPTimeout time.Duration
// DownloadTimeout bounds the asset download performed by Apply.
DownloadTimeout time.Duration
// JSONOutput disables automatic checks and notices when true.
JSONOutput bool
// Debug is not read by the Manager code in this file.
// NOTE(review): presumably consumed by callers; confirm.
Debug bool
// Automatic must be true for automatic checks and cached notices.
Automatic bool
// HTTPClient is cloned (with its Timeout cleared) for each request.
HTTPClient *http.Client
// Now supplies the current time; it is replaceable for tests.
Now func() time.Time
}
// Manager performs update checks and applies updates, keeping the most
// recently loaded or saved State in memory.
type Manager struct {
// options holds the configuration after NewManager has filled in defaults.
options Options
// state is the last known update state; it is replaced after each check,
// recorded error, or applied update.
state *State
}
// CheckResult is the outcome of a successful CheckNow call.
type CheckResult struct {
// CurrentVersion is copied from Options.CurrentVersion.
CurrentVersion string `json:"current_version"`
// LatestVersion is the version reported by the manifest.
LatestVersion string `json:"latest_version"`
// LatestPublishedAt is the manifest's published_at value, passed through
// as a string.
LatestPublishedAt string `json:"latest_published_at,omitempty"`
// UpdateAvailable is true when CurrentVersion compares lower than
// LatestVersion.
UpdateAvailable bool `json:"update_available"`
// CheckedAt is the time returned by Options.Now when the check completed.
CheckedAt time.Time `json:"checked_at"`
// Asset is the asset selected for this platform; it is set only when
// UpdateAvailable is true.
Asset *Asset `json:"asset,omitempty"`
}
// NewManager builds a Manager from options, substituting package defaults for
// any zero-valued setting, and seeds it with the persisted update state. If
// the state cannot be loaded the Manager starts with an empty State.
func NewManager(options Options) *Manager {
	manager := &Manager{
		options: optionsWithDefaults(options),
		state:   &State{},
	}
	if loaded, err := LoadState(); err == nil {
		manager.state = loaded
	}
	return manager
}

// optionsWithDefaults returns a copy of options with every unset field
// replaced by its package default.
func optionsWithDefaults(options Options) Options {
	if options.ManifestURL == "" {
		options.ManifestURL = DefaultManifestURL
	}
	if options.CheckTTL == 0 {
		options.CheckTTL = DefaultCheckTTL
	}
	if options.HTTPTimeout == 0 {
		options.HTTPTimeout = DefaultHTTPTimeout
	}
	if options.DownloadTimeout == 0 {
		options.DownloadTimeout = DefaultDownloadTimeout
	}
	if options.Now == nil {
		options.Now = time.Now
	}
	if options.HTTPClient == nil {
		options.HTTPClient = &http.Client{}
	}
	return options
}
// CachedNotice returns an "update available" message based solely on the
// cached state, without touching the network. It returns "" when automatic
// checks are disabled, when no latest version is cached, when the versions
// cannot be compared, or when the current version is not older than the
// cached one.
func (m *Manager) CachedNotice() string {
	if !m.AutomaticEnabled() {
		return ""
	}
	if m.state == nil || m.state.LatestVersion == "" {
		return ""
	}

	current, latest := m.options.CurrentVersion, m.state.LatestVersion
	if order, err := CompareVersions(current, latest); err == nil && order < 0 {
		return FormatNotice(current, latest)
	}
	return ""
}
// AutomaticEnabled reports whether automatic update checks and notices are
// allowed. They are disabled when Options.Automatic is false, when JSON output
// is requested, when the SkipCheckEnv variable is non-empty, when the install
// method indicates a package manager, or when the current version is not a
// release version.
func (m *Manager) AutomaticEnabled() bool {
	switch {
	case !m.options.Automatic, m.options.JSONOutput:
		return false
	case os.Getenv(SkipCheckEnv) != "":
		return false
	case installedViaPackageManager():
		return false
	}
	return IsReleaseVersion(m.options.CurrentVersion)
}
// installedViaPackageManager reports whether the InstallMethodEnv variable
// names one of the recognised package managers ("npm", "brew", "homebrew").
func installedViaPackageManager() bool {
	method := os.Getenv(InstallMethodEnv)
	return method == "npm" || method == "brew" || method == "homebrew"
}
// ShouldCheck reports whether an automatic check is due: automatic checks must
// be enabled and the cached state must be stale with respect to CheckTTL.
func (m *Manager) ShouldCheck() bool {
	return m.AutomaticEnabled() && m.state.IsStale(m.options.Now(), m.options.CheckTTL)
}
// StartBackgroundCheck runs CheckNow in a goroutine when a check is due and
// returns a channel that is closed once the check has finished. When no check
// is due the returned channel is already closed. The result and error of the
// check are discarded.
func (m *Manager) StartBackgroundCheck(ctx context.Context) <-chan struct{} {
	finished := make(chan struct{})

	if m.ShouldCheck() {
		go func() {
			defer close(finished)
			_, _ = m.CheckNow(ctx)
		}()
		return finished
	}

	close(finished)
	return finished
}
// CheckNow fetches the update manifest, selects the asset for the running
// platform, compares versions and persists the outcome.
//
// It fails immediately for non-release builds. Failures while fetching the
// manifest, selecting the asset or comparing versions are recorded in the
// state via recordError before being returned. On success the in-memory state
// is replaced and saved; a save failure is ignored.
func (m *Manager) CheckNow(ctx context.Context) (*CheckResult, error) {
	current := m.options.CurrentVersion
	if !IsReleaseVersion(current) {
		return nil, fmt.Errorf("self-update is unavailable for non-release build %q", current)
	}

	// fail records err in the persisted state and hands it back to the caller.
	fail := func(err error) (*CheckResult, error) {
		m.recordError(err)
		return nil, err
	}

	fetchCtx, cancel := context.WithTimeout(ctx, m.options.HTTPTimeout)
	defer cancel()

	manifest, err := FetchManifest(fetchCtx, cloneHTTPClient(m.options.HTTPClient), m.options.ManifestURL)
	if err != nil {
		return fail(err)
	}
	asset, err := SelectAsset(manifest, runtime.GOOS, runtime.GOARCH)
	if err != nil {
		return fail(err)
	}
	order, err := CompareVersions(current, manifest.Version)
	if err != nil {
		return fail(err)
	}

	checkedAt := m.options.Now()
	result := &CheckResult{
		CurrentVersion:    current,
		LatestVersion:     manifest.Version,
		LatestPublishedAt: manifest.PublishedAt,
		UpdateAvailable:   order < 0,
		CheckedAt:         checkedAt,
	}
	if result.UpdateAvailable {
		// Attach a private copy so the result does not alias the local.
		selected := asset
		result.Asset = &selected
	}

	m.state = &State{
		LastCheckedAt:     checkedAt,
		LatestVersion:     manifest.Version,
		LatestPublishedAt: manifest.PublishedAt,
		AssetURL:          asset.URL,
	}
	_ = SaveState(m.state)
	return result, nil
}
// Apply downloads the asset named in result, verifies its checksum, extracts
// the binary and replaces the currently running executable with it.
//
// It returns an error when result does not describe an available update, when
// any step fails, or when the executable's location fails the permission
// check. On success the applied state is persisted.
func (m *Manager) Apply(ctx context.Context, result *CheckResult) error {
	if result == nil || !result.UpdateAvailable || result.Asset == nil {
		return fmt.Errorf("browseros-cli is already up to date")
	}
	asset := result.Asset

	dlCtx, cancel := context.WithTimeout(ctx, m.options.DownloadTimeout)
	defer cancel()

	archive, err := DownloadAsset(dlCtx, cloneHTTPClient(m.options.HTTPClient), *asset)
	if err != nil {
		return err
	}
	// Verify the archive before unpacking anything from it.
	if err = VerifyChecksum(archive, asset.SHA256); err != nil {
		return err
	}
	newBinary, err := ExtractBinary(archive, asset.ArchiveFormat)
	if err != nil {
		return err
	}

	exePath, err := os.Executable()
	if err != nil {
		return err
	}
	if permErr := CheckPermissions(exePath); permErr != nil {
		return fmt.Errorf(
			"cannot replace %s: %w\n\nReinstall with the installer script or move the binary to a writable location.",
			exePath,
			permErr,
		)
	}
	if err = ApplyBinary(newBinary, exePath); err != nil {
		return err
	}

	m.saveAppliedState(result)
	return nil
}
// FormatNotice renders the two-line "update available" message. The second
// line names the upgrade command that matches the install method read from
// the InstallMethodEnv variable, defaulting to the CLI's own update command.
func FormatNotice(currentVersion, latestVersion string) string {
	var hint string
	switch os.Getenv(InstallMethodEnv) {
	case "npm":
		hint = "\nRun `npm update -g browseros-cli` to upgrade."
	case "brew", "homebrew":
		hint = "\nRun `brew upgrade browseros-cli` to upgrade."
	default:
		hint = "\nRun `browseros-cli update` to upgrade."
	}

	headline := fmt.Sprintf(
		"Update available: browseros-cli v%s (current v%s)",
		latestVersion,
		currentVersion,
	)
	return headline + hint
}
// recordError stores err's message in a copy of the current state, makes that
// copy the Manager's state and persists it. A save failure is ignored.
func (m *Manager) recordError(err error) {
	var next State
	if m.state != nil {
		next = *m.state
	}
	next.CheckError = err.Error()

	m.state = &next
	_ = SaveState(m.state)
}
// saveAppliedState replaces the Manager's state with a fresh one describing
// the update in result, which drops any previously recorded check error, and
// persists it. A save failure is ignored. result.Asset must be non-nil.
func (m *Manager) saveAppliedState(result *CheckResult) {
	m.state = &State{
		LastCheckedAt:     m.options.Now(),
		LatestVersion:     result.LatestVersion,
		LatestPublishedAt: result.LatestPublishedAt,
		AssetURL:          result.Asset.URL,
	}
	_ = SaveState(m.state)
}
func cloneHTTPClient(client *http.Client) *http.Client {
if client == nil {
return &http.Client{}
}
cloned := *client
cloned.Timeout = 0
return &cloned
}

View File

@@ -1,188 +0,0 @@
package update
import (
"context"
"net/http"
"net/http/httptest"
"runtime"
"testing"
"time"
)
func TestManagerCachedNotice(t *testing.T) {
manager := NewManager(Options{
CurrentVersion: "1.0.0",
Automatic: true,
})
manager.state = &State{LatestVersion: "1.2.0"}
notice := manager.CachedNotice()
if notice == "" {
t.Fatal("CachedNotice() returned empty notice")
}
}
func TestManagerShouldCheck(t *testing.T) {
manager := NewManager(Options{
CurrentVersion: "1.0.0",
Automatic: true,
CheckTTL: time.Minute,
Now: func() time.Time {
return time.Unix(1000, 0).UTC()
},
})
manager.state = &State{LastCheckedAt: time.Unix(0, 0).UTC()}
if !manager.ShouldCheck() {
t.Fatal("ShouldCheck() = false, want true")
}
}
// TestManagerCheckNow serves a manifest advertising version 9.9.9 for the
// running platform and verifies that CheckNow reports an available update and
// carries the manifest's published_at value into both the result and the
// Manager's state.
func TestManagerCheckNow(t *testing.T) {
// Redirect the config directory so the state saved by CheckNow lands in a
// temporary location.
configRoot := t.TempDir()
t.Setenv("XDG_CONFIG_HOME", configRoot)
// The asset key is built from the running platform so SelectAsset finds it.
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{
"version":"9.9.9",
"published_at":"2026-03-27T19:00:00Z",
"tag":"browseros-cli-v9.9.9",
"assets":{
"` + runtimePlatformKey(t) + `":{
"filename":"browseros-cli_9.9.9_test.tar.gz",
"url":"https://cdn.example.com/cli/v9.9.9/browseros-cli_9.9.9_test.tar.gz",
"archive_format":"tar.gz",
"sha256":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
}
}
}`))
}))
defer server.Close()
// Automatic is false: CheckNow itself only requires a release version.
manager := NewManager(Options{
CurrentVersion: "1.0.0",
ManifestURL: server.URL,
Automatic: false,
HTTPClient: server.Client(),
Now: func() time.Time {
return time.Unix(100, 0).UTC()
},
})
result, err := manager.CheckNow(context.Background())
if err != nil {
t.Fatalf("CheckNow() error = %v", err)
}
if !result.UpdateAvailable {
t.Fatal("CheckNow() UpdateAvailable = false, want true")
}
if result.LatestPublishedAt != "2026-03-27T19:00:00Z" {
t.Fatalf(
"CheckNow() LatestPublishedAt = %q, want %q",
result.LatestPublishedAt,
"2026-03-27T19:00:00Z",
)
}
// The same value must also have been copied into the Manager's state.
if manager.state.LatestPublishedAt != "2026-03-27T19:00:00Z" {
t.Fatalf(
"state LatestPublishedAt = %q, want %q",
manager.state.LatestPublishedAt,
"2026-03-27T19:00:00Z",
)
}
}
// TestCloneHTTPClientClearsTimeout verifies that cloneHTTPClient returns a
// distinct client with no timeout and leaves the source client unchanged.
func TestCloneHTTPClientClearsTimeout(t *testing.T) {
	const originalTimeout = time.Second
	base := &http.Client{Timeout: originalTimeout}

	cloned := cloneHTTPClient(base)

	if cloned == base {
		t.Fatal("cloneHTTPClient() returned the original client")
	}
	if got := cloned.Timeout; got != 0 {
		t.Fatalf("cloneHTTPClient() Timeout = %s, want 0", got)
	}
	if got := base.Timeout; got != originalTimeout {
		t.Fatalf("base Timeout = %s, want %s", got, time.Second)
	}
}
// TestManagerSaveAppliedState verifies that saveAppliedState stamps the state
// with the current time, clears any earlier check error and records the
// applied release's publish date.
func TestManagerSaveAppliedState(t *testing.T) {
	t.Setenv("XDG_CONFIG_HOME", t.TempDir())

	const publishedAt = "2026-03-27T19:00:00Z"
	now := time.Unix(200, 0).UTC()

	manager := NewManager(Options{
		CurrentVersion: "1.0.0",
		Now:            func() time.Time { return now },
	})
	// Start from a state that carries an older timestamp and an error.
	manager.state = &State{
		LastCheckedAt: time.Unix(100, 0).UTC(),
		CheckError:    "manifest fetch failed",
	}

	applied := &CheckResult{
		LatestVersion:     "9.9.9",
		LatestPublishedAt: publishedAt,
		Asset: &Asset{
			URL: "https://cdn.example.com/cli/v9.9.9/browseros-cli_9.9.9_test.tar.gz",
		},
	}
	manager.saveAppliedState(applied)

	got := manager.state
	if got.LastCheckedAt != now {
		t.Fatalf("LastCheckedAt = %v, want %v", got.LastCheckedAt, now)
	}
	if got.CheckError != "" {
		t.Fatalf("CheckError = %q, want empty", got.CheckError)
	}
	if got.LatestPublishedAt != publishedAt {
		t.Fatalf("LatestPublishedAt = %q", got.LatestPublishedAt)
	}
}
func TestAutomaticEnabledSkipsForPackageManagerInstall(t *testing.T) {
t.Setenv("BROWSEROS_INSTALL_METHOD", "npm")
manager := NewManager(Options{
CurrentVersion: "1.0.0",
Automatic: true,
})
if manager.AutomaticEnabled() {
t.Fatal("AutomaticEnabled() = true, want false when BROWSEROS_INSTALL_METHOD=npm")
}
}
func TestAutomaticEnabledAllowsNormalInstall(t *testing.T) {
t.Setenv("BROWSEROS_INSTALL_METHOD", "")
manager := NewManager(Options{
CurrentVersion: "1.0.0",
Automatic: true,
})
if !manager.AutomaticEnabled() {
t.Fatal("AutomaticEnabled() = false, want true when BROWSEROS_INSTALL_METHOD is empty")
}
}
// runtimePlatformKey returns the manifest asset key for the platform the test
// is running on, failing the test if that platform is unsupported.
func runtimePlatformKey(t *testing.T) string {
	t.Helper()

	platform, err := PlatformKey(runtime.GOOS, runtime.GOARCH)
	if err != nil {
		t.Fatalf("PlatformKey() error = %v", err)
	}
	return platform
}
// runtimeGOOS returns the operating system the test binary is running on.
func runtimeGOOS() string {
return runtime.GOOS
}
// runtimeGOARCH returns the architecture the test binary is running on.
func runtimeGOARCH() string {
return runtime.GOARCH
}

View File

@@ -1,144 +0,0 @@
package update
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"golang.org/x/mod/semver"
)
// maxManifestSize caps how many bytes of a manifest response are read (1 MiB).
const maxManifestSize = 1 << 20
// Manifest is the JSON document describing the latest published release.
type Manifest struct {
// Version is the release version; Validate requires it to be a release
// version.
Version string `json:"version"`
// PublishedAt is the manifest's published_at value, kept as a string.
PublishedAt string `json:"published_at"`
// Tag is the manifest's tag value; it is not read by the code in this file.
Tag string `json:"tag"`
// Assets maps a platform key ("<os>/<arch>", see PlatformKey) to the asset
// for that platform.
Assets map[string]Asset `json:"assets"`
}
// Asset describes one downloadable archive listed in a Manifest.
type Asset struct {
// Filename is the archive's file name; Validate does not require it.
Filename string `json:"filename"`
// URL is where the archive is downloaded from; Validate requires it.
URL string `json:"url"`
// ArchiveFormat must be "tar.gz" or "zip".
ArchiveFormat string `json:"archive_format"`
// SHA256 is the expected checksum of the archive; Validate requires it to
// be non-empty.
SHA256 string `json:"sha256"`
}
// FetchManifest downloads, decodes and validates the update manifest at url.
//
// The request is bound to ctx. A nil client falls back to http.DefaultClient.
// Any status other than 200 is an error. The body is read up to
// maxManifestSize bytes; a larger body is rejected with an explicit size error
// rather than surfacing as a JSON parse failure on truncated input. The body
// must be a single JSON document (trailing whitespace is allowed).
func FetchManifest(
	ctx context.Context,
	client *http.Client,
	url string,
) (*Manifest, error) {
	if client == nil {
		client = http.DefaultClient
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("update manifest returned HTTP %d", resp.StatusCode)
	}

	// Read one byte past the cap so an oversized body can be told apart from
	// one that is exactly at the limit.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxManifestSize+1))
	if err != nil {
		return nil, err
	}
	if len(body) > maxManifestSize {
		return nil, fmt.Errorf("update manifest exceeds %d bytes", maxManifestSize)
	}

	var manifest Manifest
	if err := json.Unmarshal(body, &manifest); err != nil {
		return nil, err
	}
	if err := manifest.Validate(); err != nil {
		return nil, err
	}
	return &manifest, nil
}
// Validate checks that the manifest is non-nil, carries a release version and
// at least one asset, and that every asset has a URL, a SHA-256 value and a
// supported archive format ("tar.gz" or "zip"). It returns the first problem
// found.
func (m *Manifest) Validate() error {
	switch {
	case m == nil:
		return fmt.Errorf("update manifest is nil")
	case !IsReleaseVersion(m.Version):
		return fmt.Errorf("invalid manifest version %q", m.Version)
	case len(m.Assets) == 0:
		return fmt.Errorf("update manifest has no assets")
	}

	for platform, entry := range m.Assets {
		switch {
		case entry.URL == "":
			return fmt.Errorf("asset %q is missing url", platform)
		case entry.SHA256 == "":
			return fmt.Errorf("asset %q is missing sha256", platform)
		case entry.ArchiveFormat != "tar.gz" && entry.ArchiveFormat != "zip":
			return fmt.Errorf("asset %q has unsupported archive format %q", platform, entry.ArchiveFormat)
		}
	}
	return nil
}
// NormalizeVersion trims surrounding whitespace from version, ensures it has a
// leading "v" and returns its canonical semver form. It returns "" for an
// empty input or one that semver.Canonical does not accept.
func NormalizeVersion(version string) string {
	trimmed := strings.TrimSpace(version)
	if trimmed == "" {
		return ""
	}
	// Dropping one optional leading "v" and re-adding it yields the same
	// string as prefixing only when it is missing.
	return semver.Canonical("v" + strings.TrimPrefix(trimmed, "v"))
}
// IsReleaseVersion reports whether version normalizes to a non-empty
// canonical version via NormalizeVersion.
func IsReleaseVersion(version string) bool {
return NormalizeVersion(version) != ""
}
// CompareVersions normalizes both versions and compares them with
// semver.Compare, so the result is negative when current is older than
// latest, zero when they are equal and positive when current is newer. It
// returns an error naming whichever input fails to normalize, checking
// current first.
func CompareVersions(current, latest string) (int, error) {
	lhs := NormalizeVersion(current)
	if lhs == "" {
		return 0, fmt.Errorf("invalid current version %q", current)
	}

	rhs := NormalizeVersion(latest)
	if rhs == "" {
		return 0, fmt.Errorf("invalid latest version %q", latest)
	}

	return semver.Compare(lhs, rhs), nil
}
// PlatformKey returns the manifest asset key "<goos>/<goarch>" for a
// supported platform. Supported operating systems are darwin, linux and
// windows; supported architectures are amd64 and arm64. The operating system
// is validated before the architecture.
func PlatformKey(goos, goarch string) (string, error) {
	if goos != "darwin" && goos != "linux" && goos != "windows" {
		return "", fmt.Errorf("unsupported os %q", goos)
	}
	if goarch != "amd64" && goarch != "arm64" {
		return "", fmt.Errorf("unsupported arch %q", goarch)
	}
	return goos + "/" + goarch, nil
}
// SelectAsset returns the manifest asset for the given operating system and
// architecture.
//
// It returns an error when the platform is unsupported (see PlatformKey),
// when manifest is nil, or when the manifest lists no asset for the platform.
func SelectAsset(manifest *Manifest, goos, goarch string) (Asset, error) {
	key, err := PlatformKey(goos, goarch)
	if err != nil {
		return Asset{}, err
	}
	// Guard against a nil manifest instead of panicking on the field access;
	// the message matches the one used by Manifest.Validate.
	if manifest == nil {
		return Asset{}, fmt.Errorf("update manifest is nil")
	}
	asset, ok := manifest.Assets[key]
	if !ok {
		return Asset{}, fmt.Errorf("no update asset for %s", key)
	}
	return asset, nil
}

View File

@@ -1,102 +0,0 @@
package update
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// TestNormalizeVersion checks that a bare version gains a "v" prefix and that
// a non-semver string normalizes to the empty string.
func TestNormalizeVersion(t *testing.T) {
	release := NormalizeVersion("1.2.3")
	if release != "v1.2.3" {
		t.Fatalf("NormalizeVersion() = %q, want %q", release, "v1.2.3")
	}

	dev := NormalizeVersion("dev")
	if dev != "" {
		t.Fatalf("NormalizeVersion(dev) = %q, want empty", dev)
	}
}
// TestCompareVersions checks that an older current version compares as less
// than a newer latest version.
func TestCompareVersions(t *testing.T) {
	order, err := CompareVersions("1.2.3", "1.3.0")
	if err != nil {
		t.Fatalf("CompareVersions() error = %v", err)
	}
	if !(order < 0) {
		t.Fatalf("CompareVersions() = %d, want < 0", order)
	}
}
func TestSelectAsset(t *testing.T) {
manifest := &Manifest{
Version: "1.2.3",
Assets: map[string]Asset{
"darwin/arm64": {
URL: "https://cdn.example.com/cli/v1.2.3/browseros-cli.tar.gz",
ArchiveFormat: "tar.gz",
SHA256: "abc",
},
},
}
asset, err := SelectAsset(manifest, "darwin", "arm64")
if err != nil {
t.Fatalf("SelectAsset() error = %v", err)
}
if asset.URL == "" {
t.Fatal("SelectAsset() returned empty URL")
}
}
// TestFetchManifest serves a well-formed manifest from a local test server and
// verifies that FetchManifest decodes its version.
func TestFetchManifest(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{
"version":"1.2.3",
"published_at":"2026-03-27T19:00:00Z",
"tag":"browseros-cli-v1.2.3",
"assets":{
"darwin/arm64":{
"filename":"browseros-cli_1.2.3_darwin_arm64.tar.gz",
"url":"https://cdn.example.com/cli/v1.2.3/browseros-cli_1.2.3_darwin_arm64.tar.gz",
"archive_format":"tar.gz",
"sha256":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
}
}
}`))
}))
defer server.Close()
manifest, err := FetchManifest(context.Background(), server.Client(), server.URL)
if err != nil {
t.Fatalf("FetchManifest() error = %v", err)
}
if manifest.Version != "1.2.3" {
t.Fatalf("FetchManifest() version = %q, want %q", manifest.Version, "1.2.3")
}
}
// TestFetchManifestRejectsOversizedResponse serves a manifest whose body is
// larger than maxManifestSize and verifies that FetchManifest returns an
// error.
func TestFetchManifestRejectsOversizedResponse(t *testing.T) {
// A filename of maxManifestSize bytes pushes the whole document over the cap.
hugeName := strings.Repeat("a", maxManifestSize)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{
"version":"1.2.3",
"published_at":"2026-03-27T19:00:00Z",
"tag":"browseros-cli-v1.2.3",
"assets":{
"darwin/arm64":{
"filename":"` + hugeName + `",
"url":"https://cdn.example.com/cli/v1.2.3/browseros-cli_1.2.3_darwin_arm64.tar.gz",
"archive_format":"tar.gz",
"sha256":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
}
}
}`))
}))
defer server.Close()
if _, err := FetchManifest(context.Background(), server.Client(), server.URL); err == nil {
t.Fatal("FetchManifest() error = nil, want oversized response error")
}
}

View File

@@ -1,80 +0,0 @@
package update
import (
"encoding/json"
"os"
"path/filepath"
"time"
"browseros-cli/config"
)
// State is the update-check state persisted as JSON at StatePath.
type State struct {
// LastCheckedAt is when the last successful check or applied update was
// recorded; the zero value makes IsStale report true.
LastCheckedAt time.Time `json:"last_checked_at"`
// LatestVersion is the most recent version seen in a manifest.
LatestVersion string `json:"latest_version,omitempty"`
// LatestPublishedAt is the published_at value of that manifest.
LatestPublishedAt string `json:"latest_published_at,omitempty"`
// AssetURL is the download URL of the asset selected for this platform.
AssetURL string `json:"asset_url,omitempty"`
// CheckError holds the message of the last failed check, if any.
CheckError string `json:"check_error,omitempty"`
}
// StatePath returns the location of the state file: "update-state.json"
// inside the directory reported by config.Dir.
func StatePath() string {
return filepath.Join(config.Dir(), "update-state.json")
}
// LoadState reads and decodes the persisted state. A missing state file is
// not an error and yields an empty State; any other read failure or a JSON
// decoding failure is returned.
func LoadState() (*State, error) {
	raw, err := os.ReadFile(StatePath())
	switch {
	case err == nil:
		// Fall through to decoding below.
	case os.IsNotExist(err):
		return &State{}, nil
	default:
		return nil, err
	}

	loaded := new(State)
	if err := json.Unmarshal(raw, loaded); err != nil {
		return nil, err
	}
	return loaded, nil
}
// SaveState writes state as indented JSON to StatePath, creating the config
// directory if needed. A nil state is saved as an empty State.
//
// The data is written to a temporary file in the same directory and then
// renamed over the destination; the temporary file is removed if any step
// fails.
func SaveState(state *State) error {
	if state == nil {
		state = &State{}
	}

	dir := config.Dir()
	if err := os.MkdirAll(dir, 0755); err != nil {
		return err
	}

	tmp, err := os.CreateTemp(dir, "update-state-*.json")
	if err != nil {
		return err
	}
	tmpPath := tmp.Name()

	enc := json.NewEncoder(tmp)
	enc.SetIndent("", " ")
	encodeErr := enc.Encode(state)
	// The file is closed on both paths; an encode failure takes precedence
	// over whatever Close reports.
	closeErr := tmp.Close()
	if encodeErr != nil {
		os.Remove(tmpPath)
		return encodeErr
	}
	if closeErr != nil {
		os.Remove(tmpPath)
		return closeErr
	}

	if err := os.Rename(tmpPath, StatePath()); err != nil {
		os.Remove(tmpPath)
		return err
	}
	return nil
}
// IsStale reports whether a new check is due: it is true for a nil State, for
// a State that has never recorded a check, or when at least ttl has elapsed
// between the last check and now.
func (s *State) IsStale(now time.Time, ttl time.Duration) bool {
	if s == nil {
		return true
	}
	if s.LastCheckedAt.IsZero() {
		return true
	}
	elapsed := now.Sub(s.LastCheckedAt)
	return elapsed >= ttl
}

View File

@@ -1,54 +0,0 @@
package update
import (
"path/filepath"
"testing"
"time"
)
// TestLoadStateMissing checks that loading from an empty config root succeeds
// and yields a non-nil State.
func TestLoadStateMissing(t *testing.T) {
	t.Setenv("XDG_CONFIG_HOME", t.TempDir())

	loaded, err := LoadState()
	if err != nil {
		t.Fatalf("LoadState() error = %v", err)
	}
	if loaded == nil {
		t.Fatal("LoadState() returned nil state")
	}
}
// TestSaveStateRoundTrip checks that a saved State can be loaded back and
// that the state file lives under "<config root>/browseros-cli".
func TestSaveStateRoundTrip(t *testing.T) {
	root := t.TempDir()
	t.Setenv("XDG_CONFIG_HOME", root)

	saved := &State{
		LastCheckedAt:     time.Unix(100, 0).UTC(),
		LatestVersion:     "1.2.3",
		LatestPublishedAt: "2026-03-27T19:00:00Z",
		AssetURL:          "https://cdn.example.com/cli/v1.2.3/browseros-cli.tar.gz",
	}
	if err := SaveState(saved); err != nil {
		t.Fatalf("SaveState() error = %v", err)
	}

	loaded, err := LoadState()
	if err != nil {
		t.Fatalf("LoadState() error = %v", err)
	}
	if loaded.LatestVersion != saved.LatestVersion {
		t.Fatalf("LatestVersion = %q, want %q", loaded.LatestVersion, saved.LatestVersion)
	}

	wantPath := filepath.Join(root, "browseros-cli", "update-state.json")
	if StatePath() != wantPath {
		t.Fatalf("StatePath() = %q", StatePath())
	}
}
// TestStateIsStale checks that a check recorded 200 seconds ago is stale
// under a one-minute TTL.
func TestStateIsStale(t *testing.T) {
	lastChecked := time.Unix(0, 0).UTC()
	now := time.Unix(200, 0).UTC()

	stale := (&State{LastCheckedAt: lastChecked}).IsStale(now, time.Minute)
	if !stale {
		t.Fatal("IsStale() = false, want true")
	}
}

View File

@@ -0,0 +1,32 @@
# Dependencies
node_modules/
# Build output
dist/
# Locally built, unpublished docs
docs/
# TypeScript
*.tsbuildinfo
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Logs
*.log
npm-debug.log*
# Environment
.env
.env.local
# Claude
.claude

View File

@@ -0,0 +1,430 @@
# BrowserOS Controller
WebSocket-based Chrome Extension that exposes browser automation APIs for remote control.
**⚠️ IMPORTANT:** This extension ONLY works in **BrowserOS Chrome**, not regular Chrome!
---
## 🚀 Quick Start
### 1. Build the Extension
```bash
npm install
npm run build
```
### 2. Load Extension in BrowserOS Chrome
1. Open BrowserOS Chrome
2. Go to `chrome://extensions/`
3. Enable **"Developer mode"** (top-right toggle)
4. Click **"Load unpacked"**
5. Select the `dist/` folder
6. Verify extension is loaded (you should see "BrowserOS Controller")
### 3. Test the Extension
```bash
npm test
```
This starts an interactive test client. You should see:
```
🚀 Starting BrowserOS Controller Test Client
──────────────────────────────────────────────────────────
WebSocket Server Started
Listening on: ws://localhost:9224/controller
Waiting for extension to connect...
✅ Extension connected!
Running Diagnostic Test
============================================================
📤 Sending: checkBrowserOS
Request ID: test-1729012345678
📨 Response: test-1729012345678
Status: ✅ SUCCESS
Data: {
"available": true,
"apis": [
"captureScreenshot",
"clear",
"click",
...
]
}
```
**If you see "available": true**, you're all set! 🎉
**If you see "available": false**, you're not using BrowserOS Chrome.
---
## ⚙️ Configuration
The extension can be configured using environment variables. This is optional - sensible defaults are provided.
### Environment Variables
Create a `.env` file in the project root to customize configuration:
```bash
# Copy the example file
cp .env.example .env
# Edit .env with your values
```
### Available Configuration Options
#### WebSocket Configuration
```bash
WEBSOCKET_PROTOCOL=ws # ws or wss (default: ws)
WEBSOCKET_HOST=localhost # Server host (default: localhost)
WEBSOCKET_PORT=9224 # Server port (default: 9224)
WEBSOCKET_PATH=/controller # Server path (default: /controller)
```
#### Connection Settings
```bash
WEBSOCKET_RECONNECT_DELAY=1000 # Initial reconnect delay in ms (default: 1000)
WEBSOCKET_MAX_RECONNECT_DELAY=30000 # Max reconnect delay in ms (default: 30000)
WEBSOCKET_RECONNECT_MULTIPLIER=1.5 # Exponential backoff multiplier (default: 1.5)
WEBSOCKET_MAX_RECONNECT_ATTEMPTS=0 # Max reconnect attempts, 0 = infinite (default: 0)
WEBSOCKET_HEARTBEAT_INTERVAL=30000 # Heartbeat interval in ms (default: 30000)
WEBSOCKET_HEARTBEAT_TIMEOUT=5000 # Heartbeat timeout in ms (default: 5000)
WEBSOCKET_CONNECTION_TIMEOUT=10000 # Connection timeout in ms (default: 10000)
WEBSOCKET_REQUEST_TIMEOUT=30000 # Request timeout in ms (default: 30000)
```
#### Concurrency Settings
```bash
CONCURRENCY_MAX_CONCURRENT=100 # Max concurrent requests (default: 100)
CONCURRENCY_MAX_QUEUE_SIZE=1000 # Max queued requests (default: 1000)
```
#### Logging Settings
```bash
LOGGING_ENABLED=true # Enable/disable logging (default: true)
LOGGING_LEVEL=info # Log level: debug, info, warn, error (default: info)
LOGGING_PREFIX=[BrowserOS Controller] # Log message prefix (default: [BrowserOS Controller])
```
### Example: Custom Port Configuration
If you want to use a different port (e.g., 8080):
```bash
# .env
WEBSOCKET_PORT=8080
```
Then rebuild the extension:
```bash
npm run build
```
The extension will now connect to `ws://localhost:8080/controller` instead of the default port 9224.
---
## 📖 Architecture
See [ARCHITECTURE.md](./ARCHITECTURE.md) for complete system documentation including:
- High-level architecture diagram
- Request flow (step-by-step)
- Component details
- All 14 registered actions
- WebSocket protocol specification
- Debugging guide
---
## 🧪 Testing
The test client (`npm test`) provides an interactive menu:
```
Available Commands:
Tab Actions:
1. getActiveTab - Get currently active tab
2. getTabs - Get all tabs
Browser Actions:
3. getInteractiveSnapshot - Get page elements (requires tabId)
4. click - Click element (requires tabId, nodeId)
5. inputText - Type text (requires tabId, nodeId, text)
6. captureScreenshot - Take screenshot (requires tabId)
Diagnostic:
d. checkBrowserOS - Check if chrome.browserOS is available
Other:
h. Show this menu
q. Quit
```
### Example Usage:
1. Type `1` → Get active tab
2. Type `d` → Run diagnostic
3. Type `q` → Quit
---
## 🔧 Development
### Build Commands
```bash
npm run build # Production build
npm run build:dev # Development build (with source maps)
npm run watch # Watch mode for development
```
### Debug Extension
1. Go to `chrome://extensions/`
2. Under "BrowserOS Controller", click the **service worker** link next to "Inspect views"
3. Service worker console shows all logs
**Check extension status:**
```javascript
__browserosController.getStats();
```
**Expected output:**
```javascript
{
connection: "connected",
requests: { inFlight: 0, avgDuration: 0, errorRate: 0, totalRequests: 0 },
concurrency: { inFlight: 0, queued: 0, utilization: 0 },
validator: { activeIds: 0 },
responseQueue: { size: 0 }
}
```
**Check registered actions:**
Look for this log on extension load:
```
Registered 14 action(s): checkBrowserOS, getActiveTab, getTabs, ...
```
---
## 📋 Available Actions
| Action | Input | Output | Description |
| ------------------------ | --------------------------------- | ------------------------------- | -------------------------------------- |
| `checkBrowserOS` | `{}` | `{available, apis}` | Check if chrome.browserOS is available |
| `getActiveTab` | `{}` | `{tabId, url, title, windowId}` | Get currently active tab |
| `getTabs` | `{}` | `{tabs[]}` | Get all open tabs |
| `getInteractiveSnapshot` | `{tabId, options?}` | `InteractiveSnapshot` | Get all interactive elements on page |
| `click` | `{tabId, nodeId}` | `{success}` | Click element by nodeId |
| `inputText` | `{tabId, nodeId, text}` | `{success}` | Type text into element |
| `clear` | `{tabId, nodeId}` | `{success}` | Clear text from element |
| `scrollToNode` | `{tabId, nodeId}` | `{scrolled}` | Scroll element into view |
| `captureScreenshot` | `{tabId, size?, showHighlights?}` | `{dataUrl}` | Take screenshot |
| `sendKeys` | `{tabId, keys}` | `{success}` | Send keyboard keys |
| `getPageLoadStatus` | `{tabId}` | `PageLoadStatus` | Get page load status |
| `getSnapshot` | `{tabId, type, options?}` | `Snapshot` | Get text/links snapshot |
| `clickCoordinates` | `{tabId, x, y}` | `{success}` | Click at coordinates |
| `typeAtCoordinates` | `{tabId, x, y, text}` | `{success}` | Type at coordinates |
---
## 🔌 WebSocket Protocol
**Endpoint:** `ws://localhost:9224/controller`
**Request Format:**
```json
{
"id": "unique-request-id",
"action": "click",
"payload": {
"tabId": 12345,
"nodeId": 42
}
}
```
**Response Format:**
```json
{
"id": "unique-request-id",
"ok": true,
"data": {
"success": true
}
}
```
**Error Response:**
```json
{
"id": "unique-request-id",
"ok": false,
"error": "Element not found: nodeId 42"
}
```
---
## ⚠️ Common Issues
### Issue 1: "chrome.browserOS is undefined"
**Symptoms:**
- Diagnostic shows `"available": false`
- All browser actions fail
**Cause:** Not using BrowserOS Chrome
**Solution:**
- Download and use BrowserOS Chrome (not regular Chrome)
- Verify at `chrome://version` - should show "BrowserOS" in the name
---
### Issue 2: "Port 9224 is already in use"
**Symptoms:**
```
❌ Fatal Error: Port 9224 is already in use!
```
**Solution:**
```bash
lsof -ti:9224 | xargs kill -9
npm test
```
---
### Issue 3: Extension Not Connecting
**Symptoms:**
- Test client shows "Waiting for extension to connect..." forever
- Service worker console shows "Connection timeout"
**Checklist:**
1. ✅ Test server running (`npm test`)
2. ✅ Extension loaded in BrowserOS Chrome
3. ✅ Extension enabled (chrome://extensions/)
4. ✅ Service worker active (not suspended)
**Solution:**
1. Reload extension: chrome://extensions/ → "Reload" button
2. Restart test server: Ctrl+C, then `npm test`
---
### Issue 4: "Unknown action"
**Symptoms:**
```
Error: Unknown action: "click". Available actions: getActiveTab, getTabs, ...
```
**Cause:** Action not registered (extension didn't reload properly)
**Solution:**
1. Toggle extension OFF and ON at chrome://extensions/
2. Check service worker console for: `Registered 14 action(s): ...`
---
## 📁 Project Structure
```
browseros-controller/
├── README.md # This file
├── ARCHITECTURE.md # Complete architecture documentation
├── .env.example # Environment variable template
├── manifest.json # Extension manifest
├── package.json # Node dependencies
├── webpack.config.js # Build configuration
├── src/ # Source code
│ ├── background/ # Service worker entry point
│ ├── actions/ # Action handlers
│ │ ├── bookmark/ # Bookmark management actions
│ │ ├── browser/ # Browser interaction actions
│ │ ├── diagnostics/ # Diagnostic actions
│ │ ├── history/ # History management actions
│ │ └── tab/ # Tab management actions
│ ├── adapters/ # Chrome API wrappers
│ ├── config/ # Configuration management
│ │ ├── constants.ts # Application constants
│ │ └── environment.ts # Environment variable handling
│ ├── websocket/ # WebSocket client
│ ├── utils/ # Utilities
│ ├── protocol/ # Protocol types
│ └── types/ # TypeScript definitions
├── tests/ # Test files
│ ├── test-simple.js # Interactive test client
│ └── test-auto.js # Automated test client
└── dist/ # Built extension (generated)
├── background.js
└── manifest.json
```
---
## 🔗 Related Projects
- **BrowserOS-agent**: AI agent that uses this controller for browser automation
- **BrowserOS Chrome**: Custom Chrome build with `chrome.browserOS` APIs
---
## 📄 License
MIT
---
## 🆘 Support
For issues or questions:
1. Check [ARCHITECTURE.md](./ARCHITECTURE.md) for detailed documentation
2. Review the "Common Issues" section above
3. Check service worker console for detailed error logs
4. Verify you're using BrowserOS Chrome (run diagnostic test)
---
**Happy automating! 🚀**

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 574 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

View File

@@ -0,0 +1,38 @@
{
"manifest_version": 3,
"name": "BrowserOS Controller",
"version": "1.0.0.8",
"description": "BrowserOS API bridge for BrowserOS Server",
"key": "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAhlh9i/c2A3f0PL86hXhGPzguLIOQ+sPf3/Y8RD11gmdvoU6XqnUqv7GgBvm7SW7316uPnS58AYZY13jGtF4rFrscdda5H2CjZrtOyOycmKp2KzibJLwibXNm/JwKhZ3QEfgsW/orh1SMY2kNj62JemkWLcLyn3E1T+KTcTVyFOxiJS3hyQ+Y0/Jp1HOqGh5lYS58YYzwhId5rrJjfL7wFYtALgt2dEA2r7p4qpe+SW0QLA+ayjRAjS+yt+qitR0eWg+XgqcIk1f1KblN8/yDISssSD4LWiPofe5CmJPnqlHIuI0CpgvAFv9dvgR/w8OFkXxK5h06i6saum1xExj+IwIDAQAB",
"permissions": [
"tabs",
"activeTab",
"bookmarks",
"history",
"scripting",
"storage",
"tabGroups",
"webNavigation",
"downloads",
"browserOS",
"alarms"
],
"update_url": "https://cdn.browseros.com/extensions/update-manifest.xml",
"host_permissions": ["<all_urls>"],
"background": {
"service_worker": "background.js",
"type": "module"
},
"action": {
"default_icon": {
"16": "assets/icon16.png",
"48": "assets/icon48.png",
"128": "assets/icon128.png"
}
},
"icons": {
"16": "assets/icon16.png",
"48": "assets/icon48.png",
"128": "assets/icon128.png"
}
}

View File

@@ -0,0 +1,39 @@
{
"name": "browseros-controller",
"version": "1.0.0",
"description": "Chrome Extension API bridge for BrowserOS Server",
"directories": {
"doc": "docs"
},
"scripts": {
"build": "webpack --mode production",
"build:dev": "webpack --mode development",
"watch": "webpack --mode development --watch",
"test": "node tests/test-simple.js",
"test:auto": "node tests/test-auto.js",
"typecheck": "tsc --noEmit"
},
"keywords": [
"browser-automation",
"chrome-extension",
"browseros"
],
"author": "BrowserOS Team",
"license": "MIT",
"type": "commonjs",
"dependencies": {
"@browseros/shared": "workspace:*",
"zod": "^4.1.12"
},
"devDependencies": {
"@types/chrome": "^0.1.24",
"@types/node": "^24.7.1",
"copy-webpack-plugin": "^12.0.2",
"terser-webpack-plugin": "^5.3.11",
"ts-loader": "^9.5.4",
"typescript": "^5.9.3",
"webpack": "^5.102.1",
"webpack-cli": "^6.0.1",
"ws": "^8.18.3"
}
}

View File

@@ -0,0 +1,106 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import type { ActionResponse } from '@/protocol/types'
import { ActionResponseSchema } from '@/protocol/types'
import { logger } from '@/utils/logger'
// Re-export for convenience
export type { ActionResponse }
export { ActionResponseSchema }
/**
 * ActionHandler - abstract base class that every action extends.
 *
 * A concrete action supplies two things:
 * - `inputSchema`: the Zod schema the raw payload is parsed with
 * - `execute()`: the action logic, called with the parsed payload
 *
 * `handle()` glues them together: it parses the payload, runs `execute()`
 * and converts the outcome - or anything thrown along the way - into an
 * `ActionResponse`.
 *
 * Usage:
 *   class MyAction extends ActionHandler<InputType, OutputType> {
 *     inputSchema = z.object({ ... })
 *     async execute(input: InputType): Promise<OutputType> { ... }
 *   }
 */
export abstract class ActionHandler<TInput = unknown, TOutput = unknown> {
  /**
   * Zod schema used by `handle()` to validate the raw payload.
   * Must be provided by the concrete action.
   */
  abstract readonly inputSchema: z.ZodSchema<TInput>

  /**
   * Run the action logic.
   * Must be provided by the concrete action.
   *
   * @param input - Payload that has already been parsed by `inputSchema`
   * @returns Action result, placed in the `data` field of the response
   */
  abstract execute(input: TInput): Promise<TOutput>

  /**
   * Validate the payload, execute the action and wrap the outcome.
   *
   * Errors thrown by schema parsing or by `execute()` are caught here and
   * reported as `{ ok: false, error }` instead of being rethrown.
   *
   * @param payload - Raw, unvalidated payload from the request
   * @returns `{ ok: true, data }` on success, `{ ok: false, error }` otherwise
   */
  async handle(payload: unknown): Promise<ActionResponse> {
    // The concrete class name is only used as a log prefix.
    const actionName = this.constructor.name

    try {
      logger.debug(`[${actionName}] Validating input`)
      const validatedInput = this.inputSchema.parse(payload)

      logger.debug(`[${actionName}] Executing action`)
      const result = await this.execute(validatedInput)

      logger.debug(`[${actionName}] Action completed successfully`)
      return { ok: true, data: result }
    } catch (error) {
      const errorMessage = this._formatError(error)
      logger.error(`[${actionName}] Action failed: ${errorMessage}`)
      return { ok: false, error: errorMessage }
    }
  }

  /**
   * Turn any thrown value into a readable, single-line message.
   *
   * - Zod errors: `Validation error: <path>: <message>, ...`
   * - `Error` instances: their `message`
   * - other objects: their string `message` property if present, otherwise
   *   a JSON rendering (instead of the useless "[object Object]")
   * - everything else: `String(error)`
   *
   * @param error - Value caught from validation or execution
   * @returns Formatted error message
   */
  protected _formatError(error: unknown): string {
    if (error instanceof z.ZodError) {
      const errors = error.issues.map((issue: z.ZodIssue) => {
        const path = issue.path.length > 0 ? `${issue.path.join('.')}: ` : ''
        return `${path}${issue.message}`
      })
      return `Validation error: ${errors.join(', ')}`
    }

    if (error instanceof Error) {
      return error.message
    }

    if (typeof error === 'object' && error !== null) {
      // Error-like objects that are not Error instances still carry a message.
      const { message } = error as { message?: unknown }
      if (typeof message === 'string') {
        return message
      }
      try {
        const serialized = JSON.stringify(error)
        if (serialized !== undefined) {
          return serialized
        }
      } catch {
        // Not serializable (e.g. circular references): fall through to String().
      }
    }

    return String(error)
  }
}

View File

@@ -0,0 +1,148 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import type { ActionHandler, ActionResponse } from './ActionHandler'
/**
 * ActionRegistry - name-to-handler lookup table and dispatcher.
 *
 * What it does:
 * - keeps one handler per action name
 * - forwards a request to the matching handler
 * - answers with an error response when the name is unknown
 * - exposes small introspection helpers (names, count, membership)
 *
 * Usage:
 *   const registry = new ActionRegistry()
 *   registry.register('getActiveTab', new GetActiveTabAction())
 *   const response = await registry.dispatch('getActiveTab', {})
 */
export class ActionRegistry {
  private handlers = new Map<string, ActionHandler>()

  /**
   * Store a handler under the given name.
   * Registering an existing name logs a warning and replaces the old handler.
   *
   * @param actionName - Unique action name (e.g., "getActiveTab")
   * @param handler - Handler instance to invoke for that name
   */
  register(actionName: string, handler: ActionHandler): void {
    const alreadyKnown = this.handlers.has(actionName)
    if (alreadyKnown) {
      logger.warn(
        `[ActionRegistry] Action "${actionName}" already registered, overwriting`,
      )
    }

    this.handlers.set(actionName, handler)
    logger.info(`[ActionRegistry] Registered action: ${actionName}`)
  }

  /**
   * Route a request to its handler.
   *
   * An unknown name yields `{ ok: false }` with a message listing the
   * registered names. Otherwise the handler's own response is returned; if
   * the handler unexpectedly throws, the error is wrapped as
   * "Action execution failed: ...".
   *
   * @param actionName - Action to execute
   * @param payload - Unvalidated payload, passed through to the handler
   * @returns Action response
   */
  async dispatch(
    actionName: string,
    payload: unknown,
  ): Promise<ActionResponse> {
    logger.debug(`[ActionRegistry] Dispatching action: ${actionName}`)

    const target = this.handlers.get(actionName)
    if (!target) {
      const known = [...this.handlers.keys()].join(', ')
      const errorMessage = `Unknown action: "${actionName}". Available actions: ${known || 'none'}`
      logger.error(`[ActionRegistry] ${errorMessage}`)
      return { ok: false, error: errorMessage }
    }

    try {
      const response = await target.handle(payload)
      const outcome = response.ok ? 'succeeded' : 'failed'
      logger.debug(`[ActionRegistry] Action "${actionName}" ${outcome}`)
      return response
    } catch (error) {
      // Last line of defence for a handler that throws out of handle().
      const reason = error instanceof Error ? error.message : String(error)
      logger.error(
        `[ActionRegistry] Unexpected error in "${actionName}": ${reason}`,
      )
      return { ok: false, error: `Action execution failed: ${reason}` }
    }
  }

  /**
   * @returns Names of all registered actions, in registration order
   */
  getAvailableActions(): string[] {
    return [...this.handlers.keys()]
  }

  /**
   * @param actionName - Action name to look up
   * @returns True when a handler is registered under that name
   */
  hasAction(actionName: string): boolean {
    return this.handlers.has(actionName)
  }

  /**
   * @returns Number of registered actions
   */
  getActionCount(): number {
    return this.handlers.size
  }

  /**
   * Remove a single action (useful for testing).
   *
   * @param actionName - Action to remove
   * @returns True if a handler was actually removed
   */
  unregister(actionName: string): boolean {
    const wasPresent = this.handlers.delete(actionName)
    if (!wasPresent) {
      return false
    }
    logger.info(`[ActionRegistry] Unregistered action: ${actionName}`)
    return true
  }

  /**
   * Remove every registered action (useful for testing).
   */
  clear(): void {
    const previousCount = this.handlers.size
    this.handlers.clear()
    logger.info(`[ActionRegistry] Cleared ${previousCount} registered actions`)
  }
}

View File

@@ -0,0 +1,81 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const CreateBookmarkInputSchema = z.object({
  title: z.string().describe('Bookmark title'),
  url: z.string().url().describe('Bookmark URL'),
  parentId: z
    .string()
    .optional()
    .describe('Parent folder ID (optional, defaults to "Other Bookmarks")'),
})

// Data handed back to the caller
const CreateBookmarkOutputSchema = z.object({
  id: z.string().describe('Created bookmark ID'),
  title: z.string().describe('Bookmark title'),
  url: z.string().describe('Bookmark URL'),
  dateAdded: z
    .number()
    .optional()
    .describe('Timestamp when bookmark was created'),
})

type CreateBookmarkInput = z.infer<typeof CreateBookmarkInputSchema>
type CreateBookmarkOutput = z.infer<typeof CreateBookmarkOutputSchema>

/**
 * CreateBookmarkAction - create a bookmark from a title and a URL.
 *
 * Input:
 * - title: display title
 * - url: full URL (must pass Zod's URL check)
 * - parentId (optional): folder to create the bookmark in; forwarded to the
 *   adapter as-is, including when it is undefined
 *
 * Output:
 * - id, title, dateAdded: copied from the node returned by the adapter
 * - url: the node's URL, or '' when the node has none
 *
 * Example:
 * {
 *   "title": "Google",
 *   "url": "https://www.google.com"
 * }
 * // Returns: { id: "123", title: "Google", url: "https://www.google.com", dateAdded: 1729012345678 }
 */
export class CreateBookmarkAction extends ActionHandler<
  CreateBookmarkInput,
  CreateBookmarkOutput
> {
  readonly inputSchema = CreateBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: CreateBookmarkInput): Promise<CreateBookmarkOutput> {
    const { title, url, parentId } = input
    const node = await this.bookmarkAdapter.createBookmark({
      title,
      url,
      parentId,
    })

    const output: CreateBookmarkOutput = {
      id: node.id,
      title: node.title,
      url: node.url || '',
      dateAdded: node.dateAdded,
    }
    return output
  }
}

View File

@@ -0,0 +1,52 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const CreateBookmarkFolderInputSchema = z.object({
  title: z.string().describe('Folder name'),
  parentId: z
    .string()
    .optional()
    .describe('Parent folder ID (defaults to "1" = Bookmarks Bar)'),
})

// Data handed back to the caller
const CreateBookmarkFolderOutputSchema = z.object({
  id: z.string().describe('Created folder ID'),
  title: z.string().describe('Folder name'),
  parentId: z.string().optional().describe('Parent folder ID'),
  dateAdded: z.number().optional().describe('Creation timestamp'),
})

type CreateBookmarkFolderInput = z.infer<typeof CreateBookmarkFolderInputSchema>
type CreateBookmarkFolderOutput = z.infer<
  typeof CreateBookmarkFolderOutputSchema
>

/**
 * CreateBookmarkFolderAction - create a bookmark folder.
 *
 * Forwards `title` and the optional `parentId` to the bookmark adapter and
 * returns the id, title, parentId and dateAdded of the node it created.
 */
export class CreateBookmarkFolderAction extends ActionHandler<
  CreateBookmarkFolderInput,
  CreateBookmarkFolderOutput
> {
  readonly inputSchema = CreateBookmarkFolderInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(
    input: CreateBookmarkFolderInput,
  ): Promise<CreateBookmarkFolderOutput> {
    const { title, parentId } = input
    const folder = await this.bookmarkAdapter.createBookmarkFolder({
      title,
      parentId,
    })

    const output: CreateBookmarkFolderOutput = {
      id: folder.id,
      title: folder.title,
      parentId: folder.parentId,
      dateAdded: folder.dateAdded,
    }
    return output
  }
}

View File

@@ -0,0 +1,59 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const GetBookmarkChildrenInputSchema = z.object({
  folderId: z.string().describe('Folder ID to get children from'),
})

// Data handed back to the caller
const GetBookmarkChildrenOutputSchema = z.object({
  children: z.array(
    z.object({
      id: z.string(),
      title: z.string(),
      url: z.string().optional(),
      parentId: z.string().optional(),
      dateAdded: z.number().optional(),
      isFolder: z.boolean(),
    }),
  ),
  count: z.number(),
})

type GetBookmarkChildrenInput = z.infer<typeof GetBookmarkChildrenInputSchema>
type GetBookmarkChildrenOutput = z.infer<typeof GetBookmarkChildrenOutputSchema>

/**
 * GetBookmarkChildrenAction - list the direct children of a bookmark folder.
 *
 * Each child is reduced to id/title/url/parentId/dateAdded plus an
 * `isFolder` flag, which is true when the node has no (or an empty) URL.
 * `count` is the number of children returned.
 */
export class GetBookmarkChildrenAction extends ActionHandler<
  GetBookmarkChildrenInput,
  GetBookmarkChildrenOutput
> {
  readonly inputSchema = GetBookmarkChildrenInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(
    input: GetBookmarkChildrenInput,
  ): Promise<GetBookmarkChildrenOutput> {
    const { folderId } = input
    const nodes = await this.bookmarkAdapter.getBookmarkChildren(folderId)

    const children = nodes.map((child) => {
      const hasUrl = Boolean(child.url)
      return {
        id: child.id,
        title: child.title,
        url: child.url,
        parentId: child.parentId,
        dateAdded: child.dateAdded,
        isFolder: !hasUrl,
      }
    })

    return { children, count: children.length }
  }
}

View File

@@ -0,0 +1,111 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema
const GetBookmarksInputSchema = z.object({
  query: z
    .string()
    .optional()
    .describe(
      'Search query to filter bookmarks (optional, returns recent bookmarks if not provided)',
    ),
  limit: z
    .number()
    .int()
    .positive()
    .optional()
    .default(20)
    .describe('Maximum number of results (default: 20)'),
  recent: z
    .boolean()
    .optional()
    .default(false)
    .describe('Get recent bookmarks instead of searching'),
})

// Output schema
const GetBookmarksOutputSchema = z.object({
  bookmarks: z.array(
    z.object({
      id: z.string(),
      title: z.string(),
      url: z.string().optional(),
      dateAdded: z.number().optional(),
      parentId: z.string().optional(),
    }),
  ),
  count: z.number(),
})

type GetBookmarksInput = z.infer<typeof GetBookmarksInputSchema>
type GetBookmarksOutput = z.infer<typeof GetBookmarksOutputSchema>

/**
 * GetBookmarksAction - search bookmarks or list the most recent ones.
 *
 * Input:
 * - query (optional): search text passed to the adapter's bookmark search
 * - limit (optional): maximum number of results (default: 20)
 * - recent (optional): force the "recent bookmarks" listing (default: false)
 *
 * Behavior:
 * - `recent: true`             -> recent bookmarks, `query` is ignored
 * - non-empty `query`          -> search results, truncated to `limit`
 * - neither (or empty `query`) -> recent bookmarks
 *
 * There is no "return every bookmark" mode: without a query the result is
 * always the recent list, capped by `limit`.
 *
 * Output:
 * - bookmarks: array of { id, title, url, dateAdded, parentId }
 * - count: number of bookmarks returned
 *
 * Usage:
 * - Get recent: { "recent": true }
 * - Search: { "query": "github" }
 * - Get the 50 most recent: { "limit": 50 }
 *
 * Example:
 * {
 *   "query": "google",
 *   "limit": 10
 * }
 * // Returns: { bookmarks: [{id: "1", title: "Google", url: "https://google.com"}], count: 1 }
 */
export class GetBookmarksAction extends ActionHandler<
  GetBookmarksInput,
  GetBookmarksOutput
> {
  readonly inputSchema = GetBookmarksInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: GetBookmarksInput): Promise<GetBookmarksOutput> {
    const { query, limit, recent } = input

    let results: chrome.bookmarks.BookmarkTreeNode[]
    if (!recent && query) {
      // The search call takes no limit here, so truncate afterwards.
      const matches = await this.bookmarkAdapter.searchBookmarks(query)
      results = matches.slice(0, limit)
    } else {
      // Explicit `recent` request, or nothing to search for.
      results = await this.bookmarkAdapter.getRecentBookmarks(limit)
    }

    // Keep only the fields that are part of the output contract.
    const bookmarks = results.map((b) => ({
      id: b.id,
      title: b.title,
      url: b.url,
      dateAdded: b.dateAdded,
      parentId: b.parentId,
    }))

    return {
      bookmarks,
      count: bookmarks.length,
    }
  }
}

View File

@@ -0,0 +1,49 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const MoveBookmarkInputSchema = z.object({
  id: z.string().describe('Bookmark or folder ID to move'),
  parentId: z.string().optional().describe('New parent folder ID'),
  index: z.number().int().min(0).optional().describe('Position within parent'),
})

// Data handed back to the caller
const MoveBookmarkOutputSchema = z.object({
  id: z.string().describe('Moved bookmark ID'),
  title: z.string().describe('Bookmark title'),
  url: z.string().optional().describe('Bookmark URL (undefined if folder)'),
  parentId: z.string().optional().describe('New parent folder ID'),
  index: z.number().optional().describe('New position within parent'),
})

type MoveBookmarkInput = z.infer<typeof MoveBookmarkInputSchema>
type MoveBookmarkOutput = z.infer<typeof MoveBookmarkOutputSchema>

/**
 * MoveBookmarkAction - move a bookmark or folder to another parent/position.
 *
 * Only the destination fields the caller actually supplied (`parentId`,
 * `index`) are forwarded to the adapter; omitted ones are left out of the
 * destination object entirely. Returns id/title/url/parentId/index of the
 * node reported by the adapter after the move.
 */
export class MoveBookmarkAction extends ActionHandler<
  MoveBookmarkInput,
  MoveBookmarkOutput
> {
  readonly inputSchema = MoveBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: MoveBookmarkInput): Promise<MoveBookmarkOutput> {
    const { id, parentId, index } = input

    // Spreading `false` adds nothing, so absent fields never become keys.
    const destination: { parentId?: string; index?: number } = {
      ...(parentId !== undefined && { parentId }),
      ...(index !== undefined && { index }),
    }

    const node = await this.bookmarkAdapter.moveBookmark(id, destination)

    const output: MoveBookmarkOutput = {
      id: node.id,
      title: node.title,
      url: node.url,
      parentId: node.parentId,
      index: node.index,
    }
    return output
  }
}

View File

@@ -0,0 +1,62 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const RemoveBookmarkInputSchema = z.object({
  id: z.string().describe('Bookmark ID to remove'),
})

// Data handed back to the caller
const RemoveBookmarkOutputSchema = z.object({
  success: z
    .boolean()
    .describe('Whether the bookmark was successfully removed'),
  message: z.string().describe('Confirmation message'),
})

type RemoveBookmarkInput = z.infer<typeof RemoveBookmarkInputSchema>
type RemoveBookmarkOutput = z.infer<typeof RemoveBookmarkOutputSchema>

/**
 * RemoveBookmarkAction - delete one bookmark by ID.
 *
 * Input:
 * - id: ID of the bookmark to delete (e.g. obtained from getBookmarks)
 *
 * Output:
 * - success: always true when the adapter call resolves
 * - message: confirmation text containing the ID
 *
 * If the adapter call rejects, the error propagates out of `execute()`.
 *
 * Example:
 * {
 *   "id": "123"
 * }
 * // Returns: { success: true, message: "Removed bookmark 123" }
 */
export class RemoveBookmarkAction extends ActionHandler<
  RemoveBookmarkInput,
  RemoveBookmarkOutput
> {
  readonly inputSchema = RemoveBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: RemoveBookmarkInput): Promise<RemoveBookmarkOutput> {
    const { id } = input
    await this.bookmarkAdapter.removeBookmark(id)

    const message = `Removed bookmark ${id}`
    return { success: true, message }
  }
}

View File

@@ -0,0 +1,48 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const RemoveBookmarkTreeInputSchema = z.object({
  id: z.string().describe('Folder ID to remove'),
  confirm: z.boolean().describe('Must be true to confirm recursive deletion'),
})

// Data handed back to the caller
const RemoveBookmarkTreeOutputSchema = z.object({
  success: z.boolean().describe('Whether the folder was removed'),
  message: z.string().describe('Result message'),
})

type RemoveBookmarkTreeInput = z.infer<typeof RemoveBookmarkTreeInputSchema>
type RemoveBookmarkTreeOutput = z.infer<typeof RemoveBookmarkTreeOutputSchema>

/**
 * RemoveBookmarkTreeAction - delete a bookmark folder and everything in it.
 *
 * The deletion only happens when `confirm` is exactly true. With
 * `confirm: false` nothing is deleted and the result is
 * `{ success: false }` plus an explanatory message (a normal return value,
 * not a thrown error).
 */
export class RemoveBookmarkTreeAction extends ActionHandler<
  RemoveBookmarkTreeInput,
  RemoveBookmarkTreeOutput
> {
  readonly inputSchema = RemoveBookmarkTreeInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(
    input: RemoveBookmarkTreeInput,
  ): Promise<RemoveBookmarkTreeOutput> {
    const { id, confirm } = input

    if (confirm === true) {
      await this.bookmarkAdapter.removeBookmarkTree(id)
      return {
        success: true,
        message: `Removed folder ${id} and all its contents`,
      }
    }

    // Not confirmed: refuse without touching any bookmarks.
    return {
      success: false,
      message:
        'Recursive deletion requires confirm: true. This will permanently delete the folder and all its contents.',
    }
  }
}

View File

@@ -0,0 +1,82 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BookmarkAdapter } from '@/adapters/BookmarkAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const UpdateBookmarkInputSchema = z.object({
  id: z.string().describe('Bookmark ID to update'),
  title: z.string().optional().describe('New bookmark title'),
  url: z.string().url().optional().describe('New bookmark URL'),
})

// Data handed back to the caller
const UpdateBookmarkOutputSchema = z.object({
  id: z.string().describe('Bookmark ID'),
  title: z.string().describe('Updated bookmark title'),
  url: z.string().optional().describe('Updated bookmark URL'),
})

type UpdateBookmarkInput = z.infer<typeof UpdateBookmarkInputSchema>
type UpdateBookmarkOutput = z.infer<typeof UpdateBookmarkOutputSchema>

/**
 * UpdateBookmarkAction - change the title and/or URL of a bookmark.
 *
 * Input:
 * - id: bookmark to update
 * - title (optional): new title
 * - url (optional): new URL
 *
 * At least one of `title` / `url` has to be present; otherwise `execute()`
 * throws before the adapter is called. Only the supplied fields are sent
 * to the adapter.
 *
 * Output:
 * - id, title, url of the node returned by the adapter
 *
 * Example:
 * {
 *   "id": "123",
 *   "title": "New Title",
 *   "url": "https://www.example.com"
 * }
 * // Returns: { id: "123", title: "New Title", url: "https://www.example.com" }
 */
export class UpdateBookmarkAction extends ActionHandler<
  UpdateBookmarkInput,
  UpdateBookmarkOutput
> {
  readonly inputSchema = UpdateBookmarkInputSchema
  private bookmarkAdapter = new BookmarkAdapter()

  async execute(input: UpdateBookmarkInput): Promise<UpdateBookmarkOutput> {
    const { id, title, url } = input

    const nothingToChange = title === undefined && url === undefined
    if (nothingToChange) {
      throw new Error('At least one of title or url must be provided')
    }

    // Spreading `false` adds nothing, so absent fields never become keys.
    const changes: { title?: string; url?: string } = {
      ...(title !== undefined && { title }),
      ...(url !== undefined && { url }),
    }

    const node = await this.bookmarkAdapter.updateBookmark(id, changes)
    return { id: node.id, title: node.title, url: node.url }
  }
}

View File

@@ -0,0 +1,79 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import {
BrowserOSAdapter,
type ScreenshotSizeKey,
} from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const CaptureScreenshotInputSchema = z.object({
  tabId: z.number().describe('The tab ID to capture'),
  size: z
    .enum(['small', 'medium', 'large'])
    .optional()
    .default('medium')
    .describe('Screenshot size preset (default: medium)'),
  showHighlights: z
    .boolean()
    .optional()
    .default(true)
    .describe('Show element highlights (default: true)'),
  width: z.number().optional().describe('Exact width in pixels'),
  height: z.number().optional().describe('Exact height in pixels'),
})

// Data handed back to the caller
const CaptureScreenshotOutputSchema = z.object({
  dataUrl: z.string().describe('Base64-encoded PNG data URL'),
})

type CaptureScreenshotInput = z.infer<typeof CaptureScreenshotInputSchema>
type CaptureScreenshotOutput = z.infer<typeof CaptureScreenshotOutputSchema>

/**
 * CaptureScreenshotAction - take a screenshot of a tab.
 *
 * Input:
 * - tabId: tab to capture
 * - size: 'small' | 'medium' | 'large' preset, 'medium' when omitted
 *   (the pixel dimensions behind each preset live in BrowserOSAdapter;
 *   earlier notes list 512 / 768 / 1028 px - confirm there)
 * - showHighlights: whether element highlights are requested (default true)
 * - width / height (optional): exact dimensions in pixels
 *
 * All five values are passed straight to
 * `BrowserOSAdapter.captureScreenshot`; how presets and explicit
 * dimensions interact is decided by the adapter.
 *
 * Returns:
 * - dataUrl: the string produced by the adapter (described by the schema
 *   as a base64-encoded PNG data URL)
 */
export class CaptureScreenshotAction extends ActionHandler<
  CaptureScreenshotInput,
  CaptureScreenshotOutput
> {
  readonly inputSchema = CaptureScreenshotInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(
    input: CaptureScreenshotInput,
  ): Promise<CaptureScreenshotOutput> {
    const { tabId, size, showHighlights, width, height } = input
    const sizeKey = size as ScreenshotSizeKey | undefined

    const dataUrl = await this.browserOSAdapter.captureScreenshot(
      tabId,
      sizeKey,
      showHighlights,
      width,
      height,
    )

    return { dataUrl }
  }
}

View File

@@ -0,0 +1,124 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import {
BrowserOSAdapter,
type ScreenshotSizeKey,
} from '@/adapters/BrowserOSAdapter'
import { logger } from '@/utils/logger'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Input schema
const CaptureScreenshotPointerInputSchema = z.object({
  tabId: z.number().describe('The tab ID to capture'),
  nodeId: z
    .number()
    .int()
    .positive()
    .describe('The nodeId to show pointer over'),
  size: z
    .enum(['small', 'medium', 'large'])
    .optional()
    .default('medium')
    .describe('Screenshot size preset (default: medium)'),
  pointerLabel: z
    .string()
    .optional()
    .describe('Optional label to show with pointer (e.g., "Click", "Type")'),
})

// Output schema
const CaptureScreenshotPointerOutputSchema = z.object({
  dataUrl: z.string().describe('Base64-encoded PNG data URL'),
  pointerPosition: z
    .object({
      x: z.number(),
      y: z.number(),
    })
    .optional()
    .describe('Coordinates where pointer was shown'),
})

type CaptureScreenshotPointerInput = z.infer<
  typeof CaptureScreenshotPointerInputSchema
>
type CaptureScreenshotPointerOutput = z.infer<
  typeof CaptureScreenshotPointerOutputSchema
>

// Time given to the page to paint the pointer overlay before capturing.
const POINTER_RENDER_DELAY_MS = 100

/**
 * CaptureScreenshotPointerAction - show a pointer over an element, then
 * capture a screenshot of the tab.
 *
 * The element rectangle is looked up in `SnapshotCache` by tabId + nodeId.
 * - Rect found: the pointer is shown at the rect's center, the action waits
 *   briefly so the overlay can render, and `pointerPosition` is returned.
 * - Rect missing: a warning is logged and the screenshot is captured
 *   immediately without a pointer; `pointerPosition` is undefined.
 *
 * The screenshot is always taken with element highlights disabled.
 *
 * Prerequisite: the snapshot cache must already hold the node (populated by
 * an earlier interactive-snapshot call - see SnapshotCache).
 */
export class CaptureScreenshotPointerAction extends ActionHandler<
  CaptureScreenshotPointerInput,
  CaptureScreenshotPointerOutput
> {
  readonly inputSchema = CaptureScreenshotPointerInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(
    input: CaptureScreenshotPointerInput,
  ): Promise<CaptureScreenshotPointerOutput> {
    const { tabId, nodeId, size, pointerLabel } = input

    // Get element rect from cache
    const rect = SnapshotCache.getNodeRect(tabId, nodeId)
    let pointerPosition: { x: number; y: number } | undefined

    if (rect) {
      const { x, y } = PointerOverlay.getCenterCoordinates(rect)
      pointerPosition = { x, y }

      await PointerOverlay.showPointer(tabId, x, y, pointerLabel)
      logger.debug(
        `[CaptureScreenshotPointerAction] Showed pointer at (${x}, ${y}) for node ${nodeId}`,
      )

      // Only wait when a pointer was actually shown; without one there is
      // nothing to render and the delay would just slow the capture down.
      await this.delay(POINTER_RENDER_DELAY_MS)
    } else {
      logger.warn(
        `[CaptureScreenshotPointerAction] No cached rect for node ${nodeId} in tab ${tabId}. Capturing without pointer.`,
      )
    }

    const dataUrl = await this.browserOSAdapter.captureScreenshot(
      tabId,
      size as ScreenshotSizeKey | undefined,
      false, // Don't show highlights, we have the pointer
    )

    return {
      dataUrl,
      pointerPosition,
    }
  }

  /** Resolve after `ms` milliseconds. */
  private delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }
}

View File

@@ -0,0 +1,38 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const ClearInputSchema = z.object({
  tabId: z.number().describe('The tab ID containing the element'),
  nodeId: z
    .number()
    .int()
    .positive()
    .describe('The nodeId from interactive snapshot'),
})

type ClearInput = z.infer<typeof ClearInputSchema>

interface ClearOutput {
  success: boolean
}

/**
 * ClearAction - clear the text of an input element.
 *
 * Delegates to `BrowserOSAdapter.clear(tabId, nodeId)` and reports
 * `{ success: true }` once that call resolves; a rejection propagates.
 */
export class ClearAction extends ActionHandler<ClearInput, ClearOutput> {
  readonly inputSchema = ClearInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(input: ClearInput): Promise<ClearOutput> {
    const { tabId, nodeId } = input
    await this.browserOSAdapter.clear(tabId, nodeId)

    const outcome: ClearOutput = { success: true }
    return outcome
  }
}

View File

@@ -0,0 +1,62 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the action
const ClickInputSchema = z.object({
  tabId: z.number().describe('The tab ID containing the element'),
  nodeId: z
    .number()
    .int()
    .positive()
    .describe('The nodeId from interactive snapshot'),
})

// Data handed back to the caller
const ClickOutputSchema = z.object({
  success: z.boolean().describe('Whether the click succeeded'),
})

type ClickInput = z.infer<typeof ClickInputSchema>
type ClickOutput = z.infer<typeof ClickOutputSchema>

/**
 * ClickAction - click an element identified by its nodeId.
 *
 * Steps:
 * 1. Look up the element's rect in `SnapshotCache`; when one is cached,
 *    show a "Click" pointer at its center and wait for the overlay helper.
 * 2. Ask `BrowserOSAdapter` to click the node. This happens whether or not
 *    a rect was cached.
 *
 * The nodeId is expected to come from a prior interactive snapshot of the
 * same tab (per the schema description).
 */
export class ClickAction extends ActionHandler<ClickInput, ClickOutput> {
  readonly inputSchema = ClickInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  async execute(input: ClickInput): Promise<ClickOutput> {
    const { tabId, nodeId } = input

    // Visual feedback only: skipped when the node is not in the cache.
    const cachedRect = SnapshotCache.getNodeRect(tabId, nodeId)
    if (cachedRect) {
      const center = PointerOverlay.getCenterCoordinates(cachedRect)
      await PointerOverlay.showPointerAndWait(
        tabId,
        center.x,
        center.y,
        'Click',
      )
    }

    await this.browserOSAdapter.click(tabId, nodeId)
    return { success: true }
  }
}

View File

@@ -0,0 +1,69 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { ActionHandler } from '../ActionHandler'
// Payload accepted by the clickCoordinates action
const ClickCoordinatesInputSchema = z.object({
  tabId: z.number().int().positive().describe('Tab ID to click in'),
  x: z.number().int().nonnegative().describe('X coordinate in viewport pixels'),
  y: z.number().int().nonnegative().describe('Y coordinate in viewport pixels'),
})

type ClickCoordinatesInput = z.infer<typeof ClickCoordinatesInputSchema>

// Result returned to the caller after the click
export interface ClickCoordinatesOutput {
  success: boolean
  message: string
  coordinates: {
    x: number
    y: number
  }
}

/**
 * ClickCoordinatesAction - click at an (x, y) position in a tab.
 *
 * The coordinates are non-negative integers described by the schema as
 * viewport pixels. A "Click" pointer overlay is shown at that position
 * first, then the click is performed through the BrowserOS adapter.
 *
 * Typical reasons to use coordinates instead of a nodeId: the target has no
 * node ID, lives inside a canvas, or was located from a screenshot.
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "x": 500,
 *   "y": 300
 * }
 */
export class ClickCoordinatesAction extends ActionHandler<
  ClickCoordinatesInput,
  ClickCoordinatesOutput
> {
  readonly inputSchema = ClickCoordinatesInputSchema
  private browserOS = getBrowserOSAdapter()

  async execute(input: ClickCoordinatesInput): Promise<ClickCoordinatesOutput> {
    const { tabId, x, y } = input

    // Overlay first so the pointer is visible before the click lands.
    await PointerOverlay.showPointerAndWait(tabId, x, y, 'Click')
    await this.browserOS.clickCoordinates(tabId, x, y)

    const message = `Successfully clicked at coordinates (${x}, ${y}) in tab ${tabId}`
    const outcome: ClickCoordinatesOutput = {
      success: true,
      message,
      coordinates: { x, y },
    }
    return outcome
  }
}

View File

@@ -0,0 +1,38 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
import { ActionHandler } from '../ActionHandler'
// Input schema: the window to close, identified by a positive integer ID.
const CloseWindowInputSchema = z.object({
windowId: z.number().int().positive().describe('ID of the window to close'),
})
// Output schema: a single success flag.
const CloseWindowOutputSchema = z.object({
success: z.boolean().describe('Whether the window was successfully closed'),
})
// Static types derived from the schemas above.
type CloseWindowInput = z.infer<typeof CloseWindowInputSchema>
type CloseWindowOutput = z.infer<typeof CloseWindowOutputSchema>
/**
 * CloseWindowAction - close a browser window by ID.
 *
 * Calls `chrome.windows.remove` wrapped in `withTimeout`, using the
 * `CHROME_API_TIMEOUTS.CHROME_API` limit.
 */
export class CloseWindowAction extends ActionHandler<
  CloseWindowInput,
  CloseWindowOutput
> {
  readonly inputSchema = CloseWindowInputSchema

  /**
   * @param input - Holds the `windowId` to close.
   * @returns `{ success: true }` once the removal resolves; a rejection or
   *   timeout from `withTimeout` propagates to the caller.
   */
  async execute(input: CloseWindowInput): Promise<CloseWindowOutput> {
    const removal = chrome.windows.remove(input.windowId)
    await withTimeout(
      removal,
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.windows.remove',
    )
    return { success: true }
  }
}

View File

@@ -0,0 +1,73 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
import { ActionHandler } from '../ActionHandler'
// Input schema: every field is optional and falls back to a default
// (url 'about:blank', incognito false, focused true).
const CreateWindowInputSchema = z.object({
url: z
.string()
.optional()
.default('about:blank')
.describe('URL to open in the new window'),
incognito: z
.boolean()
.optional()
.default(false)
.describe('Create an incognito window'),
focused: z
.boolean()
.optional()
.default(true)
.describe('Whether to focus the new window'),
})
// Output schema: IDs of the new window and of its first tab.
const CreateWindowOutputSchema = z.object({
windowId: z.number().describe('ID of the newly created window'),
tabId: z.number().describe('ID of the first tab in the new window'),
})
// Static types derived from the schemas above.
type CreateWindowInput = z.infer<typeof CreateWindowInputSchema>
type CreateWindowOutput = z.infer<typeof CreateWindowOutputSchema>
/**
 * CreateWindowAction - open a new browser window.
 *
 * Calls `chrome.windows.create` under `withTimeout` and returns the IDs of
 * the new window and of its first tab.
 */
export class CreateWindowAction extends ActionHandler<
  CreateWindowInput,
  CreateWindowOutput
> {
  readonly inputSchema = CreateWindowInputSchema

  /**
   * @param input - URL, focus and incognito settings for the new window.
   * @returns The new window's ID and the ID of its first tab.
   * @throws Error if no window is returned, or the window has no ID or no
   *   first tab.
   */
  async execute(input: CreateWindowInput): Promise<CreateWindowOutput> {
    const { url, focused, incognito } = input
    const createData: chrome.windows.CreateData = { url, focused, incognito }

    const pendingWindow = chrome.windows.create(createData)
    const createdWindow = await withTimeout(
      pendingWindow,
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.windows.create',
    )

    // Each missing piece is reported with its own error.
    if (!createdWindow) {
      throw new Error('Failed to create window')
    }
    const windowId = createdWindow.id
    if (windowId === undefined) {
      throw new Error('Created window has no ID')
    }
    const firstTabId = createdWindow.tabs?.[0]?.id
    if (firstTabId === undefined) {
      throw new Error('Created window has no tab')
    }

    return { windowId, tabId: firstTabId }
  }
}

View File

@@ -0,0 +1,64 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: target tab and the JavaScript source to run.
// NOTE(review): tabId is validated only as a number here (no .int().positive()) — confirm whether that is intentional.
const ExecuteJavaScriptInputSchema = z.object({
tabId: z.number().describe('The tab ID to execute code in'),
code: z.string().describe('JavaScript code to execute'),
})
// Output schema: the result is left untyped (z.any()).
const ExecuteJavaScriptOutputSchema = z.object({
result: z.any().describe('The result of the code execution'),
})
// Static types derived from the schemas above.
type ExecuteJavaScriptInput = z.infer<typeof ExecuteJavaScriptInputSchema>
type ExecuteJavaScriptOutput = z.infer<typeof ExecuteJavaScriptOutputSchema>
/**
 * ExecuteJavaScriptAction - run JavaScript in a tab's page context.
 *
 * Passes the given code string to the BrowserOS adapter and returns whatever
 * the adapter resolves with.
 *
 * Input:
 * - tabId: tab to run the code in
 * - code: JavaScript source as a string
 *
 * Output:
 * - result: value produced by the executed code
 *
 * Sample snippets:
 * - Read page data: "document.title"
 * - Change the DOM: "document.body.style.background = 'red'"
 * - Read a field: "document.querySelector('#email').value"
 *
 * Example:
 * {
 *   "tabId": 123,
 *   "code": "document.title"
 * }
 * // Returns: { result: "Google" }
 */
export class ExecuteJavaScriptAction extends ActionHandler<
  ExecuteJavaScriptInput,
  ExecuteJavaScriptOutput
> {
  readonly inputSchema = ExecuteJavaScriptInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Tab ID and the code to execute.
   * @returns The adapter's result wrapped as `{ result }`.
   */
  async execute(
    input: ExecuteJavaScriptInput,
  ): Promise<ExecuteJavaScriptOutput> {
    const { tabId, code } = input
    const result = await this.browserOSAdapter.executeJavaScript(tabId, code)
    return { result }
  }
}

View File

@@ -0,0 +1,53 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: the tab (positive integer ID) whose accessibility tree is requested.
const GetAccessibilityTreeInputSchema = z.object({
tabId: z
.number()
.int()
.positive()
.describe('Tab ID to get accessibility tree from'),
})
// Static input type derived from the schema above.
type GetAccessibilityTreeInput = z.infer<typeof GetAccessibilityTreeInputSchema>
// The output is the chrome.browserOS accessibility tree type, returned as-is.
export type GetAccessibilityTreeOutput = chrome.browserOS.AccessibilityTree
/**
 * GetAccessibilityTreeAction - fetch the accessibility tree of a tab.
 *
 * The returned tree contains:
 * - rootId: ID of the root node
 * - nodes: map from node ID to accessibility node
 *
 * Every node carries:
 * - nodeId: unique identifier
 * - role: accessibility role (e.g. 'staticText', 'heading', 'button')
 * - name: text content or label
 * - childIds: IDs of the node's children
 *
 * Example payload:
 * {
 *   "tabId": 123
 * }
 */
export class GetAccessibilityTreeAction extends ActionHandler<
  GetAccessibilityTreeInput,
  GetAccessibilityTreeOutput
> {
  readonly inputSchema = GetAccessibilityTreeInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Holds the `tabId` to inspect.
   * @returns The tree exactly as returned by the adapter.
   */
  async execute(
    input: GetAccessibilityTreeInput,
  ): Promise<GetAccessibilityTreeOutput> {
    return await this.browserOSAdapter.getAccessibilityTree(input.tabId)
  }
}

View File

@@ -0,0 +1,71 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import type {
InteractiveSnapshot,
InteractiveSnapshotOptions,
} from '@/adapters/BrowserOSAdapter'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Input schema: target tab plus an optional options object.
// The includeHidden default (false) is declared inside `options`, and
// `options` itself is optional, so it may be absent entirely.
const GetInteractiveSnapshotInputSchema = z.object({
tabId: z.number().describe('The tab ID to get snapshot from'),
options: z
.object({
includeHidden: z
.boolean()
.optional()
.default(false)
.describe('Include hidden elements (default: false)'),
})
.optional()
.describe('Optional snapshot options'),
})
// Static input type derived from the schema above.
type GetInteractiveSnapshotInput = z.infer<
typeof GetInteractiveSnapshotInputSchema
>
/**
 * GetInteractiveSnapshotAction - list the interactive elements of a page.
 *
 * This is the most critical action: the nodeIds it returns are what the
 * click, inputText, clear and scrollToNode actions operate on.
 *
 * Returns:
 * - elements: interactive nodes, each with a nodeId
 * - hierarchicalStructure: string rendering of the page structure
 *
 * Every element carries:
 * - nodeId: sequential integer ID (1, 2, 3...)
 * - type: 'clickable' | 'typeable' | 'selectable'
 * - name: element text or label
 * - attributes: element properties (html-tag, role, etc.)
 * - rect: bounding box coordinates
 */
export class GetInteractiveSnapshotAction extends ActionHandler<
  GetInteractiveSnapshotInput,
  InteractiveSnapshot
> {
  readonly inputSchema = GetInteractiveSnapshotInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * Fetches the snapshot from the adapter and stores it in `SnapshotCache`
   * under the tab ID before returning it.
   *
   * @param input - Tab ID and optional snapshot options.
   * @returns The snapshot as returned by the adapter.
   */
  async execute(
    input: GetInteractiveSnapshotInput,
  ): Promise<InteractiveSnapshot> {
    const { tabId } = input
    const snapshotOptions = input.options as
      | InteractiveSnapshotOptions
      | undefined

    const snapshot = await this.browserOSAdapter.getInteractiveSnapshot(
      tabId,
      snapshotOptions,
    )

    // Cached so the pointer overlay can look up node rectangles later.
    SnapshotCache.set(tabId, snapshot)
    return snapshot
  }
}

View File

@@ -0,0 +1,69 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import {
BrowserOSAdapter,
type PageLoadStatus,
} from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for getPageLoadStatus action: a positive integer tab ID.
const GetPageLoadStatusInputSchema = z.object({
tabId: z
.number()
.int()
.positive()
.describe('Tab ID to check page load status'),
})
// Static input type derived from the schema above.
type GetPageLoadStatusInput = z.infer<typeof GetPageLoadStatusInputSchema>
// Output includes page load status details
export interface GetPageLoadStatusOutput {
// Echo of the requested tab ID.
tabId: number
// The three flags below are copied from the adapter's PageLoadStatus.
isResourcesLoading: boolean
isDOMContentLoaded: boolean
isPageComplete: boolean
}
/**
 * GetPageLoadStatusAction - report how far a tab's page has loaded.
 *
 * The result contains:
 * - isResourcesLoading: resources (images, scripts, etc.) are still loading
 * - isDOMContentLoaded: the DOM is fully parsed and ready
 * - isPageComplete: the page has completely finished loading
 *
 * Handy for waiting until a page is loaded before acting on it.
 *
 * Example payload:
 * {
 *   "tabId": 123
 * }
 */
export class GetPageLoadStatusAction extends ActionHandler<
  GetPageLoadStatusInput,
  GetPageLoadStatusOutput
> {
  readonly inputSchema = GetPageLoadStatusInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Holds the `tabId` to query.
   * @returns The adapter's three load flags together with the tab ID.
   */
  async execute(
    input: GetPageLoadStatusInput,
  ): Promise<GetPageLoadStatusOutput> {
    const tabId = input.tabId
    const loadStatus: PageLoadStatus =
      await this.browserOSAdapter.getPageLoadStatus(tabId)

    // Copy only the documented flags rather than returning the adapter object.
    const { isResourcesLoading, isDOMContentLoaded, isPageComplete } =
      loadStatus
    return { tabId, isResourcesLoading, isDOMContentLoaded, isPageComplete }
  }
}

View File

@@ -0,0 +1,74 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter, type Snapshot } from '@/adapters/BrowserOSAdapter'
import { logger } from '@/utils/logger'
import { ActionHandler } from '../ActionHandler'
// Input schema for getSnapshot action.
// `type` defaults to 'text'. `options` is optional; when present it may carry
// a context ('visible' | 'full') and a list of page sections to include.
const GetSnapshotInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to get snapshot from'),
type: z
.enum(['text', 'links'])
.default('text')
.describe('Type of snapshot: text or links'),
options: z
.object({
context: z.enum(['visible', 'full']).optional(),
includeSections: z
.array(
z.enum([
'main',
'navigation',
'footer',
'header',
'article',
'aside',
]),
)
.optional(),
})
.optional()
.describe('Optional snapshot configuration'),
})
// Static input type derived from the schema above.
type GetSnapshotInput = z.infer<typeof GetSnapshotInputSchema>
// Output is the full snapshot structure
export type GetSnapshotOutput = Snapshot
/**
 * GetSnapshotAction - extract a content snapshot of a page.
 *
 * The snapshot holds structured page content:
 * - Headings (with levels)
 * - Text content
 * - Links (with URLs)
 *
 * Items come back in document order with type information.
 *
 * Note: the input schema accepts an `options` object, but this handler only
 * forwards `tabId` and `type` to the adapter.
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "type": "text"
 * }
 */
export class GetSnapshotAction extends ActionHandler<
  GetSnapshotInput,
  GetSnapshotOutput
> {
  readonly inputSchema = GetSnapshotInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * Logs the request, then returns the adapter's snapshot unchanged.
   *
   * @param input - Tab ID and snapshot type.
   */
  async execute(input: GetSnapshotInput): Promise<GetSnapshotOutput> {
    const { tabId, type } = input

    logger.info(
      `[GetSnapshotAction] Getting snapshot for tab ${tabId} with type ${type}`,
    )

    return await this.browserOSAdapter.getSnapshot(tabId, type)
  }
}

View File

@@ -0,0 +1,75 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { SnapshotCache } from '@/utils/SnapshotCache'
import { ActionHandler } from '../ActionHandler'
// Input schema: target tab, the element's nodeId (positive integer) and the text to type.
// NOTE(review): tabId is validated only as a number here (no .int().positive()) — confirm whether that is intentional.
const InputTextInputSchema = z.object({
tabId: z.number().describe('The tab ID containing the element'),
nodeId: z
.number()
.int()
.positive()
.describe('The nodeId from interactive snapshot'),
text: z.string().describe('Text to type into the element'),
})
// Output schema: a single success flag.
const InputTextOutputSchema = z.object({
success: z.boolean().describe('Whether the input succeeded'),
})
// Static types derived from the schemas above.
type InputTextInput = z.infer<typeof InputTextInputSchema>
type InputTextOutput = z.infer<typeof InputTextOutputSchema>
/**
 * InputTextAction - type text into the element identified by a nodeId.
 *
 * Prerequisites:
 * - Call getInteractiveSnapshot first to obtain valid nodeIds
 * - The element must be typeable (type: 'typeable' in the snapshot)
 * - NodeIds are only valid for the current page state
 *
 * Behavior:
 * - Existing text is cleared before typing (handled by the adapter)
 * - The full text string is typed
 * - input/change events are triggered
 *
 * Usage:
 * 1. Take a snapshot to find typeable elements
 * 2. Pick the input field by nodeId
 * 3. Call inputText with tabId, nodeId and text
 *
 * Used by: TypeTool, form automation workflows
 */
export class InputTextAction extends ActionHandler<
  InputTextInput,
  InputTextOutput
> {
  readonly inputSchema = InputTextInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * If the snapshot cache has a rectangle for the node, the pointer overlay
   * is shown first with a label previewing the text (cut to 20 characters
   * plus "..." when longer). The text is then typed through the adapter.
   *
   * @returns `{ success: true }` once the adapter call resolves.
   */
  async execute(input: InputTextInput): Promise<InputTextOutput> {
    const { tabId, nodeId, text } = input

    const nodeRect = SnapshotCache.getNodeRect(tabId, nodeId)
    if (nodeRect) {
      const anchor = PointerOverlay.getLeftCenterCoordinates(nodeRect)

      let textPreview: string
      if (text.length > 20) {
        textPreview = `Type: ${text.substring(0, 20)}...`
      } else {
        textPreview = `Type: ${text}`
      }

      await PointerOverlay.showPointerAndWait(
        tabId,
        anchor.x,
        anchor.y,
        textPreview,
      )
    }

    await this.browserOSAdapter.inputText(tabId, nodeId, text)
    return { success: true }
  }
}

View File

@@ -0,0 +1,54 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: the tab to scroll.
// NOTE(review): tabId is validated only as a number here (no .int().positive()) — confirm whether that is intentional.
const ScrollDownInputSchema = z.object({
tabId: z.number().describe('The tab ID to scroll'),
})
// Output schema: a single success flag.
const ScrollDownOutputSchema = z.object({
success: z.boolean().describe('Whether the scroll succeeded'),
})
// Static types derived from the schemas above.
type ScrollDownInput = z.infer<typeof ScrollDownInputSchema>
type ScrollDownOutput = z.infer<typeof ScrollDownOutputSchema>
/**
 * ScrollDownAction - scroll a page down.
 *
 * Sends the PageDown key to the tab (one viewport height) instead of using
 * the direct scrollDown API, which was found to be less reliable.
 *
 * Input:
 * - tabId: tab to scroll
 *
 * Output:
 * - success: true if the scroll succeeded
 *
 * Usage:
 * Scroll through long pages to reach content below the fold.
 */
export class ScrollDownAction extends ActionHandler<
  ScrollDownInput,
  ScrollDownOutput
> {
  readonly inputSchema = ScrollDownInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * Sends PageDown, waits 100 ms for the scroll to complete, then reports
   * success.
   */
  async execute(input: ScrollDownInput): Promise<ScrollDownOutput> {
    await this.browserOSAdapter.sendKeys(input.tabId, 'PageDown')

    // Short fixed pause so the scroll can complete before returning.
    await new Promise((done) => {
      setTimeout(done, 100)
    })

    return { success: true }
  }
}

View File

@@ -0,0 +1,42 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: target tab and the nodeId (positive integer) to bring into view.
// NOTE(review): tabId is validated only as a number here (no .int().positive()) — confirm whether that is intentional.
const ScrollToNodeInputSchema = z.object({
tabId: z.number().describe('The tab ID containing the element'),
nodeId: z.number().int().positive().describe('The nodeId to scroll to'),
})
// Static input type derived from the schema above.
type ScrollToNodeInput = z.infer<typeof ScrollToNodeInputSchema>
// Output: the boolean returned by the adapter's scrollToNode call.
interface ScrollToNodeOutput {
scrolled: boolean
}
/**
 * ScrollToNodeAction - bring an element into view.
 *
 * Scrolls the page so the given element is visible in the viewport and
 * reports whether any scrolling actually took place.
 *
 * Used by: Click/Type tools to ensure the element is visible before
 * interacting with it
 */
export class ScrollToNodeAction extends ActionHandler<
  ScrollToNodeInput,
  ScrollToNodeOutput
> {
  readonly inputSchema = ScrollToNodeInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * @param input - Tab ID and the nodeId to scroll to.
   * @returns `{ scrolled }` carrying the adapter's result.
   */
  async execute(input: ScrollToNodeInput): Promise<ScrollToNodeOutput> {
    const { tabId, nodeId } = input
    const scrolled = await this.browserOSAdapter.scrollToNode(tabId, nodeId)
    return { scrolled }
  }
}

View File

@@ -0,0 +1,54 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { BrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: the tab to scroll.
// NOTE(review): tabId is validated only as a number here (no .int().positive()) — confirm whether that is intentional.
const ScrollUpInputSchema = z.object({
tabId: z.number().describe('The tab ID to scroll'),
})
// Output schema: a single success flag.
const ScrollUpOutputSchema = z.object({
success: z.boolean().describe('Whether the scroll succeeded'),
})
// Static types derived from the schemas above.
type ScrollUpInput = z.infer<typeof ScrollUpInputSchema>
type ScrollUpOutput = z.infer<typeof ScrollUpOutputSchema>
/**
 * ScrollUpAction - scroll a page up.
 *
 * Sends the PageUp key to the tab (one viewport height) instead of using
 * the direct scrollUp API, which was found to be less reliable.
 *
 * Input:
 * - tabId: tab to scroll
 *
 * Output:
 * - success: true if the scroll succeeded
 *
 * Usage:
 * Scroll back up through long pages.
 */
export class ScrollUpAction extends ActionHandler<
  ScrollUpInput,
  ScrollUpOutput
> {
  readonly inputSchema = ScrollUpInputSchema
  private browserOSAdapter = BrowserOSAdapter.getInstance()

  /**
   * Sends PageUp, waits 100 ms for the scroll to complete, then reports
   * success.
   */
  async execute(input: ScrollUpInput): Promise<ScrollUpOutput> {
    await this.browserOSAdapter.sendKeys(input.tabId, 'PageUp')

    // Short fixed pause so the scroll can complete before returning.
    await new Promise((done) => {
      setTimeout(done, 100)
    })

    return { success: true }
  }
}

View File

@@ -0,0 +1,69 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for sendKeys action.
// `key` is restricted to the fixed set of special keys listed in the enum below.
const SendKeysInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to send keys to'),
key: z
.enum([
'Enter',
'Delete',
'Backspace',
'Tab',
'Escape',
'ArrowUp',
'ArrowDown',
'ArrowLeft',
'ArrowRight',
'Home',
'End',
'PageUp',
'PageDown',
])
.describe('Keyboard key to send'),
})
// Static input type derived from the schema above.
type SendKeysInput = z.infer<typeof SendKeysInputSchema>
// Output is just success (void result)
export interface SendKeysOutput {
// Whether the key was sent.
success: boolean
// Human-readable confirmation text.
message: string
}
/**
 * SendKeysAction - send a special keyboard key to a tab.
 *
 * Supports keys such as Enter, Escape and the arrow keys; handy for
 * navigation, submitting forms, dismissing dialogs, and so on.
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "key": "Enter"
 * }
 */
export class SendKeysAction extends ActionHandler<
  SendKeysInput,
  SendKeysOutput
> {
  readonly inputSchema = SendKeysInputSchema
  private browserOS = getBrowserOSAdapter()

  /**
   * @param input - Tab ID and the key to send.
   * @returns A success record naming the key and the tab.
   */
  async execute(input: SendKeysInput): Promise<SendKeysOutput> {
    const { tabId, key } = input

    // The schema's enum value is cast to the adapter's key type.
    const browserKey = key as chrome.browserOS.Key
    await this.browserOS.sendKeys(tabId, browserKey)

    const message = `Successfully sent "${key}" to tab ${tabId}`
    return { success: true, message }
  }
}

View File

@@ -0,0 +1,81 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { PointerOverlay } from '@/utils/PointerOverlay'
import { ActionHandler } from '../ActionHandler'
// Input schema for typeAtCoordinates action.
// tabId must be a positive integer, x and y non-negative integers, and text
// must be non-empty (min length 1).
const TypeAtCoordinatesInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to type in'),
x: z.number().int().nonnegative().describe('X coordinate in viewport pixels'),
y: z.number().int().nonnegative().describe('Y coordinate in viewport pixels'),
text: z.string().min(1).describe('Text to type at the location'),
})
// Static input type derived from the schema above.
type TypeAtCoordinatesInput = z.infer<typeof TypeAtCoordinatesInputSchema>
// Output confirms the typing
export interface TypeAtCoordinatesOutput {
// Whether the text was typed.
success: boolean
// Human-readable confirmation text.
message: string
// The coordinates that were targeted.
coordinates: {
x: number
y: number
}
// Length of the full input text.
textLength: number
}
/**
 * TypeAtCoordinatesAction - type text at an (x, y) position in the viewport.
 *
 * Coordinates are pixels measured from the top-left corner (0, 0) of the
 * visible viewport.
 *
 * The action will:
 * 1. Click at the coordinates to focus the element
 * 2. Type the given text
 *
 * Typical uses:
 * - Input fields that expose no accessible node ID
 * - Complex forms or canvas-based inputs
 * - Vision-based automation where coordinates come from a screenshot
 *
 * Example payload:
 * {
 *   "tabId": 123,
 *   "x": 500,
 *   "y": 300,
 *   "text": "Hello World"
 * }
 */
export class TypeAtCoordinatesAction extends ActionHandler<
  TypeAtCoordinatesInput,
  TypeAtCoordinatesOutput
> {
  readonly inputSchema = TypeAtCoordinatesInputSchema
  private browserOS = getBrowserOSAdapter()

  /**
   * Shows the pointer overlay with a preview of the text (cut to 20
   * characters plus "..." when longer), then types through the adapter.
   *
   * @returns A success record whose message quotes at most the first 50
   *   characters of the text, plus the coordinates and full text length.
   */
  async execute(
    input: TypeAtCoordinatesInput,
  ): Promise<TypeAtCoordinatesOutput> {
    const { tabId, x, y, text } = input

    let textPreview: string
    if (text.length > 20) {
      textPreview = `Type: ${text.substring(0, 20)}...`
    } else {
      textPreview = `Type: ${text}`
    }

    await PointerOverlay.showPointerAndWait(tabId, x, y, textPreview)
    await this.browserOS.typeAtCoordinates(tabId, x, y, text)

    const message = `Successfully typed "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}" at coordinates (${x}, ${y}) in tab ${tabId}`
    return {
      success: true,
      message,
      coordinates: { x, y },
      textLength: text.length,
    }
  }
}

View File

@@ -0,0 +1,90 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { ActionHandler } from '../ActionHandler'
// Input schema - no input needed; z.any() accepts whatever payload is sent.
const CheckBrowserOSInputSchema = z.any()
// Output schema: `available` is always present; `apis` and `error` are optional.
const CheckBrowserOSOutputSchema = z.object({
available: z.boolean(),
apis: z.array(z.string()).optional(),
error: z.string().optional(),
})
// Static types derived from the schemas above.
type CheckBrowserOSInput = z.infer<typeof CheckBrowserOSInputSchema>
type CheckBrowserOSOutput = z.infer<typeof CheckBrowserOSOutputSchema>
/**
 * CheckBrowserOSAction - Diagnostic action to check if chrome.browserOS is available
 *
 * This action checks:
 * 1. Whether the chrome global and the chrome.browserOS namespace exist
 * 2. Which function-valued members the namespace exposes
 * 3. Returns the findings, logging each step to the console
 */
export class CheckBrowserOSAction extends ActionHandler<
  CheckBrowserOSInput,
  CheckBrowserOSOutput
> {
  readonly inputSchema = CheckBrowserOSInputSchema

  /**
   * Runs the diagnostic. This method never throws: any unexpected error is
   * caught and reported through the `error` field.
   *
   * @param _input - Ignored; the action takes no input.
   * @returns `{ available: true, apis }` with the sorted function names when
   *   chrome.browserOS exists, otherwise `{ available: false, error }`.
   */
  async execute(_input: CheckBrowserOSInput): Promise<CheckBrowserOSOutput> {
    try {
      console.log('[CheckBrowserOSAction] Starting diagnostic...')
      console.log('[CheckBrowserOSAction] typeof chrome:', typeof chrome)

      // `typeof` is safe on an undeclared global, whereas a bare
      // `chrome !== undefined` throws ReferenceError and could never log false.
      const chromeExists = typeof chrome !== 'undefined'
      console.log('[CheckBrowserOSAction] chrome exists:', chromeExists)

      // Only touch chrome.browserOS once the chrome global is known to exist.
      const browserOSType = chromeExists ? typeof chrome.browserOS : 'undefined'
      const browserOSExists = browserOSType !== 'undefined'
      console.log(
        '[CheckBrowserOSAction] typeof chrome.browserOS:',
        browserOSType,
      )
      console.log('[CheckBrowserOSAction] browserOSExists:', browserOSExists)

      if (!browserOSExists) {
        console.log('[CheckBrowserOSAction] chrome.browserOS is NOT available')
        return {
          available: false,
          error:
            'chrome.browserOS is undefined - not running in BrowserOS Chrome',
        }
      }

      // Collect every enumerable member of the namespace that is a function.
      const apis: string[] = []
      const browserOS = chrome.browserOS as Record<string, unknown>
      for (const key in browserOS) {
        if (typeof browserOS[key] === 'function') {
          apis.push(key)
        }
      }
      console.log('[CheckBrowserOSAction] Found APIs:', apis)

      return {
        available: true,
        apis: apis.sort(),
      }
    } catch (error) {
      console.error('[CheckBrowserOSAction] Error during diagnostic:', error)
      // Non-Error throwables are stringified; falsy ones get a generic message.
      const errorMsg =
        error instanceof Error
          ? error.message
          : error
            ? String(error)
            : 'Unknown error'
      return {
        available: false,
        error: errorMsg,
      }
    }
  }
}

View File

@@ -0,0 +1,96 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { HistoryAdapter } from '@/adapters/HistoryAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: both fields are optional positive integers with defaults
// (maxResults 20, hoursBack 24).
const GetRecentHistoryInputSchema = z.object({
maxResults: z
.number()
.int()
.positive()
.optional()
.default(20)
.describe('Maximum number of results (default: 20)'),
hoursBack: z
.number()
.int()
.positive()
.optional()
.default(24)
.describe('How many hours back to search (default: 24)'),
})
// Output schema: the history items plus their count. Only `id` is required per item.
const GetRecentHistoryOutputSchema = z.object({
items: z.array(
z.object({
id: z.string(),
url: z.string().optional(),
title: z.string().optional(),
lastVisitTime: z.number().optional(),
visitCount: z.number().optional(),
}),
),
count: z.number(),
})
// Static types derived from the schemas above.
type GetRecentHistoryInput = z.infer<typeof GetRecentHistoryInputSchema>
type GetRecentHistoryOutput = z.infer<typeof GetRecentHistoryOutputSchema>
/**
 * GetRecentHistoryAction - fetch recent browser history.
 *
 * Input:
 * - maxResults (optional): maximum number of results (default: 20)
 * - hoursBack (optional): time range in hours (default: 24)
 *
 * Output:
 * - items: recent history items
 * - count: number of items returned
 *
 * Usage:
 * - Last 24 hours: { }
 * - Last hour: { "hoursBack": 1 }
 * - Last week: { "hoursBack": 168, "maxResults": 50 }
 *
 * Example:
 * {
 *   "maxResults": 10,
 *   "hoursBack": 1
 * }
 * // Returns: { items: [{url: "https://google.com", title: "Google", lastVisitTime: 1729012345678}], count: 10 }
 */
export class GetRecentHistoryAction extends ActionHandler<
  GetRecentHistoryInput,
  GetRecentHistoryOutput
> {
  readonly inputSchema = GetRecentHistoryInputSchema
  private historyAdapter = new HistoryAdapter()

  /**
   * Queries the history adapter and projects each result onto the fields
   * declared in the output schema.
   *
   * @param input - Result limit and look-back window in hours.
   * @returns The projected items and their count.
   */
  async execute(input: GetRecentHistoryInput): Promise<GetRecentHistoryOutput> {
    const { maxResults, hoursBack } = input
    const historyEntries = await this.historyAdapter.getRecentHistory(
      maxResults,
      hoursBack,
    )

    // Keep only the documented fields of each history entry.
    const items = historyEntries.map(
      ({ id, url, title, lastVisitTime, visitCount }) => ({
        id,
        url,
        title,
        lastVisitTime,
        visitCount,
      }),
    )

    return { items, count: items.length }
  }
}

View File

@@ -0,0 +1,104 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { HistoryAdapter } from '@/adapters/HistoryAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: a required query string, an optional result limit
// (default 20) and optional start/end times in epoch milliseconds.
const SearchHistoryInputSchema = z.object({
query: z.string().describe('Search query to match URL or title'),
maxResults: z
.number()
.int()
.positive()
.optional()
.default(20)
.describe('Maximum number of results (default: 20)'),
startTime: z
.number()
.optional()
.describe('Start time in milliseconds since epoch (optional)'),
endTime: z
.number()
.optional()
.describe('End time in milliseconds since epoch (optional)'),
})
// Output schema: the matching history items plus their count. Only `id` is required per item.
const SearchHistoryOutputSchema = z.object({
items: z.array(
z.object({
id: z.string(),
url: z.string().optional(),
title: z.string().optional(),
lastVisitTime: z.number().optional(),
visitCount: z.number().optional(),
typedCount: z.number().optional(),
}),
),
count: z.number(),
})
// Static types derived from the schemas above.
type SearchHistoryInput = z.infer<typeof SearchHistoryInputSchema>
type SearchHistoryOutput = z.infer<typeof SearchHistoryOutputSchema>
/**
 * SearchHistoryAction - search the browser history.
 *
 * Looks for history entries whose URL or title matches the query.
 *
 * Input:
 * - query: search text (matched against URL and title)
 * - maxResults (optional): maximum number of results (default: 20)
 * - startTime (optional): start-time filter
 * - endTime (optional): end-time filter
 *
 * Output:
 * - items: matching history items
 * - count: number of items returned
 *
 * Usage:
 * - Simple search: { "query": "github" }
 * - With limit: { "query": "google", "maxResults": 10 }
 * - Time range: { "query": "", "startTime": 1729000000000, "endTime": 1729100000000 }
 *
 * Example:
 * {
 *   "query": "github",
 *   "maxResults": 5
 * }
 * // Returns: { items: [{url: "https://github.com", title: "GitHub", visitCount: 42}], count: 1 }
 */
export class SearchHistoryAction extends ActionHandler<
  SearchHistoryInput,
  SearchHistoryOutput
> {
  readonly inputSchema = SearchHistoryInputSchema
  private historyAdapter = new HistoryAdapter()

  /**
   * Queries the history adapter and projects each result onto the fields
   * declared in the output schema.
   *
   * @param input - Query text, result limit and optional time bounds.
   * @returns The projected items and their count.
   */
  async execute(input: SearchHistoryInput): Promise<SearchHistoryOutput> {
    const { query, maxResults, startTime, endTime } = input
    const matches = await this.historyAdapter.searchHistory(
      query,
      maxResults,
      startTime,
      endTime,
    )

    // Keep only the documented fields of each history entry.
    const items = matches.map(
      ({ id, url, title, lastVisitTime, visitCount, typedCount }) => ({
        id,
        url,
        title,
        lastVisitTime,
        visitCount,
        typedCount,
      }),
    )

    return { items, count: items.length }
  }
}

View File

@@ -0,0 +1,61 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema: the tab to close, identified by a positive integer ID.
const CloseTabInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to close'),
})
// Output schema: success flag plus a confirmation message.
const CloseTabOutputSchema = z.object({
success: z.boolean().describe('Whether the tab was successfully closed'),
message: z.string().describe('Confirmation message'),
})
// Static types derived from the schemas above.
type CloseTabInput = z.infer<typeof CloseTabInputSchema>
type CloseTabOutput = z.infer<typeof CloseTabOutputSchema>
/**
 * CloseTabAction - close one tab by its ID.
 *
 * Input:
 * - tabId: ID of the tab to close
 *
 * Output:
 * - success: true if the tab was closed
 * - message: confirmation message
 *
 * Usage:
 * Close tabs that are no longer needed. Tab IDs can be obtained from the
 * getTabs or openTab actions.
 *
 * Example:
 * {
 *   "tabId": 123
 * }
 * // Returns: { success: true, message: "Closed tab 123" }
 */
export class CloseTabAction extends ActionHandler<
  CloseTabInput,
  CloseTabOutput
> {
  readonly inputSchema = CloseTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Holds the `tabId` to close.
   * @returns A success record once the adapter call resolves.
   */
  async execute(input: CloseTabInput): Promise<CloseTabOutput> {
    await this.tabAdapter.closeTab(input.tabId)

    const message = `Closed tab ${input.tabId}`
    return { success: true, message }
  }
}

View File

@@ -0,0 +1,103 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
/**
* GetActiveTabAction - Returns information about the currently active tab
*
* Input: { windowId? } - optional window ID; when omitted, the current window is used
* Output: { tabId, url, title, windowId }
*
* Use Case:
* - Agent needs to know which tab user is currently viewing
* - Required for most automation actions (need to know target tab)
*
* Example Request:
* {
* "id": "req-123",
* "action": "getActiveTab",
* "payload": {}
* }
*
* Example Response:
* {
* "id": "req-123",
* "ok": true,
* "data": {
* "tabId": 5,
* "url": "https://google.com",
* "title": "Google",
* "windowId": 1
* }
* }
*/
// Input schema - accepts optional windowId for multi-window support.
// .passthrough() lets unrecognized keys in the payload through instead of stripping them.
const GetActiveTabInputSchema = z
.object({
windowId: z
.number()
.int()
.optional()
.describe(
'Window ID to get active tab from. If not provided, uses current window.',
),
})
.passthrough()
// Output type
export interface GetActiveTabOutput {
// ID of the active tab.
tabId: number
// URL and title fall back to '' when the tab does not report them.
url: string
title: string
// ID of the window that contains the tab.
windowId: number
}
// Static input type derived from the schema above.
type GetActiveTabInput = z.infer<typeof GetActiveTabInputSchema>
export class GetActiveTabAction extends ActionHandler<
  GetActiveTabInput,
  GetActiveTabOutput
> {
  readonly inputSchema = GetActiveTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * Look up the active tab and return its key fields.
   *
   * Steps:
   * 1. Ask TabAdapter for the active tab (in `windowId` when given)
   * 2. Check that the tab has an ID and a window ID
   * 3. Return the typed result; a missing URL or title becomes ''
   *
   * @param input - May carry a `windowId` selecting the window to inspect
   * @returns Information about the active tab
   * @throws Error if the tab has no ID or no window ID
   */
  async execute(input: GetActiveTabInput): Promise<GetActiveTabOutput> {
    const activeTab = await this.tabAdapter.getActiveTab(input.windowId)

    const { id: tabId, windowId } = activeTab
    if (tabId === undefined) {
      throw new Error('Active tab has no ID')
    }
    if (windowId === undefined) {
      throw new Error('Active tab has no window ID')
    }

    const url = activeTab.url || ''
    const title = activeTab.title || ''
    return { tabId, url, title, windowId }
  }
}

View File

@@ -0,0 +1,122 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for getTabs action.
// Every field is optional, so an empty payload ({}) is valid.
const GetTabsInputSchema = z
.object({
// Defaults to false when omitted, so the parsed value is always a boolean
currentWindowOnly: z
.boolean()
.optional()
.default(false)
.describe('If true, return only tabs in current window'),
// Must be an integer when present
windowId: z
.number()
.int()
.optional()
.describe('If specified, return tabs in this window only'),
url: z
.string()
.optional()
.describe(
'URL pattern to filter tabs (supports wildcards like "*://*.google.com/*")',
),
title: z.string().optional().describe('Title pattern to filter tabs'),
})
.describe('Optional filters for querying tabs')
// Input type is inferred from the schema so the two cannot drift apart
type GetTabsInput = z.infer<typeof GetTabsInputSchema>
// Tab info in output - a simplified view of chrome.tabs.Tab.
// GetTabsAction only emits entries for tabs that have both id and windowId.
interface TabInfo {
id: number
url: string
title: string
windowId: number
active: boolean
index: number
}
// Output with array of tabs; count is the length of `tabs`
export interface GetTabsOutput {
tabs: TabInfo[]
count: number
}
/**
 * GetTabsAction - List all available tabs
 *
 * Returns a list of all tabs (or filtered tabs) with their IDs, URLs, titles, and window info.
 * Essential for discovering which tabs exist before taking actions on them.
 *
 * Filters (all optional, and combinable):
 * - currentWindowOnly: true to only get tabs in the current window
 * - windowId: Get tabs in a specific window (takes precedence over currentWindowOnly)
 * - url: URL pattern (supports wildcards like "*://*.google.com/*")
 * - title: Title pattern (supports wildcards)
 *
 * When a url/title pattern is given together with a window restriction, the
 * pattern is applied within that window instead of being ignored.
 *
 * Example payloads:
 *
 * Get all tabs across all windows:
 * {}
 *
 * Get tabs in current window only:
 * { "currentWindowOnly": true }
 *
 * Get tabs in specific window:
 * { "windowId": 12345 }
 *
 * Get all Google tabs:
 * { "url": "*://*.google.com/*" }
 *
 * Get Google tabs in a specific window:
 * { "windowId": 12345, "url": "*://*.google.com/*" }
 */
export class GetTabsAction extends ActionHandler<GetTabsInput, GetTabsOutput> {
  readonly inputSchema = GetTabsInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * Fetch the tabs selected by `input` and convert them to TabInfo records.
   *
   * @param input - Optional window and url/title filters
   * @returns The matching tabs and their count; tabs lacking an id or
   *          windowId are omitted
   */
  async execute(input: GetTabsInput): Promise<GetTabsOutput> {
    const tabs = await this.fetchTabs(input)

    // Convert to simplified TabInfo format, dropping tabs that cannot be
    // addressed by later actions (no id / no windowId).
    const tabInfos: TabInfo[] = tabs
      .filter(
        (tab): tab is chrome.tabs.Tab & { id: number; windowId: number } =>
          tab.id !== undefined && tab.windowId !== undefined,
      )
      .map((tab) => ({
        id: tab.id,
        url: tab.url || '',
        title: tab.title || '',
        windowId: tab.windowId,
        active: tab.active || false,
        index: tab.index,
      }))

    return {
      tabs: tabInfos,
      count: tabInfos.length,
    }
  }

  /**
   * Pick the adapter call that honours every filter present in `input`.
   */
  private async fetchTabs(input: GetTabsInput): Promise<chrome.tabs.Tab[]> {
    // Compare against undefined rather than relying on truthiness so an
    // explicitly supplied windowId is never silently ignored.
    const hasWindowId = input.windowId !== undefined

    if (input.url || input.title) {
      // Pattern filters go through the query API. The window restriction is
      // folded into the same query so it is applied together with the
      // pattern instead of replacing it.
      const query: chrome.tabs.QueryInfo = {}
      if (input.url) query.url = input.url
      if (input.title) query.title = input.title
      if (hasWindowId) {
        query.windowId = input.windowId
      } else if (input.currentWindowOnly) {
        // NOTE(review): assumes queryTabs forwards to chrome.tabs.query, where
        // `currentWindow: true` selects the same window getCurrentWindowTabs
        // uses - confirm against TabAdapter.
        query.currentWindow = true
      }
      return this.tabAdapter.queryTabs(query)
    }

    if (input.windowId !== undefined) {
      // Get tabs in specific window (windowId takes precedence)
      return this.tabAdapter.getTabsInWindow(input.windowId)
    }
    if (input.currentWindowOnly) {
      // Get tabs in current window (windowId may be injected by agent for multi-window support)
      return this.tabAdapter.getCurrentWindowTabs()
    }
    // No filters: get all tabs
    return this.tabAdapter.getAllTabs()
  }
}

View File

@@ -0,0 +1,126 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Colors accepted for a tab group; any other value fails validation
const TabGroupColorSchema = z.enum([
'grey',
'blue',
'red',
'yellow',
'green',
'pink',
'purple',
'cyan',
'orange',
])
// Input schema for the groupTabs action.
// Only tabIds is required; it must contain at least one positive integer.
const GroupTabsInputSchema = z
.object({
tabIds: z
.array(z.number().int().positive())
.min(1)
.describe('Array of tab IDs to group together'),
title: z
.string()
.optional()
.describe('Title for the group (e.g., "Shopping", "Work", "Research")'),
color: TabGroupColorSchema.optional().describe(
'Color for the group: grey, blue, red, yellow, green, pink, purple, cyan, orange',
),
groupId: z
.number()
.int()
.optional()
.describe(
'Existing group ID to add tabs to. If not specified, creates a new group.',
),
windowId: z
.number()
.int()
.optional()
.describe('Window ID for scoping the group lookup'),
})
.describe('Group tabs together with optional title and color')
// Input type is inferred from the schema so the two cannot drift apart
type GroupTabsInput = z.infer<typeof GroupTabsInputSchema>
// Result of the groupTabs action.
// tabCount is the number of tab IDs supplied in the request (GroupTabsAction
// reports input.tabIds.length), not the total size of the group.
export interface GroupTabsOutput {
groupId: number
title: string
color: string
tabCount: number
}
/**
 * GroupTabsAction - Put tabs into a tab group
 *
 * Adds the given tabs to a new group, or to an existing one when `groupId`
 * is supplied. When a title and/or color is provided, the group is updated
 * with them afterwards.
 *
 * Note: `tabCount` in the result is the number of tab IDs in the request.
 *
 * Example payloads:
 *
 * Create new group with tabs:
 * { "tabIds": [123, 456, 789], "title": "Shopping", "color": "green" }
 *
 * Add tabs to existing group:
 * { "tabIds": [123, 456], "groupId": 1 }
 *
 * Create unnamed group:
 * { "tabIds": [123, 456] }
 */
export class GroupTabsAction extends ActionHandler<
  GroupTabsInput,
  GroupTabsOutput
> {
  readonly inputSchema = GroupTabsInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Tab IDs plus optional title, color, groupId and windowId
   * @returns ID, title and color of the resulting group, and the number of
   *          tab IDs that were requested
   * @throws Error when no title/color was given and the group cannot be
   *         found afterwards
   */
  async execute(input: GroupTabsInput): Promise<GroupTabsOutput> {
    const { tabIds, title, color, windowId } = input

    // windowId is forwarded so tabs are not moved to the wrong window
    const groupId = await this.tabAdapter.groupTabs(
      tabIds,
      input.groupId,
      windowId,
    )

    // Shared shaping of the result from whichever group object we end up with
    const toOutput = (group: {
      title?: string
      color: string
    }): GroupTabsOutput => ({
      groupId,
      title: group.title || '',
      color: group.color,
      tabCount: tabIds.length,
    })

    const hasTitle = title !== undefined
    const hasColor = color !== undefined

    if (!hasTitle && !hasColor) {
      // Nothing to update: read the group back to report its title/color.
      // The lookup is scoped to windowId when one was provided.
      const existing = (await this.tabAdapter.getTabGroups(windowId)).find(
        (candidate) => candidate.id === groupId,
      )
      if (!existing) {
        throw new Error(`Tab group ${groupId} not found`)
      }
      return toOutput(existing)
    }

    // Apply only the properties the caller actually supplied
    const updateProps: chrome.tabGroups.UpdateProperties = {}
    if (hasTitle) updateProps.title = title
    if (hasColor) updateProps.color = color
    return toOutput(await this.tabAdapter.updateTabGroup(groupId, updateProps))
  }
}

View File

@@ -0,0 +1,83 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for the listTabGroups action; an empty payload ({}) is valid
const ListTabGroupsInputSchema = z
.object({
// Must be an integer when present
windowId: z
.number()
.int()
.optional()
.describe(
'Window ID to get groups from. If not specified, gets all groups.',
),
})
.describe('Optional filters for querying tab groups')
// Input type is inferred from the schema so the two cannot drift apart
type ListTabGroupsInput = z.infer<typeof ListTabGroupsInputSchema>
// One tab group in the output; tabIds lists the IDs of the tabs whose
// groupId matches this group
interface TabGroupInfo {
id: number
windowId: number
title: string
color: string
collapsed: boolean
tabIds: number[]
}
// Output with array of groups; count is the length of `groups`
export interface ListTabGroupsOutput {
groups: TabGroupInfo[]
count: number
}
/**
 * ListTabGroupsAction - Enumerate tab groups and their member tabs
 *
 * Produces one entry per tab group with its ID, window, title, color,
 * collapsed state and the IDs of the tabs that belong to it.
 *
 * Example payloads:
 *
 * Get all groups across all windows:
 * {}
 *
 * Get groups in specific window:
 * { "windowId": 12345 }
 */
export class ListTabGroupsAction extends ActionHandler<
  ListTabGroupsInput,
  ListTabGroupsOutput
> {
  readonly inputSchema = ListTabGroupsInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Optional windowId restricting which groups are listed
   * @returns The groups (each with its member tab IDs) and their count
   */
  async execute(input: ListTabGroupsInput): Promise<ListTabGroupsOutput> {
    const { windowId } = input
    const groups = await this.tabAdapter.getTabGroups(windowId)

    // Tabs are needed to work out membership; restrict to the window when given
    const tabs = await (windowId
      ? this.tabAdapter.getTabsInWindow(windowId)
      : this.tabAdapter.getAllTabs())

    // Bucket tab IDs by their groupId in one pass, keeping tab order
    const tabIdsByGroup = new Map<number, number[]>()
    for (const tab of tabs) {
      if (tab.id === undefined) continue
      const bucket = tabIdsByGroup.get(tab.groupId)
      if (bucket) {
        bucket.push(tab.id)
      } else {
        tabIdsByGroup.set(tab.groupId, [tab.id])
      }
    }

    const groupInfos: TabGroupInfo[] = []
    for (const group of groups) {
      groupInfos.push({
        id: group.id,
        windowId: group.windowId,
        title: group.title || '',
        color: group.color,
        collapsed: group.collapsed,
        tabIds: tabIdsByGroup.get(group.id) ?? [],
      })
    }

    return { groups: groupInfos, count: groupInfos.length }
  }
}

View File

@@ -0,0 +1,93 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for the navigate action. Only url is required.
// NOTE(review): the description says the URL must include https://, but
// z.string().url() does not appear to restrict the scheme - confirm intent.
const NavigateInputSchema = z.object({
url: z.string().url().describe('URL to navigate to (must include https://)'),
// Positive integer when present
tabId: z
.number()
.int()
.positive()
.optional()
.describe('Tab ID to navigate (optional, defaults to active tab)'),
// Only consulted when tabId is absent, to pick which window's active tab to use
windowId: z
.number()
.int()
.optional()
.describe('Window ID for getting active tab when tabId not provided'),
})
// Output schema - in the code shown here it is used only to derive NavigateOutput
const NavigateOutputSchema = z.object({
tabId: z.number().describe('ID of the navigated tab'),
windowId: z.number().describe('ID of the window containing the tab'),
url: z.string().describe('URL that the tab is navigating to'),
message: z.string().describe('Confirmation message'),
})
// Types are inferred from the schemas so they cannot drift apart
type NavigateInput = z.infer<typeof NavigateInputSchema>
type NavigateOutput = z.infer<typeof NavigateOutputSchema>
/**
 * NavigateAction - Point a tab at a new URL
 *
 * Navigates either the tab named by `tabId` or, when no tab ID is given,
 * the active tab (of `windowId` if supplied).
 *
 * Input:
 * - url: URL to navigate to (must be a valid URL with protocol)
 * - tabId (optional): Specific tab to navigate (defaults to active tab)
 * - windowId (optional): Window whose active tab is used when tabId is absent
 *
 * Output:
 * - tabId: ID of the tab that was navigated
 * - windowId: ID of the window containing that tab
 * - url: URL that the tab is navigating to
 * - message: Confirmation message
 *
 * Usage:
 * - Navigate active tab: { "url": "https://google.com" }
 * - Navigate specific tab: { "url": "https://google.com", "tabId": 123 }
 *
 * Example:
 * {
 * "url": "https://www.wikipedia.org"
 * }
 * // Returns: { tabId: 123, windowId: 1, url: "https://www.wikipedia.org", message: "Navigating to https://www.wikipedia.org" }
 */
export class NavigateAction extends ActionHandler<
  NavigateInput,
  NavigateOutput
> {
  readonly inputSchema = NavigateInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Target URL plus optional tabId / windowId
   * @returns IDs of the navigated tab and its window, the requested URL and
   *          a confirmation message
   * @throws Error when the active tab or the navigated tab lacks an ID, or
   *         the navigated tab lacks a window ID
   */
  async execute(input: NavigateInput): Promise<NavigateOutput> {
    const { url } = input

    // Fall back to the active tab only when the caller named no tab
    const targetTabId = input.tabId || (await this.findActiveTabId(input))

    const navigated = await this.tabAdapter.navigateTab(targetTabId, url)
    const { id: tabId, windowId } = navigated
    if (tabId === undefined || windowId === undefined) {
      throw new Error('Navigated tab has no ID or windowId')
    }

    return { tabId, windowId, url, message: `Navigating to ${url}` }
  }

  /** Resolve the ID of the active tab, scoped to input.windowId when given. */
  private async findActiveTabId(input: NavigateInput): Promise<number> {
    const activeTab = await this.tabAdapter.getActiveTab(input.windowId)
    if (activeTab.id === undefined) {
      throw new Error('Active tab has no ID')
    }
    return activeTab.id
  }
}

View File

@@ -0,0 +1,88 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for the openTab action; every field is optional
const OpenTabInputSchema = z.object({
// Validated as a URL when present
url: z
.string()
.url()
.optional()
.describe('URL to open (optional, defaults to new tab page)'),
// Defaults to true when omitted
active: z
.boolean()
.optional()
.default(true)
.describe('Whether to make the new tab active'),
// Must be an integer when present
windowId: z
.number()
.int()
.optional()
.describe(
'Window ID to open the tab in. If not provided, opens in current window.',
),
})
// Output schema - in the code shown here it is used only to derive OpenTabOutput
const OpenTabOutputSchema = z.object({
tabId: z.number().describe('ID of the newly created tab'),
url: z.string().describe('URL of the new tab'),
title: z.string().optional().describe('Title of the new tab'),
})
// Types are inferred from the schemas so they cannot drift apart
type OpenTabInput = z.infer<typeof OpenTabInputSchema>
type OpenTabOutput = z.infer<typeof OpenTabOutputSchema>
/**
 * OpenTabAction - Create a new browser tab
 *
 * Opens a tab, optionally at a given URL and in a given window. Without a
 * URL a new tab page is opened.
 *
 * Input:
 * - url (optional): URL to open in the new tab
 * - active (optional): Whether to make the tab active (default: true)
 * - windowId (optional): Window to open the tab in
 *
 * Output:
 * - tabId: ID of the newly created tab
 * - url: URL of the new tab
 * - title: Title of the new tab (if available)
 *
 * Usage:
 * - Open blank tab: { }
 * - Open specific URL: { "url": "https://google.com" }
 * - Open in background: { "url": "https://google.com", "active": false }
 *
 * Example:
 * {
 * "url": "https://www.google.com",
 * "active": true
 * }
 * // Returns: { tabId: 456, url: "https://www.google.com", title: "Google" }
 */
export class OpenTabAction extends ActionHandler<OpenTabInput, OpenTabOutput> {
  readonly inputSchema = OpenTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Optional url, active flag and windowId
   * @returns ID, URL and (if available) title of the created tab
   * @throws Error when the created tab has no ID
   */
  async execute(input: OpenTabInput): Promise<OpenTabOutput> {
    const makeActive = input.active ?? true
    const opened = await this.tabAdapter.openTab(
      input.url,
      makeActive,
      input.windowId,
    )

    const tabId = opened.id
    if (tabId === undefined) {
      throw new Error('Opened tab has no ID')
    }

    // Report the first URL that is available: the tab's url, its pending
    // url, the requested url, and finally the new tab page.
    const url =
      opened.url || opened.pendingUrl || input.url || 'chrome://newtab/'

    return { tabId, url, title: opened.title }
  }
}

View File

@@ -0,0 +1,66 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for the switchTab action; tabId is required and must be a
// positive integer
const SwitchTabInputSchema = z.object({
tabId: z.number().int().positive().describe('Tab ID to switch to'),
})
// Output schema - in the code shown here it is used only to derive SwitchTabOutput
const SwitchTabOutputSchema = z.object({
tabId: z.number().describe('ID of the tab that is now active'),
url: z.string().describe('URL of the active tab'),
title: z.string().describe('Title of the active tab'),
})
// Types are inferred from the schemas so they cannot drift apart
type SwitchTabInput = z.infer<typeof SwitchTabInputSchema>
type SwitchTabOutput = z.infer<typeof SwitchTabOutputSchema>
/**
 * SwitchTabAction - Bring a specific tab into focus
 *
 * Asks the TabAdapter to switch to the given tab and reports the tab that
 * results.
 *
 * Input:
 * - tabId: ID of the tab to switch to
 *
 * Output:
 * - tabId: ID of the now-active tab
 * - url: URL of the active tab ('' when unavailable)
 * - title: Title of the active tab ('' when unavailable)
 *
 * Usage:
 * Use this to switch between tabs. Get tab IDs from the getTabs action.
 *
 * Example:
 * {
 * "tabId": 123
 * }
 * // Returns: { tabId: 123, url: "https://google.com", title: "Google" }
 */
export class SwitchTabAction extends ActionHandler<
  SwitchTabInput,
  SwitchTabOutput
> {
  readonly inputSchema = SwitchTabInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Payload carrying the ID of the tab to activate
   * @returns ID, URL and title of the tab returned by the adapter
   * @throws Error when the returned tab has no ID
   */
  async execute(input: SwitchTabInput): Promise<SwitchTabOutput> {
    const switched = await this.tabAdapter.switchTab(input.tabId)

    const tabId = switched.id
    if (tabId === undefined) {
      throw new Error('Switched tab has no ID')
    }

    const url = switched.url || ''
    const title = switched.title || ''
    return { tabId, url, title }
  }
}

View File

@@ -0,0 +1,48 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Input schema for the ungroupTabs action; tabIds is required and must
// contain at least one positive integer
const UngroupTabsInputSchema = z
.object({
tabIds: z
.array(z.number().int().positive())
.min(1)
.describe('Array of tab IDs to remove from their groups'),
})
.describe('Remove tabs from their groups')
// Input type is inferred from the schema so the two cannot drift apart
type UngroupTabsInput = z.infer<typeof UngroupTabsInputSchema>
// Result of the ungroupTabs action. ungroupedCount is the number of tab IDs
// supplied in the request (UngroupTabsAction reports input.tabIds.length).
export interface UngroupTabsOutput {
ungroupedCount: number
}
/**
 * UngroupTabsAction - Detach tabs from their tab groups
 *
 * Hands the listed tab IDs to TabAdapter.ungroupTabs so they are removed
 * from any group they belong to. The tabs remain open but are no longer
 * part of any group.
 *
 * Example payload:
 * { "tabIds": [123, 456, 789] }
 */
export class UngroupTabsAction extends ActionHandler<
  UngroupTabsInput,
  UngroupTabsOutput
> {
  readonly inputSchema = UngroupTabsInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Payload carrying the tab IDs to ungroup
   * @returns The number of tab IDs that were submitted for ungrouping
   */
  async execute(input: UngroupTabsInput): Promise<UngroupTabsOutput> {
    const { tabIds } = input
    await this.tabAdapter.ungroupTabs(tabIds)

    // The count reflects the request size, not a result reported by Chrome
    const ungroupedCount = tabIds.length
    return { ungroupedCount }
  }
}

View File

@@ -0,0 +1,90 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
import { TabAdapter } from '@/adapters/TabAdapter'
import { ActionHandler } from '../ActionHandler'
// Colors accepted for a tab group; any other value fails validation
const TabGroupColorSchema = z.enum([
'grey',
'blue',
'red',
'yellow',
'green',
'pink',
'purple',
'cyan',
'orange',
])
// Input schema for the updateTabGroup action.
// groupId is required; title, color and collapsed are each optional.
const UpdateTabGroupInputSchema = z
.object({
groupId: z.number().int().describe('ID of the group to update'),
title: z.string().optional().describe('New title for the group'),
color: TabGroupColorSchema.optional().describe(
'New color for the group: grey, blue, red, yellow, green, pink, purple, cyan, orange',
),
collapsed: z
.boolean()
.optional()
.describe('Whether to collapse (hide) the group tabs'),
})
.describe('Update tab group properties')
// Input type is inferred from the schema so the two cannot drift apart
type UpdateTabGroupInput = z.infer<typeof UpdateTabGroupInputSchema>
// State of the group as reported back after the update
export interface UpdateTabGroupOutput {
groupId: number
title: string
color: string
collapsed: boolean
}
/**
 * UpdateTabGroupAction - Change properties of an existing tab group
 *
 * Forwards whichever of title, color and collapsed were supplied to
 * TabAdapter.updateTabGroup and reports the resulting group state.
 *
 * Example payloads:
 *
 * Rename a group:
 * { "groupId": 1, "title": "Work Projects" }
 *
 * Change color:
 * { "groupId": 1, "color": "blue" }
 *
 * Collapse a group:
 * { "groupId": 1, "collapsed": true }
 *
 * Update multiple properties:
 * { "groupId": 1, "title": "Research", "color": "purple", "collapsed": false }
 */
export class UpdateTabGroupAction extends ActionHandler<
  UpdateTabGroupInput,
  UpdateTabGroupOutput
> {
  readonly inputSchema = UpdateTabGroupInputSchema
  private tabAdapter = new TabAdapter()

  /**
   * @param input - Group ID plus the properties to change
   * @returns ID, title ('' when unset), color and collapsed state of the
   *          group returned by the adapter
   */
  async execute(input: UpdateTabGroupInput): Promise<UpdateTabGroupOutput> {
    const { groupId, title, color, collapsed } = input

    // Include only the properties the caller supplied; omitted ones are
    // left out of the update entirely.
    const updateProps: chrome.tabGroups.UpdateProperties = {
      ...(title !== undefined && { title }),
      ...(color !== undefined && { color }),
      ...(collapsed !== undefined && { collapsed }),
    }

    const updated = await this.tabAdapter.updateTabGroup(groupId, updateProps)

    return {
      groupId: updated.id,
      title: updated.title || '',
      color: updated.color,
      collapsed: updated.collapsed,
    }
  }
}

View File

@@ -0,0 +1,377 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
/**
 * BookmarkAdapter - Wrapper for Chrome bookmarks API
 *
 * Responsibilities:
 * - Provide clean Promise-based interface to Chrome bookmarks API
 * - Guard every Chrome call with the shared CHROME_API timeout
 * - Handle Chrome API errors (logged, then rethrown with a descriptive prefix)
 * - Log operations for debugging
 */
export class BookmarkAdapter {
  /**
   * Get all bookmarks as a tree structure
   *
   * @returns Bookmark tree root nodes
   * @throws Error prefixed with "Failed to get bookmark tree" on failure or timeout
   */
  async getBookmarkTree(): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug('[BookmarkAdapter] Getting bookmark tree')
    try {
      const tree = await withTimeout(
        chrome.bookmarks.getTree(),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.getTree',
      )
      logger.debug(
        `[BookmarkAdapter] Retrieved bookmark tree with ${tree.length} root nodes`,
      )
      return tree
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to get bookmark tree: ${errorMessage}`,
      )
      throw new Error(`Failed to get bookmark tree: ${errorMessage}`)
    }
  }

  /**
   * Search bookmarks by query
   *
   * @param query - Search query (matches title and URL)
   * @returns Array of matching bookmarks
   * @throws Error prefixed with "Failed to search bookmarks" on failure or timeout
   */
  async searchBookmarks(
    query: string,
  ): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug(`[BookmarkAdapter] Searching bookmarks: "${query}"`)
    try {
      const results = await withTimeout(
        chrome.bookmarks.search(query),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.search',
      )
      logger.debug(
        `[BookmarkAdapter] Found ${results.length} bookmarks matching "${query}"`,
      )
      return results
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to search bookmarks: ${errorMessage}`,
      )
      throw new Error(`Failed to search bookmarks: ${errorMessage}`)
    }
  }

  /**
   * Get bookmark by ID
   *
   * @param id - Bookmark ID
   * @returns Bookmark node
   * @throws Error prefixed with "Failed to get bookmark" when the lookup
   *         fails, times out, or returns no node
   */
  async getBookmark(id: string): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(`[BookmarkAdapter] Getting bookmark: ${id}`)
    try {
      const results = await withTimeout(
        chrome.bookmarks.get(id),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.get',
      )
      // An empty result is routed through the catch below so it is logged
      // and prefixed like every other failure.
      if (results.length === 0) {
        throw new Error('Bookmark not found')
      }
      logger.debug(`[BookmarkAdapter] Retrieved bookmark: ${id}`)
      return results[0]
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(`[BookmarkAdapter] Failed to get bookmark: ${errorMessage}`)
      throw new Error(`Failed to get bookmark: ${errorMessage}`)
    }
  }

  /**
   * Create a new bookmark
   *
   * @param bookmark - Bookmark creation details
   * @returns Created bookmark node
   * @throws Error prefixed with "Failed to create bookmark" on failure or timeout
   */
  async createBookmark(bookmark: {
    title: string
    url: string
    parentId?: string
  }): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(
      `[BookmarkAdapter] Creating bookmark: ${bookmark.title || 'Untitled'}`,
    )
    try {
      const created = await withTimeout(
        chrome.bookmarks.create(bookmark),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.create',
      )
      logger.debug(
        `[BookmarkAdapter] Created bookmark: ${created.id} - ${created.title}`,
      )
      return created
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to create bookmark: ${errorMessage}`,
      )
      throw new Error(`Failed to create bookmark: ${errorMessage}`)
    }
  }

  /**
   * Remove a bookmark by ID
   *
   * @param id - Bookmark ID to remove
   * @throws Error prefixed with "Failed to remove bookmark" on failure or timeout
   */
  async removeBookmark(id: string): Promise<void> {
    logger.debug(`[BookmarkAdapter] Removing bookmark: ${id}`)
    try {
      await withTimeout(
        chrome.bookmarks.remove(id),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.remove',
      )
      logger.debug(`[BookmarkAdapter] Removed bookmark: ${id}`)
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to remove bookmark ${id}: ${errorMessage}`,
      )
      throw new Error(`Failed to remove bookmark: ${errorMessage}`)
    }
  }

  /**
   * Update a bookmark
   *
   * @param id - Bookmark ID to update
   * @param changes - Changes to apply
   * @returns Updated bookmark node
   * @throws Error prefixed with "Failed to update bookmark" on failure or timeout
   */
  async updateBookmark(
    id: string,
    changes: { title?: string; url?: string },
  ): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(`[BookmarkAdapter] Updating bookmark: ${id}`)
    try {
      const updated = await withTimeout(
        chrome.bookmarks.update(id, changes),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.update',
      )
      logger.debug(
        `[BookmarkAdapter] Updated bookmark: ${id} - ${updated.title}`,
      )
      return updated
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to update bookmark ${id}: ${errorMessage}`,
      )
      throw new Error(`Failed to update bookmark: ${errorMessage}`)
    }
  }

  /**
   * Get recent bookmarks
   *
   * @param limit - Maximum number of bookmarks to return
   * @returns Array of recent bookmarks, newest first
   * @throws Error prefixed with "Failed to get recent bookmarks" on failure or timeout
   */
  async getRecentBookmarks(
    limit = 20,
  ): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug(`[BookmarkAdapter] Getting ${limit} recent bookmarks`)
    try {
      const tree = await withTimeout(
        chrome.bookmarks.getTree(),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.getTree',
      )
      const bookmarks = this._flattenBookmarkTree(tree)
      // Filter to only URL bookmarks (not folders) and sort by dateAdded,
      // most recent first. filter() returns a fresh array, so the in-place
      // sort does not touch the flattened list.
      const urlBookmarks = bookmarks
        .filter((b) => b.url && b.dateAdded)
        .sort((a, b) => (b.dateAdded || 0) - (a.dateAdded || 0))
        .slice(0, limit)
      logger.debug(
        `[BookmarkAdapter] Found ${urlBookmarks.length} recent bookmarks`,
      )
      return urlBookmarks
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to get recent bookmarks: ${errorMessage}`,
      )
      throw new Error(`Failed to get recent bookmarks: ${errorMessage}`)
    }
  }

  /**
   * Create a bookmark folder
   *
   * @param options - Folder details: `title` is the folder name, `parentId`
   *                  the parent folder ID (defaults to "1" = Bookmarks Bar)
   * @returns Created folder node
   * @throws Error prefixed with "Failed to create bookmark folder" on failure or timeout
   */
  async createBookmarkFolder(options: {
    title: string
    parentId?: string
  }): Promise<chrome.bookmarks.BookmarkTreeNode> {
    const { title, parentId = '1' } = options
    logger.debug(
      `[BookmarkAdapter] Creating bookmark folder: "${title}" in parent ${parentId}`,
    )
    try {
      // No url is passed, so the node is created without one
      const created = await withTimeout(
        chrome.bookmarks.create({
          title,
          parentId,
        }),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.create',
      )
      logger.debug(
        `[BookmarkAdapter] Created folder: ${created.id} - ${created.title}`,
      )
      return created
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to create bookmark folder: ${errorMessage}`,
      )
      throw new Error(`Failed to create bookmark folder: ${errorMessage}`)
    }
  }

  /**
   * Get direct children of a folder
   *
   * @param folderId - Folder ID to get children from
   * @returns Array of child nodes
   * @throws Error prefixed with "Failed to get bookmark children" on failure or timeout
   */
  async getBookmarkChildren(
    folderId: string,
  ): Promise<chrome.bookmarks.BookmarkTreeNode[]> {
    logger.debug(`[BookmarkAdapter] Getting children of folder: ${folderId}`)
    try {
      const children = await withTimeout(
        chrome.bookmarks.getChildren(folderId),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.getChildren',
      )
      logger.debug(
        `[BookmarkAdapter] Found ${children.length} children in folder ${folderId}`,
      )
      return children
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to get bookmark children: ${errorMessage}`,
      )
      throw new Error(`Failed to get bookmark children: ${errorMessage}`)
    }
  }

  /**
   * Move a bookmark or folder to a new location
   *
   * @param id - Bookmark or folder ID to move
   * @param destination - New location
   * @returns Updated bookmark node
   * @throws Error prefixed with "Failed to move bookmark" on failure or timeout
   */
  async moveBookmark(
    id: string,
    destination: { parentId?: string; index?: number },
  ): Promise<chrome.bookmarks.BookmarkTreeNode> {
    logger.debug(
      `[BookmarkAdapter] Moving bookmark ${id} to parent ${destination.parentId}, index ${destination.index}`,
    )
    try {
      const moved = await withTimeout(
        chrome.bookmarks.move(id, destination),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.move',
      )
      logger.debug(
        `[BookmarkAdapter] Moved bookmark ${id} to ${moved.parentId}`,
      )
      return moved
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to move bookmark ${id}: ${errorMessage}`,
      )
      throw new Error(`Failed to move bookmark: ${errorMessage}`)
    }
  }

  /**
   * Remove a folder and all its contents recursively
   *
   * @param id - Folder ID to remove
   * @throws if id is a root node ("0", "1", "2"); this check runs before any
   *         Chrome call and is not wrapped with the "Failed to remove
   *         bookmark tree" prefix used for other failures and timeouts
   */
  async removeBookmarkTree(id: string): Promise<void> {
    const protectedIds = ['0', '1', '2']
    if (protectedIds.includes(id)) {
      throw new Error(
        `Cannot delete protected bookmark folder: ${id}. Root folders (Bookmarks Bar, Other Bookmarks, Mobile Bookmarks) cannot be deleted.`,
      )
    }
    logger.debug(`[BookmarkAdapter] Removing bookmark tree: ${id}`)
    try {
      await withTimeout(
        chrome.bookmarks.removeTree(id),
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.bookmarks.removeTree',
      )
      logger.debug(`[BookmarkAdapter] Removed bookmark tree: ${id}`)
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error)
      logger.error(
        `[BookmarkAdapter] Failed to remove bookmark tree ${id}: ${errorMessage}`,
      )
      throw new Error(`Failed to remove bookmark tree: ${errorMessage}`)
    }
  }

  /**
   * Flatten bookmark tree into array (pre-order: each node precedes its
   * descendants, siblings keep their order).
   *
   * Nodes are appended to a single accumulator rather than spreading each
   * subtree into push(), which would copy subtrees repeatedly and can hit
   * the engine's argument-count limit on very large folders.
   * @private
   */
  private _flattenBookmarkTree(
    nodes: chrome.bookmarks.BookmarkTreeNode[],
  ): chrome.bookmarks.BookmarkTreeNode[] {
    const result: chrome.bookmarks.BookmarkTreeNode[] = []
    const visit = (level: chrome.bookmarks.BookmarkTreeNode[]): void => {
      for (const node of level) {
        result.push(node)
        if (node.children) {
          visit(node.children)
        }
      }
    }
    visit(nodes)
    return result
  }
}

View File

@@ -0,0 +1,907 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
/// <reference path="../types/chrome-browser-os.d.ts" />
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
// ============= Re-export types from chrome.browserOS namespace =============
// These aliases let other modules import the types from this adapter
// instead of referring to the chrome.browserOS ambient namespace directly.
export type InteractiveNode = chrome.browserOS.InteractiveNode
export type InteractiveSnapshot = chrome.browserOS.InteractiveSnapshot
export type InteractiveSnapshotOptions =
chrome.browserOS.InteractiveSnapshotOptions
export type PageLoadStatus = chrome.browserOS.PageLoadStatus
export type InteractiveNodeType = chrome.browserOS.InteractiveNodeType
// Note: exported under the shorter name Rect
export type Rect = chrome.browserOS.BoundingRect
// New snapshot types
export type SnapshotType = chrome.browserOS.SnapshotType
export type SnapshotContext = chrome.browserOS.SnapshotContext
export type SectionType = chrome.browserOS.SectionType
export type TextSnapshotResult = chrome.browserOS.TextSnapshotResult
export type LinkInfo = chrome.browserOS.LinkInfo
export type LinksSnapshotResult = chrome.browserOS.LinksSnapshotResult
export type SnapshotSection = chrome.browserOS.SnapshotSection
export type Snapshot = chrome.browserOS.Snapshot
export type SnapshotOptions = chrome.browserOS.SnapshotOptions
export type PrefObject = chrome.browserOS.PrefObject
// ============= BrowserOS Adapter =============
// Screenshot size constants, keyed by preset name.
// NOTE(review): values are presumably pixel dimensions - confirm at the call site.
export const SCREENSHOT_SIZES = {
small: 512, // Low token usage
medium: 768, // Balanced (default)
large: 1028, // High detail (note: 1028 not 1024)
} as const
// Union of the preset names: 'small' | 'medium' | 'large'
export type ScreenshotSizeKey = keyof typeof SCREENSHOT_SIZES
/**
* Adapter for Chrome BrowserOS Extension APIs
* Provides a clean interface to browserOS functionality with extensibility
*/
export class BrowserOSAdapter {
private static instance: BrowserOSAdapter | null = null
private constructor() {}
/**
 * Return the shared BrowserOSAdapter, creating it on first use.
 */
static getInstance(): BrowserOSAdapter {
  const existing = BrowserOSAdapter.instance
  if (existing) {
    return existing
  }
  // First call: build the instance and remember it for later callers
  const created = new BrowserOSAdapter()
  BrowserOSAdapter.instance = created
  return created
}
/**
 * Get interactive snapshot of the current page
 *
 * Wraps the callback-style chrome.browserOS.getInteractiveSnapshot in a
 * promise guarded by the BROWSEROS_HEAVY timeout.
 *
 * @param tabId - Tab to snapshot
 * @param options - Optional snapshot options; when omitted, the API is
 *                  called without an options argument
 * @returns The snapshot delivered by the BrowserOS API
 * @throws Error prefixed with "Failed to get interactive snapshot" when the
 *         API reports chrome.runtime.lastError or the call times out
 */
async getInteractiveSnapshot(
  tabId: number,
  options?: InteractiveSnapshotOptions,
): Promise<InteractiveSnapshot> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Getting interactive snapshot for tab ${tabId} with options: ${JSON.stringify(options)}`,
    )
    const promise = new Promise<InteractiveSnapshot>((resolve, reject) => {
      // Shared completion handler for both call shapes below
      const onSnapshot = (snapshot: InteractiveSnapshot): void => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
          return
        }
        logger.debug(
          `[BrowserOSAdapter] Retrieved snapshot with ${snapshot.elements.length} elements`,
        )
        resolve(snapshot)
      }

      if (options) {
        chrome.browserOS.getInteractiveSnapshot(tabId, options, onSnapshot)
      } else {
        chrome.browserOS.getInteractiveSnapshot(tabId, onSnapshot)
      }
    })
    // `await` is required here: returning the bare promise would let
    // rejections and timeouts bypass the catch block below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getInteractiveSnapshot',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to get interactive snapshot: ${errorMessage}`,
    )
    throw new Error(`Failed to get interactive snapshot: ${errorMessage}`)
  }
}
/**
 * Click an element by node ID
 *
 * Wraps the callback-style chrome.browserOS.click in a promise guarded by
 * the BROWSEROS_ACTION timeout.
 *
 * @param tabId - Tab containing the element
 * @param nodeId - Node ID of the element to click
 * @throws Error prefixed with "Failed to click node <nodeId>" when the API
 *         reports chrome.runtime.lastError or the call times out
 */
async click(tabId: number, nodeId: number): Promise<void> {
  try {
    logger.debug(`[BrowserOSAdapter] Clicking node ${nodeId} in tab ${tabId}`)
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.click(tabId, nodeId, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // `await` is required here: returning the bare promise would let
    // rejections and timeouts bypass the catch block below.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'click',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to click node: ${errorMessage}`)
    throw new Error(`Failed to click node ${nodeId}: ${errorMessage}`)
  }
}
/**
 * Input text into an element.
 *
 * @param tabId - ID of the tab containing the element
 * @param nodeId - Node ID forwarded to `chrome.browserOS.inputText`
 * @param text - Text to input (deliberately not written to the log)
 * @throws Error prefixed with "Failed to input text into node <nodeId>" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async inputText(tabId: number, nodeId: number, text: string): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Inputting text into node ${nodeId} in tab ${tabId}`,
    )
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.inputText(tabId, nodeId, text, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'inputText',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to input text: ${errorMessage}`)
    throw new Error(
      `Failed to input text into node ${nodeId}: ${errorMessage}`,
    )
  }
}
/**
 * Clear text from an element.
 *
 * @param tabId - ID of the tab containing the element
 * @param nodeId - Node ID forwarded to `chrome.browserOS.clear`
 * @throws Error prefixed with "Failed to clear node <nodeId>" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async clear(tabId: number, nodeId: number): Promise<void> {
  try {
    logger.debug(`[BrowserOSAdapter] Clearing node ${nodeId} in tab ${tabId}`)
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.clear(tabId, nodeId, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    await withTimeout(promise, CHROME_API_TIMEOUTS.BROWSEROS_ACTION, 'clear')
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to clear node: ${errorMessage}`)
    throw new Error(`Failed to clear node ${nodeId}: ${errorMessage}`)
  }
}
/**
 * Scroll to a specific node.
 *
 * @param tabId - ID of the tab containing the node
 * @param nodeId - Node ID forwarded to `chrome.browserOS.scrollToNode`
 * @returns The boolean that the API passes to its callback
 * @throws Error prefixed with "Failed to scroll to node <nodeId>" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async scrollToNode(tabId: number, nodeId: number): Promise<boolean> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Scrolling to node ${nodeId} in tab ${tabId}`,
    )
    const promise = new Promise<boolean>((resolve, reject) => {
      chrome.browserOS.scrollToNode(tabId, nodeId, (scrolled: boolean) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve(scrolled)
        }
      })
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'scrollToNode',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to scroll to node: ${errorMessage}`,
    )
    throw new Error(`Failed to scroll to node ${nodeId}: ${errorMessage}`)
  }
}
/**
 * Send keyboard keys to a tab.
 *
 * @param tabId - ID of the tab that receives the keys
 * @param keys - Key value forwarded to `chrome.browserOS.sendKeys`
 * @throws Error prefixed with "Failed to send keys" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async sendKeys(tabId: number, keys: chrome.browserOS.Key): Promise<void> {
  try {
    logger.debug(`[BrowserOSAdapter] Sending keys "${keys}" to tab ${tabId}`)
    const promise = new Promise<void>((resolve, reject) => {
      chrome.browserOS.sendKeys(tabId, keys, () => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve()
        }
      })
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'sendKeys',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to send keys: ${errorMessage}`)
    throw new Error(`Failed to send keys: ${errorMessage}`)
  }
}
/**
 * Get the page load status of a tab.
 *
 * @param tabId - ID of the tab to inspect
 * @returns The status object that the API passes to its callback
 * @throws Error prefixed with "Failed to get page load status" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async getPageLoadStatus(tabId: number): Promise<PageLoadStatus> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Getting page load status for tab ${tabId}`,
    )
    const promise = new Promise<PageLoadStatus>((resolve, reject) => {
      chrome.browserOS.getPageLoadStatus(tabId, (status: PageLoadStatus) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          resolve(status)
        }
      })
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getPageLoadStatus',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to get page load status: ${errorMessage}`,
    )
    throw new Error(`Failed to get page load status: ${errorMessage}`)
  }
}
/**
 * Get the accessibility tree of a tab (if available).
 *
 * @param tabId - ID of the tab to inspect
 * @returns The tree that the API passes to its callback
 * @throws Error prefixed with "Failed to get accessibility tree" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async getAccessibilityTree(
  tabId: number,
): Promise<chrome.browserOS.AccessibilityTree> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Getting accessibility tree for tab ${tabId}`,
    )
    const promise = new Promise<chrome.browserOS.AccessibilityTree>(
      (resolve, reject) => {
        chrome.browserOS.getAccessibilityTree(
          tabId,
          (tree: chrome.browserOS.AccessibilityTree) => {
            if (chrome.runtime.lastError) {
              reject(new Error(chrome.runtime.lastError.message))
            } else {
              resolve(tree)
            }
          },
        )
      },
    )
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getAccessibilityTree',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to get accessibility tree: ${errorMessage}`,
    )
    throw new Error(`Failed to get accessibility tree: ${errorMessage}`)
  }
}
/**
 * Capture a screenshot of the tab.
 *
 * The call shape sent to `chrome.browserOS.captureScreenshot` depends on
 * which optional arguments are supplied:
 * - `width` and `height` both given: exact dimensions (thumbnail size 0)
 * - `showHighlights` given: thumbnail size plus highlight flag
 * - only `size` given: thumbnail size only
 * - nothing given: tab ID only (backwards-compatible form)
 *
 * @param tabId - The tab ID to capture
 * @param size - Optional screenshot size key, resolved via `SCREENSHOT_SIZES`
 * @param showHighlights - Optional flag to show element highlights
 * @param width - Optional exact width for screenshot
 * @param height - Optional exact height for screenshot
 * @returns The string that the API passes to its callback
 * @throws Error prefixed with "Failed to capture screenshot" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async captureScreenshot(
  tabId: number,
  size?: ScreenshotSizeKey,
  showHighlights?: boolean,
  width?: number,
  height?: number,
): Promise<string> {
  try {
    const hasExactDimensions = width !== undefined && height !== undefined
    const sizeDesc = size ? ` (${size})` : ''
    const highlightDesc = showHighlights ? ' with highlights' : ''
    // Same condition as the branch below, so the log matches the call made.
    const dimensionsDesc = hasExactDimensions ? ` (${width}x${height})` : ''
    logger.debug(
      `[BrowserOSAdapter] Capturing screenshot for tab ${tabId}${sizeDesc}${highlightDesc}${dimensionsDesc}`,
    )
    const promise = new Promise<string>((resolve, reject) => {
      // Builds the API callback; only the success log differs per call shape.
      const onCaptured =
        (successLog: string) =>
        (dataUrl: string): void => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
            return
          }
          logger.debug(successLog)
          resolve(dataUrl)
        }

      if (width !== undefined && height !== undefined) {
        // Use exact dimensions if provided
        chrome.browserOS.captureScreenshot(
          tabId,
          0, // thumbnailSize ignored when width/height specified
          showHighlights || false,
          width,
          height,
          onCaptured(
            `[BrowserOSAdapter] Screenshot captured for tab ${tabId} (${width}x${height})${highlightDesc}`,
          ),
        )
      } else if (size !== undefined || showHighlights !== undefined) {
        const pixelSize = size ? SCREENSHOT_SIZES[size] : 0
        if (showHighlights !== undefined) {
          // Use the API with thumbnail size and highlights
          chrome.browserOS.captureScreenshot(
            tabId,
            pixelSize,
            showHighlights,
            onCaptured(
              `[BrowserOSAdapter] Screenshot captured for tab ${tabId}${sizeDesc}${highlightDesc}`,
            ),
          )
        } else {
          chrome.browserOS.captureScreenshot(
            tabId,
            pixelSize,
            onCaptured(
              `[BrowserOSAdapter] Screenshot captured for tab ${tabId} (${size}: ${pixelSize}px)`,
            ),
          )
        }
      } else {
        // Use the original API without size (backwards compatibility)
        chrome.browserOS.captureScreenshot(
          tabId,
          onCaptured(
            `[BrowserOSAdapter] Screenshot captured for tab ${tabId}`,
          ),
        )
      }
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'captureScreenshot',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to capture screenshot: ${errorMessage}`,
    )
    throw new Error(`Failed to capture screenshot: ${errorMessage}`)
  }
}
/**
 * Get a content snapshot from the page.
 *
 * @param tabId - ID of the tab to snapshot
 * @param _type - Accepted for interface compatibility but not forwarded:
 *   `chrome.browserOS.getSnapshot` is called with the tab ID only, so every
 *   snapshot type currently yields the same request
 * @returns The snapshot that the API passes to its callback
 * @throws Error prefixed with "Failed to get snapshot" when
 *   `chrome.runtime.lastError` is set or the `withTimeout` wrapper rejects
 */
async getSnapshot(tabId: number, _type: SnapshotType): Promise<Snapshot> {
  try {
    logger.debug(`[BrowserOSAdapter] Getting snapshot for tab ${tabId}`)
    const promise = new Promise<Snapshot>((resolve, reject) => {
      chrome.browserOS.getSnapshot(tabId, (snapshot: Snapshot) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          logger.debug(
            `[BrowserOSAdapter] Retrieved snapshot: ${JSON.stringify(snapshot)}`,
          )
          resolve(snapshot)
        }
      })
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'getSnapshot',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to get snapshot: ${errorMessage}`)
    throw new Error(`Failed to get snapshot: ${errorMessage}`)
  }
}
/**
 * Convenience wrapper that requests a 'text' snapshot of the page.
 *
 * @deprecated Use `getSnapshot(tabId, 'text')` instead.
 * @param tabId - ID of the tab to snapshot
 * @returns Whatever `getSnapshot` resolves with
 */
async getTextSnapshot(tabId: number): Promise<Snapshot> {
  const textSnapshot = await this.getSnapshot(tabId, 'text')
  return textSnapshot
}
/**
 * Convenience wrapper that requests a 'links' snapshot of the page.
 *
 * @deprecated Use `getSnapshot(tabId, 'links')` instead.
 * @param tabId - ID of the tab to snapshot
 * @returns Whatever `getSnapshot` resolves with
 */
async getLinksSnapshot(tabId: number): Promise<Snapshot> {
  const linksSnapshot = await this.getSnapshot(tabId, 'links')
  return linksSnapshot
}
/**
 * Generic method to invoke any BrowserOS API by name.
 * Useful for future APIs or experimental features.
 *
 * @param method - Name of the member on `chrome.browserOS` to call
 * @param args - Arguments forwarded unchanged to that member
 * @returns The awaited return value of the call
 * @throws Error prefixed with "Failed to invoke BrowserOS API <method>" when
 *   the member is missing, is not a function, or the call throws/rejects
 */
async invokeAPI(method: string, ...args: unknown[]): Promise<unknown> {
  try {
    logger.debug(`[BrowserOSAdapter] Invoking BrowserOS API: ${method}`)
    if (!(method in chrome.browserOS)) {
      throw new Error(`Unknown BrowserOS API method: ${method}`)
    }
    // @ts-expect-error - Dynamic API invocation
    const api = chrome.browserOS[method]
    // `in` also matches non-callable members; reject those with a clear
    // message instead of an opaque "is not a function" TypeError.
    if (typeof api !== 'function') {
      throw new Error(`BrowserOS API member is not callable: ${method}`)
    }
    // Keep chrome.browserOS as the receiver, as a direct member call would.
    const result = await api.apply(chrome.browserOS, args)
    return result
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to invoke API ${method}: ${errorMessage}`,
    )
    throw new Error(
      `Failed to invoke BrowserOS API ${method}: ${errorMessage}`,
    )
  }
}
/**
 * Report whether `chrome.browserOS` has a member with the given name.
 *
 * @param method - Member name to look up
 * @returns true when the name is present on `chrome.browserOS`
 */
isAPIAvailable(method: string): boolean {
  const present = Reflect.has(chrome.browserOS, method)
  return present
}
/**
 * List the names of the callable members of `chrome.browserOS`.
 *
 * @returns Keys from `Object.keys(chrome.browserOS)` whose value is a function
 */
getAvailableAPIs(): string[] {
  const callableNames: string[] = []
  for (const key of Object.keys(chrome.browserOS)) {
    // @ts-expect-error - Dynamic key access for API discovery
    const member = chrome.browserOS[key]
    if (typeof member === 'function') {
      callableNames.push(key)
    }
  }
  return callableNames
}
/**
 * Get BrowserOS version information.
 *
 * @returns The version string passed to the `getVersionNumber` callback, or
 *   null when that API is not present or the lookup fails
 */
async getVersion(): Promise<string | null> {
  try {
    logger.debug('[BrowserOSAdapter] Getting BrowserOS version')
    // Await inside the try block so a rejection reaches the catch below and
    // produces the null fallback instead of escaping to the caller.
    return await new Promise<string | null>((resolve, reject) => {
      // Check if getVersionNumber API is available
      if (
        'getVersionNumber' in chrome.browserOS &&
        typeof chrome.browserOS.getVersionNumber === 'function'
      ) {
        chrome.browserOS.getVersionNumber((version: string) => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            logger.debug(`[BrowserOSAdapter] BrowserOS version: ${version}`)
            resolve(version)
          }
        })
      } else {
        // Fallback - return null if API not available
        resolve(null)
      }
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to get version: ${errorMessage}`)
    // Return null on error
    return null
  }
}
/**
 * Log a metric event with optional properties.
 *
 * Best-effort: a missing `logMetric` API is logged as a warning and a failed
 * call is logged as an error; neither is surfaced to the caller.
 *
 * @param eventName - Name of the metric event
 * @param properties - Optional properties; when omitted, the two-argument
 *   form of `chrome.browserOS.logMetric` is called
 */
async logMetric(
  eventName: string,
  properties?: Record<string, unknown>,
): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Logging metric: ${eventName} with properties: ${JSON.stringify(properties)}`,
    )
    // Await inside the try block so a rejection reaches the catch below
    // instead of escaping to the caller.
    await new Promise<void>((resolve, reject) => {
      // Check if logMetric API is available
      if (
        'logMetric' in chrome.browserOS &&
        typeof chrome.browserOS.logMetric === 'function'
      ) {
        // Single callback shared by both call shapes below.
        const onLogged = (): void => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
            return
          }
          logger.debug(`[BrowserOSAdapter] Metric logged: ${eventName}`)
          resolve()
        }
        if (properties) {
          chrome.browserOS.logMetric(eventName, properties, onLogged)
        } else {
          chrome.browserOS.logMetric(eventName, onLogged)
        }
      } else {
        // If API not available, log a warning but don't fail
        logger.warn(
          `[BrowserOSAdapter] logMetric API not available, skipping metric: ${eventName}`,
        )
        resolve()
      }
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(`[BrowserOSAdapter] Failed to log metric: ${errorMessage}`)
    return
  }
}
/**
 * Execute JavaScript code in the specified tab.
 *
 * @param tabId - The tab ID to execute code in
 * @param code - The JavaScript code to execute
 * @returns The value that the API passes to its callback
 * @throws Error prefixed with "Failed to execute JavaScript" when the API is
 *   not present, `chrome.runtime.lastError` is set, or the `withTimeout`
 *   wrapper rejects
 */
async executeJavaScript(tabId: number, code: string): Promise<unknown> {
  try {
    logger.debug(`[BrowserOSAdapter] Executing JavaScript in tab ${tabId}`)
    const promise = new Promise<unknown>((resolve, reject) => {
      // Check if executeJavaScript API is available
      if (
        'executeJavaScript' in chrome.browserOS &&
        typeof chrome.browserOS.executeJavaScript === 'function'
      ) {
        chrome.browserOS.executeJavaScript(tabId, code, (result: unknown) => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            logger.debug(
              `[BrowserOSAdapter] JavaScript executed successfully in tab ${tabId}`,
            )
            resolve(result)
          }
        })
      } else {
        reject(new Error('executeJavaScript API not available'))
      }
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_HEAVY,
      'executeJavaScript',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to execute JavaScript: ${errorMessage}`,
    )
    throw new Error(`Failed to execute JavaScript: ${errorMessage}`)
  }
}
/**
 * Click at specific viewport coordinates.
 *
 * @param tabId - The tab ID to click in
 * @param x - X coordinate in viewport pixels
 * @param y - Y coordinate in viewport pixels
 * @throws Error prefixed with "Failed to click at coordinates (x, y)" when
 *   the API is not present, `chrome.runtime.lastError` is set, or the
 *   `withTimeout` wrapper rejects
 */
async clickCoordinates(tabId: number, x: number, y: number): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Clicking at coordinates (${x}, ${y}) in tab ${tabId}`,
    )
    const promise = new Promise<void>((resolve, reject) => {
      // Check if clickCoordinates API is available
      if (
        'clickCoordinates' in chrome.browserOS &&
        typeof chrome.browserOS.clickCoordinates === 'function'
      ) {
        chrome.browserOS.clickCoordinates(tabId, x, y, () => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            logger.debug(
              `[BrowserOSAdapter] Successfully clicked at (${x}, ${y}) in tab ${tabId}`,
            )
            resolve()
          }
        })
      } else {
        reject(new Error('clickCoordinates API not available'))
      }
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'clickCoordinates',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to click at coordinates: ${errorMessage}`,
    )
    throw new Error(
      `Failed to click at coordinates (${x}, ${y}): ${errorMessage}`,
    )
  }
}
/**
 * Type text at specific viewport coordinates.
 *
 * @param tabId - The tab ID to type in
 * @param x - X coordinate in viewport pixels
 * @param y - Y coordinate in viewport pixels
 * @param text - Text to type at the location (only its length is logged)
 * @throws Error prefixed with "Failed to type at coordinates (x, y)" when
 *   the API is not present, `chrome.runtime.lastError` is set, or the
 *   `withTimeout` wrapper rejects
 */
async typeAtCoordinates(
  tabId: number,
  x: number,
  y: number,
  text: string,
): Promise<void> {
  try {
    logger.debug(
      `[BrowserOSAdapter] Typing at coordinates (${x}, ${y}) in tab ${tabId}`,
    )
    const promise = new Promise<void>((resolve, reject) => {
      // Check if typeAtCoordinates API is available
      if (
        'typeAtCoordinates' in chrome.browserOS &&
        typeof chrome.browserOS.typeAtCoordinates === 'function'
      ) {
        chrome.browserOS.typeAtCoordinates(tabId, x, y, text, () => {
          if (chrome.runtime.lastError) {
            reject(new Error(chrome.runtime.lastError.message))
          } else {
            // Log the length only: typed text may be a password or other
            // secret, and inputText() likewise keeps its text out of logs.
            logger.debug(
              `[BrowserOSAdapter] Successfully typed ${text.length} characters at (${x}, ${y}) in tab ${tabId}`,
            )
            resolve()
          }
        })
      } else {
        reject(new Error('typeAtCoordinates API not available'))
      }
    })
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    await withTimeout(
      promise,
      CHROME_API_TIMEOUTS.BROWSEROS_ACTION,
      'typeAtCoordinates',
    )
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    logger.error(
      `[BrowserOSAdapter] Failed to type at coordinates: ${errorMessage}`,
    )
    throw new Error(
      `Failed to type at coordinates (${x}, ${y}): ${errorMessage}`,
    )
  }
}
/**
 * Get a specific preference value.
 *
 * Logs through `console` rather than the shared `logger`.
 *
 * @param name - The preference name (e.g., "browseros.server.mcp_port")
 * @returns Promise resolving to the preference object containing key, type, and value
 * @throws Error prefixed with "Failed to get preference <name>" when
 *   `chrome.runtime.lastError` is set
 */
async getPref(name: string): Promise<PrefObject> {
  try {
    console.log(`[BrowserOSAdapter] Getting preference: ${name}`)
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await new Promise<PrefObject>((resolve, reject) => {
      chrome.browserOS.getPref(name, (pref: PrefObject) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          console.log(
            `[BrowserOSAdapter] Retrieved preference ${name}: ${JSON.stringify(pref)}`,
          )
          resolve(pref)
        }
      })
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    console.error(
      `[BrowserOSAdapter] Failed to get preference: ${errorMessage}`,
    )
    throw new Error(`Failed to get preference ${name}: ${errorMessage}`)
  }
}
/**
 * Set a specific preference value.
 *
 * Logs through `console` rather than the shared `logger`.
 *
 * @param name - The preference name (e.g., "browseros.server.mcp_enabled")
 * @param value - The value to set
 * @param pageId - Optional page ID for settings tracking; when omitted, the
 *   three-argument form of `chrome.browserOS.setPref` is called
 * @returns Promise resolving to the boolean that the API passes to its callback
 * @throws Error prefixed with "Failed to set preference <name>" when
 *   `chrome.runtime.lastError` is set
 */
async setPref(
  name: string,
  value: unknown,
  pageId?: string,
): Promise<boolean> {
  try {
    console.log(
      `[BrowserOSAdapter] Setting preference ${name} to ${JSON.stringify(value)}`,
    )
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await new Promise<boolean>((resolve, reject) => {
      // Single callback shared by both call shapes below.
      const onSet = (success: boolean): void => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
          return
        }
        console.log(`[BrowserOSAdapter] Successfully set preference ${name}`)
        resolve(success)
      }
      if (pageId !== undefined) {
        chrome.browserOS.setPref(name, value, pageId, onSet)
      } else {
        chrome.browserOS.setPref(name, value, onSet)
      }
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    console.error(
      `[BrowserOSAdapter] Failed to set preference: ${errorMessage}`,
    )
    throw new Error(`Failed to set preference ${name}: ${errorMessage}`)
  }
}
/**
 * Get all preferences (filtered to browseros.* prefs).
 *
 * Logs through `console` rather than the shared `logger`.
 *
 * @returns Promise resolving to array of preference objects
 * @throws Error prefixed with "Failed to get all preferences" when
 *   `chrome.runtime.lastError` is set
 */
async getAllPrefs(): Promise<PrefObject[]> {
  try {
    console.log('[BrowserOSAdapter] Getting all preferences')
    // Await inside the try block: returning the bare promise would let a
    // rejection bypass the catch below, skipping the log and error wrapping.
    return await new Promise<PrefObject[]>((resolve, reject) => {
      chrome.browserOS.getAllPrefs((prefs: PrefObject[]) => {
        if (chrome.runtime.lastError) {
          reject(new Error(chrome.runtime.lastError.message))
        } else {
          console.log(
            `[BrowserOSAdapter] Retrieved ${prefs.length} preferences`,
          )
          resolve(prefs)
        }
      })
    })
  } catch (error) {
    const errorMessage =
      error instanceof Error ? error.message : String(error)
    console.error(
      `[BrowserOSAdapter] Failed to get all preferences: ${errorMessage}`,
    )
    throw new Error(`Failed to get all preferences: ${errorMessage}`)
  }
}
}
/** Convenience accessor that returns the shared BrowserOSAdapter instance. */
export const getBrowserOSAdapter = (): BrowserOSAdapter => {
  return BrowserOSAdapter.getInstance()
}

View File

@@ -0,0 +1,261 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
/** Reduce an unknown thrown value to the text used in logs and rethrown errors. */
function describeHistoryError(thrown: unknown): string {
  if (thrown instanceof Error) {
    return thrown.message
  }
  return String(thrown)
}

/**
 * HistoryAdapter - thin wrapper over the Chrome history API.
 *
 * Each method:
 * - logs the operation,
 * - runs the chrome.history call through `withTimeout`,
 * - logs any failure and rethrows it as an Error with a descriptive prefix.
 */
export class HistoryAdapter {
  /**
   * Search browser history.
   *
   * @param query - Search text passed as `text` to chrome.history.search
   * @param maxResults - Maximum number of results (default: 100)
   * @param startTime - Optional start time in milliseconds since epoch
   * @param endTime - Optional end time in milliseconds since epoch
   * @returns Matching history items
   * @throws Error prefixed with "Failed to search history"
   */
  async searchHistory(
    query: string,
    maxResults = 100,
    startTime?: number,
    endTime?: number,
  ): Promise<chrome.history.HistoryItem[]> {
    logger.debug(
      `[HistoryAdapter] Searching history: "${query}" (max: ${maxResults})`,
    )
    try {
      const pending = chrome.history.search({
        text: query,
        maxResults,
        startTime,
        endTime,
      })
      const matches = await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.search',
      )
      logger.debug(`[HistoryAdapter] Found ${matches.length} history items`)
      return matches
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(`[HistoryAdapter] Failed to search history: ${reason}`)
      throw new Error(`Failed to search history: ${reason}`)
    }
  }

  /**
   * Get recent browser history.
   *
   * @param maxResults - Maximum number of results (default: 20)
   * @param hoursBack - How many hours back to search (default: 24)
   * @returns History items no older than `hoursBack` hours
   * @throws Error prefixed with "Failed to get recent history"
   */
  async getRecentHistory(
    maxResults = 20,
    hoursBack = 24,
  ): Promise<chrome.history.HistoryItem[]> {
    logger.debug(
      `[HistoryAdapter] Getting ${maxResults} recent history items (last ${hoursBack}h)`,
    )
    try {
      // Convert the look-back window from hours to milliseconds.
      const startTime = Date.now() - hoursBack * 60 * 60 * 1000
      const pending = chrome.history.search({
        text: '',
        maxResults,
        startTime,
      })
      const recent = await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.search',
      )
      logger.debug(`[HistoryAdapter] Retrieved ${recent.length} recent items`)
      return recent
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(
        `[HistoryAdapter] Failed to get recent history: ${reason}`,
      )
      throw new Error(`Failed to get recent history: ${reason}`)
    }
  }

  /**
   * Get visit details for a specific URL.
   *
   * @param url - URL to get visits for
   * @returns Visit items for that URL
   * @throws Error prefixed with "Failed to get visits"
   */
  async getVisits(url: string): Promise<chrome.history.VisitItem[]> {
    logger.debug(`[HistoryAdapter] Getting visits for: ${url}`)
    try {
      const pending = chrome.history.getVisits({ url })
      const visitList = await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.getVisits',
      )
      logger.debug(
        `[HistoryAdapter] Found ${visitList.length} visits for ${url}`,
      )
      return visitList
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(`[HistoryAdapter] Failed to get visits: ${reason}`)
      throw new Error(`Failed to get visits: ${reason}`)
    }
  }

  /**
   * Add a URL to browser history.
   *
   * @param url - URL to add
   * @throws Error prefixed with "Failed to add URL to history"
   */
  async addUrl(url: string): Promise<void> {
    logger.debug(`[HistoryAdapter] Adding URL to history: ${url}`)
    try {
      const pending = chrome.history.addUrl({ url })
      await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.addUrl',
      )
      logger.debug(`[HistoryAdapter] Added URL: ${url}`)
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(`[HistoryAdapter] Failed to add URL: ${reason}`)
      throw new Error(`Failed to add URL to history: ${reason}`)
    }
  }

  /**
   * Remove a specific URL from history.
   *
   * @param url - URL to remove
   * @throws Error prefixed with "Failed to delete URL from history"
   */
  async deleteUrl(url: string): Promise<void> {
    logger.debug(`[HistoryAdapter] Removing URL from history: ${url}`)
    try {
      const pending = chrome.history.deleteUrl({ url })
      await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.deleteUrl',
      )
      logger.debug(`[HistoryAdapter] Removed URL: ${url}`)
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(`[HistoryAdapter] Failed to delete URL: ${reason}`)
      throw new Error(`Failed to delete URL from history: ${reason}`)
    }
  }

  /**
   * Delete history within a time range.
   *
   * @param startTime - Start time in milliseconds since epoch
   * @param endTime - End time in milliseconds since epoch
   * @throws Error prefixed with "Failed to delete history range"
   */
  async deleteRange(startTime: number, endTime: number): Promise<void> {
    logger.debug(
      `[HistoryAdapter] Deleting history range: ${new Date(startTime).toISOString()} to ${new Date(endTime).toISOString()}`,
    )
    try {
      const pending = chrome.history.deleteRange({ startTime, endTime })
      await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.deleteRange',
      )
      logger.debug('[HistoryAdapter] Deleted history range')
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(
        `[HistoryAdapter] Failed to delete history range: ${reason}`,
      )
      throw new Error(`Failed to delete history range: ${reason}`)
    }
  }

  /**
   * Delete all browser history.
   *
   * WARNING: This deletes ALL history permanently!
   *
   * @throws Error prefixed with "Failed to delete all history"
   */
  async deleteAll(): Promise<void> {
    logger.warn('[HistoryAdapter] Deleting ALL browser history')
    try {
      const pending = chrome.history.deleteAll()
      await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.deleteAll',
      )
      logger.warn('[HistoryAdapter] Deleted all history')
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(
        `[HistoryAdapter] Failed to delete all history: ${reason}`,
      )
      throw new Error(`Failed to delete all history: ${reason}`)
    }
  }

  /**
   * Get most visited URLs.
   *
   * Samples up to 1000 history items, keeps those with a visit count above 1,
   * and returns the highest-count entries first.
   *
   * @param maxResults - Maximum number of results (default: 10)
   * @returns Up to `maxResults` items ordered by descending visit count
   * @throws Error prefixed with "Failed to get most visited URLs"
   */
  async getMostVisited(maxResults = 10): Promise<chrome.history.HistoryItem[]> {
    logger.debug(`[HistoryAdapter] Getting ${maxResults} most visited URLs`)
    try {
      const pending = chrome.history.search({
        text: '',
        maxResults: 1000, // Get a large sample
        startTime: 0,
      })
      const sample = await withTimeout(
        pending,
        CHROME_API_TIMEOUTS.CHROME_API,
        'chrome.history.search',
      )
      // Only entries visited more than once are candidates.
      const repeated = sample.filter((entry) => (entry.visitCount ?? 0) > 1)
      // Highest visit count first; a missing count is treated as 0.
      repeated.sort(
        (left, right) => (right.visitCount || 0) - (left.visitCount || 0),
      )
      const top = repeated.slice(0, maxResults)
      logger.debug(`[HistoryAdapter] Found ${top.length} most visited URLs`)
      return top
    } catch (thrown) {
      const reason = describeHistoryError(thrown)
      logger.error(
        `[HistoryAdapter] Failed to get most visited: ${reason}`,
      )
      throw new Error(`Failed to get most visited URLs: ${reason}`)
    }
  }
}

View File

@@ -0,0 +1,488 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
import { CHROME_API_TIMEOUTS, withTimeout } from '@/utils/timeout'
/**
* TabAdapter - Wrapper for Chrome tabs API
*
* Responsibilities:
* - Provide clean Promise-based interface to Chrome tabs API
* - Handle Chrome API errors
* - Log operations for debugging
*
* Chrome tabs API is already Promise-based in Manifest V3,
* so we add error handling and logging.
*/
export class TabAdapter {
/**
 * Look up the active tab.
 *
 * @param windowId - Optional window ID. When given, the query targets that
 *   window; otherwise it targets the current window.
 * @returns The first tab returned by the query
 * @throws Error prefixed with "Failed to get active tab" when the query fails
 *   or returns no tabs
 */
async getActiveTab(windowId?: number): Promise<chrome.tabs.Tab> {
  logger.debug(
    `[TabAdapter] Getting active tab${windowId !== undefined ? ` in window ${windowId}` : ''}`,
  )
  try {
    // Scope the query to the given window, or to the current one by default.
    const query: chrome.tabs.QueryInfo =
      windowId !== undefined
        ? { active: true, windowId }
        : { active: true, currentWindow: true }
    const matches = await withTimeout(
      chrome.tabs.query(query),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.query',
    )
    if (matches.length === 0) {
      throw new Error('No active tab found')
    }
    const activeTab = matches[0]
    logger.debug(
      `[TabAdapter] Found active tab: ${activeTab.id} (${activeTab.url})`,
    )
    return activeTab
  } catch (thrown) {
    const reason = thrown instanceof Error ? thrown.message : String(thrown)
    logger.error(`[TabAdapter] Failed to get active tab: ${reason}`)
    throw new Error(`Failed to get active tab: ${reason}`)
  }
}
/**
 * Fetch a single tab by ID.
 *
 * @param tabId - Tab ID to retrieve
 * @returns Tab object
 * @throws Error "Tab not found (id: <tabId>)" on any failure; the underlying
 *   cause is written to the error log only
 */
async getTab(tabId: number): Promise<chrome.tabs.Tab> {
  logger.debug(`[TabAdapter] Getting tab ${tabId}`)
  try {
    const lookup = chrome.tabs.get(tabId)
    const found = await withTimeout(
      lookup,
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.get',
    )
    logger.debug(`[TabAdapter] Found tab: ${found.id} (${found.url})`)
    return found
  } catch (thrown) {
    const reason = thrown instanceof Error ? thrown.message : String(thrown)
    logger.error(`[TabAdapter] Failed to get tab ${tabId}: ${reason}`)
    throw new Error(`Tab not found (id: ${tabId})`)
  }
}
/**
 * List every tab, using an empty query (no filters).
 *
 * @returns Array of all tabs returned by the query
 * @throws Error prefixed with "Failed to get tabs"
 */
async getAllTabs(): Promise<chrome.tabs.Tab[]> {
  logger.debug('[TabAdapter] Getting all tabs')
  try {
    const lookup = chrome.tabs.query({})
    const everyTab = await withTimeout(
      lookup,
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.query',
    )
    logger.debug(`[TabAdapter] Found ${everyTab.length} tabs`)
    return everyTab
  } catch (thrown) {
    const reason = thrown instanceof Error ? thrown.message : String(thrown)
    logger.error(`[TabAdapter] Failed to get all tabs: ${reason}`)
    throw new Error(`Failed to get tabs: ${reason}`)
  }
}
/**
 * Run an arbitrary tabs query.
 *
 * @param query - Chrome tabs query object, forwarded unchanged
 * @returns Array of matching tabs
 * @throws Error prefixed with "Failed to query tabs"
 */
async queryTabs(query: chrome.tabs.QueryInfo): Promise<chrome.tabs.Tab[]> {
  logger.debug(`[TabAdapter] Querying tabs: ${JSON.stringify(query)}`)
  try {
    const lookup = chrome.tabs.query(query)
    const matches = await withTimeout(
      lookup,
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.query',
    )
    logger.debug(`[TabAdapter] Query found ${matches.length} tabs`)
    return matches
  } catch (thrown) {
    const reason = thrown instanceof Error ? thrown.message : String(thrown)
    logger.error(`[TabAdapter] Failed to query tabs: ${reason}`)
    throw new Error(`Failed to query tabs: ${reason}`)
  }
}
/**
 * List the tabs that belong to one window.
 *
 * @param windowId - Window ID used as the query filter
 * @returns Array of tabs in that window
 * @throws Error prefixed with "Failed to get tabs in window"
 */
async getTabsInWindow(windowId: number): Promise<chrome.tabs.Tab[]> {
  logger.debug(`[TabAdapter] Getting tabs in window ${windowId}`)
  try {
    const lookup = chrome.tabs.query({ windowId })
    const windowTabs = await withTimeout(
      lookup,
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.query',
    )
    logger.debug(
      `[TabAdapter] Found ${windowTabs.length} tabs in window ${windowId}`,
    )
    return windowTabs
  } catch (thrown) {
    const reason = thrown instanceof Error ? thrown.message : String(thrown)
    logger.error(
      `[TabAdapter] Failed to get tabs in window ${windowId}: ${reason}`,
    )
    throw new Error(`Failed to get tabs in window: ${reason}`)
  }
}
/**
 * List the tabs of a window, defaulting to the current window.
 *
 * @param windowId - Optional window ID. When given, the query targets that
 *   window; otherwise it targets the current window.
 * @returns Array of tabs in the targeted window
 * @throws Error prefixed with "Failed to get current window tabs"
 */
async getCurrentWindowTabs(windowId?: number): Promise<chrome.tabs.Tab[]> {
  logger.debug(
    `[TabAdapter] Getting tabs in ${windowId !== undefined ? `window ${windowId}` : 'current window'}`,
  )
  try {
    // Scope the query to the given window, or to the current one by default.
    const query: chrome.tabs.QueryInfo =
      windowId !== undefined ? { windowId } : { currentWindow: true }
    const windowTabs = await withTimeout(
      chrome.tabs.query(query),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.query',
    )
    logger.debug(`[TabAdapter] Found ${windowTabs.length} tabs`)
    return windowTabs
  } catch (thrown) {
    const reason = thrown instanceof Error ? thrown.message : String(thrown)
    logger.error(
      `[TabAdapter] Failed to get current window tabs: ${reason}`,
    )
    throw new Error(`Failed to get current window tabs: ${reason}`)
  }
}
/**
 * Create a new tab.
 *
 * @param url - URL to open; an empty or missing value falls back to
 *   'chrome://newtab/'
 * @param active - Whether to make the new tab active (default: true)
 * @param windowId - Optional window ID; only added to the create request
 *   when provided
 * @returns Newly created tab
 * @throws Error prefixed with "Failed to open tab" when creation fails or the
 *   created tab has no ID
 */
async openTab(
  url?: string,
  active = true,
  windowId?: number,
): Promise<chrome.tabs.Tab> {
  const targetUrl = url ? url : 'chrome://newtab/'
  logger.debug(
    `[TabAdapter] Opening new tab: ${targetUrl} (active: ${active}${windowId !== undefined ? `, window: ${windowId}` : ''})`,
  )
  try {
    const createProps: chrome.tabs.CreateProperties =
      windowId !== undefined
        ? { url: targetUrl, active, windowId }
        : { url: targetUrl, active }
    const created = await withTimeout(
      chrome.tabs.create(createProps),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.create',
    )
    const createdId = created.id
    if (!createdId) {
      throw new Error('Created tab has no ID')
    }
    logger.debug(`[TabAdapter] Created tab ${createdId}: ${targetUrl}`)
    return created
  } catch (thrown) {
    const reason = thrown instanceof Error ? thrown.message : String(thrown)
    logger.error(`[TabAdapter] Failed to open tab: ${reason}`)
    throw new Error(`Failed to open tab: ${reason}`)
  }
}
/**
 * Remove a tab.
 *
 * The tab is fetched before removal so the success log can include its title;
 * a failure of that lookup therefore fails the whole call.
 *
 * @param tabId - ID of the tab to close
 * @throws Error prefixed with "Failed to close tab <id>" when the lookup or the removal fails
 */
async closeTab(tabId: number): Promise<void> {
  logger.debug(`[TabAdapter] Closing tab ${tabId}`)

  try {
    const info = await withTimeout(
      chrome.tabs.get(tabId),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.get',
    )
    const label = info.title || 'Untitled'

    await withTimeout(
      chrome.tabs.remove(tabId),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.remove',
    )

    logger.debug(`[TabAdapter] Closed tab ${tabId}: ${label}`)
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.error(`[TabAdapter] Failed to close tab ${tabId}: ${reason}`)
    throw new Error(`Failed to close tab ${tabId}: ${reason}`)
  }
}
/**
 * Activate a tab.
 *
 * @param tabId - ID of the tab to make active
 * @returns The tab object returned by `chrome.tabs.update`
 * @throws Error prefixed with "Failed to switch to tab <id>" when the update fails or yields no tab
 */
async switchTab(tabId: number): Promise<chrome.tabs.Tab> {
  logger.debug(`[TabAdapter] Switching to tab ${tabId}`)

  try {
    const activated = await withTimeout(
      chrome.tabs.update(tabId, { active: true }),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.update',
    )

    if (activated) {
      const label = activated.title || 'Untitled'
      logger.debug(`[TabAdapter] Switched to tab ${tabId}: ${label}`)
      return activated
    }

    // An empty result is treated as a failure and re-wrapped by the catch below.
    throw new Error('Failed to update tab')
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.error(`[TabAdapter] Failed to switch to tab ${tabId}: ${reason}`)
    throw new Error(`Failed to switch to tab ${tabId}: ${reason}`)
  }
}
/**
 * Point an existing tab at a new URL.
 *
 * @param tabId - ID of the tab to navigate
 * @param url - Address to load in that tab
 * @returns The tab object returned by `chrome.tabs.update`
 * @throws Error prefixed with "Failed to navigate tab <id> to <url>" when the update fails or yields no tab
 */
async navigateTab(tabId: number, url: string): Promise<chrome.tabs.Tab> {
  logger.debug(`[TabAdapter] Navigating tab ${tabId} to ${url}`)

  try {
    const updated = await withTimeout(
      chrome.tabs.update(tabId, { url }),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.update',
    )

    if (updated) {
      logger.debug(`[TabAdapter] Tab ${tabId} navigating to ${url}`)
      return updated
    }

    // An empty result is treated as a failure and re-wrapped by the catch below.
    throw new Error('Failed to update tab')
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.error(`[TabAdapter] Failed to navigate tab ${tabId}: ${reason}`)
    throw new Error(`Failed to navigate tab ${tabId} to ${url}: ${reason}`)
  }
}
/**
 * Group tabs together
 *
 * @param tabIds - Tab IDs to group; must contain at least one ID
 * @param groupId - Optional existing group ID to add the tabs to
 * @param windowId - Optional window ID to create the group in (prevents tabs moving to the
 *   wrong window). Only applied when no groupId is given.
 * @returns Group ID of the created or updated group
 * @throws Error when tabIds is empty, or prefixed with "Failed to group tabs" when
 *   `chrome.tabs.group` fails
 */
async groupTabs(
  tabIds: number[],
  groupId?: number,
  windowId?: number,
): Promise<number> {
  if (tabIds.length === 0) {
    throw new Error('At least one tab ID is required')
  }

  // Use the same `!== undefined` test as the option-building code below, so the
  // log never omits an ID (such as 0) that is actually forwarded to Chrome.
  const groupNote = groupId !== undefined ? ` into group ${groupId}` : ''
  const windowNote = windowId !== undefined ? ` in window ${windowId}` : ''
  logger.debug(`Grouping tabs ${tabIds.join(', ')}${groupNote}${windowNote}`)

  try {
    // Chrome API expects [number, ...number[]] tuple type; the emptiness check
    // above guarantees at least one element.
    const tabIdsTuple = tabIds as [number, ...number[]]
    const options: chrome.tabs.GroupOptions = { tabIds: tabIdsTuple }

    if (groupId !== undefined) {
      options.groupId = groupId
    }

    // Specify windowId to prevent Chrome from moving tabs to the focused window.
    // Skipped when adding to an existing group.
    if (windowId !== undefined && groupId === undefined) {
      options.createProperties = { windowId }
    }

    const resultGroupId = await withTimeout(
      chrome.tabs.group(options),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.group',
    )

    logger.debug(`Grouped tabs into group ${resultGroupId}`)
    return resultGroupId
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error)
    logger.error(`Failed to group tabs: ${errorMessage}`)
    throw new Error(`Failed to group tabs: ${errorMessage}`)
  }
}
/**
 * Detach tabs from whatever groups they belong to.
 *
 * @param tabIds - Tab IDs to ungroup; must contain at least one ID
 * @throws Error when tabIds is empty, or prefixed with "Failed to ungroup tabs" when
 *   `chrome.tabs.ungroup` fails
 */
async ungroupTabs(tabIds: number[]): Promise<void> {
  if (tabIds.length === 0) {
    throw new Error('At least one tab ID is required')
  }

  logger.debug(`Ungrouping tabs ${tabIds.join(', ')}`)

  try {
    // The Chrome typings want a non-empty tuple (or a single number).
    const nonEmptyIds = tabIds as [number, ...number[]]
    await withTimeout(
      chrome.tabs.ungroup(nonEmptyIds),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabs.ungroup',
    )
    logger.debug(`Ungrouped ${tabIds.length} tabs`)
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.error(`Failed to ungroup tabs: ${reason}`)
    throw new Error(`Failed to ungroup tabs: ${reason}`)
  }
}
/**
 * List tab groups.
 *
 * @param windowId - Restrict the query to this window. When omitted, the query is unfiltered.
 * @returns Tab groups returned by `chrome.tabGroups.query`
 * @throws Error prefixed with "Failed to get tab groups" when the wrapped query fails
 */
async getTabGroups(windowId?: number): Promise<chrome.tabGroups.TabGroup[]> {
  const windowNote = windowId !== undefined ? ` in window ${windowId}` : ''
  logger.debug(`Getting tab groups${windowNote}`)

  const filter: chrome.tabGroups.QueryInfo =
    windowId !== undefined ? { windowId } : {}

  try {
    const found = await withTimeout(
      chrome.tabGroups.query(filter),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabGroups.query',
    )
    logger.debug(`Found ${found.length} tab groups`)
    return found
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.error(`Failed to get tab groups: ${reason}`)
    throw new Error(`Failed to get tab groups: ${reason}`)
  }
}
/**
 * Change properties of an existing tab group.
 *
 * @param groupId - ID of the group to modify
 * @param properties - Fields to change, passed straight to `chrome.tabGroups.update`
 * @returns The group object returned by Chrome after the update
 * @throws Error prefixed with "Failed to update tab group <id>" when the update fails or
 *   yields no group
 */
async updateTabGroup(
  groupId: number,
  properties: chrome.tabGroups.UpdateProperties,
): Promise<chrome.tabGroups.TabGroup> {
  logger.debug(`Updating tab group ${groupId}: ${JSON.stringify(properties)}`)

  try {
    const updated = await withTimeout(
      chrome.tabGroups.update(groupId, properties),
      CHROME_API_TIMEOUTS.CHROME_API,
      'chrome.tabGroups.update',
    )

    if (updated) {
      logger.debug(
        `Updated tab group ${groupId}: title="${updated.title}", color="${updated.color}"`,
      )
      return updated
    }

    // An empty result is reported as "not found" and re-wrapped by the catch below.
    throw new Error(`Tab group ${groupId} not found`)
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error)
    logger.error(`Failed to update tab group ${groupId}: ${reason}`)
    throw new Error(`Failed to update tab group ${groupId}: ${reason}`)
  }
}
}

View File

@@ -0,0 +1,390 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { ActionRegistry } from '@/actions/ActionRegistry'
import { CreateBookmarkAction } from '@/actions/bookmark/CreateBookmarkAction'
import { CreateBookmarkFolderAction } from '@/actions/bookmark/CreateBookmarkFolderAction'
import { GetBookmarkChildrenAction } from '@/actions/bookmark/GetBookmarkChildrenAction'
import { GetBookmarksAction } from '@/actions/bookmark/GetBookmarksAction'
import { MoveBookmarkAction } from '@/actions/bookmark/MoveBookmarkAction'
import { RemoveBookmarkAction } from '@/actions/bookmark/RemoveBookmarkAction'
import { RemoveBookmarkTreeAction } from '@/actions/bookmark/RemoveBookmarkTreeAction'
import { UpdateBookmarkAction } from '@/actions/bookmark/UpdateBookmarkAction'
import { CaptureScreenshotAction } from '@/actions/browser/CaptureScreenshotAction'
import { CaptureScreenshotPointerAction } from '@/actions/browser/CaptureScreenshotPointerAction'
import { ClearAction } from '@/actions/browser/ClearAction'
import { ClickAction } from '@/actions/browser/ClickAction'
import { ClickCoordinatesAction } from '@/actions/browser/ClickCoordinatesAction'
import { CloseWindowAction } from '@/actions/browser/CloseWindowAction'
import { CreateWindowAction } from '@/actions/browser/CreateWindowAction'
import { ExecuteJavaScriptAction } from '@/actions/browser/ExecuteJavaScriptAction'
import { GetAccessibilityTreeAction } from '@/actions/browser/GetAccessibilityTreeAction'
import { GetInteractiveSnapshotAction } from '@/actions/browser/GetInteractiveSnapshotAction'
import { GetPageLoadStatusAction } from '@/actions/browser/GetPageLoadStatusAction'
import { GetSnapshotAction } from '@/actions/browser/GetSnapshotAction'
import { InputTextAction } from '@/actions/browser/InputTextAction'
import { ScrollDownAction } from '@/actions/browser/ScrollDownAction'
import { ScrollToNodeAction } from '@/actions/browser/ScrollToNodeAction'
import { ScrollUpAction } from '@/actions/browser/ScrollUpAction'
import { SendKeysAction } from '@/actions/browser/SendKeysAction'
import { TypeAtCoordinatesAction } from '@/actions/browser/TypeAtCoordinatesAction'
import { CheckBrowserOSAction } from '@/actions/diagnostics/CheckBrowserOSAction'
import { GetRecentHistoryAction } from '@/actions/history/GetRecentHistoryAction'
import { SearchHistoryAction } from '@/actions/history/SearchHistoryAction'
import { CloseTabAction } from '@/actions/tab/CloseTabAction'
import { GetActiveTabAction } from '@/actions/tab/GetActiveTabAction'
import { GetTabsAction } from '@/actions/tab/GetTabsAction'
import { GroupTabsAction } from '@/actions/tab/GroupTabsAction'
import { ListTabGroupsAction } from '@/actions/tab/ListTabGroupsAction'
import { NavigateAction } from '@/actions/tab/NavigateAction'
import { OpenTabAction } from '@/actions/tab/OpenTabAction'
import { SwitchTabAction } from '@/actions/tab/SwitchTabAction'
import { UngroupTabsAction } from '@/actions/tab/UngroupTabsAction'
import { UpdateTabGroupAction } from '@/actions/tab/UpdateTabGroupAction'
import { CONCURRENCY_CONFIG } from '@/config/constants'
import type { ProtocolRequest, ProtocolResponse } from '@/protocol/types'
import { ConnectionStatus } from '@/protocol/types'
import { ConcurrencyLimiter } from '@/utils/ConcurrencyLimiter'
import { logger } from '@/utils/logger'
import { RequestTracker } from '@/utils/RequestTracker'
import { RequestValidator } from '@/utils/RequestValidator'
import { ResponseQueue } from '@/utils/ResponseQueue'
import type { PortProvider } from '@/websocket/WebSocketClient'
import { WebSocketClient } from '@/websocket/WebSocketClient'
/**
 * BrowserOS Controller
 *
 * Owns the WebSocket client and every helper a request passes through.
 * Message flow: WebSocket → Validator → Tracker → Limiter → Action → Response/Queue → WebSocket
 *
 * Responses that cannot be sent are parked in the response queue and flushed
 * the next time the connection status becomes CONNECTED.
 */
export class BrowserOSController {
  private wsClient: WebSocketClient
  private requestTracker: RequestTracker
  private concurrencyLimiter: ConcurrencyLimiter
  private requestValidator: RequestValidator
  private responseQueue: ResponseQueue
  private actionRegistry: ActionRegistry

  /**
   * Build all collaborators, register every action and hook up the socket
   * callbacks. No connection is opened here; see {@link start}.
   *
   * @param getPort - Port provider handed to the WebSocket client
   */
  constructor(getPort: PortProvider) {
    logger.info('Initializing BrowserOS Controller...')

    const { maxConcurrent, maxQueueSize } = CONCURRENCY_CONFIG

    this.requestTracker = new RequestTracker()
    this.concurrencyLimiter = new ConcurrencyLimiter(maxConcurrent, maxQueueSize)
    this.requestValidator = new RequestValidator()
    this.responseQueue = new ResponseQueue()
    this.wsClient = new WebSocketClient(getPort)
    this.actionRegistry = new ActionRegistry()

    this.registerActions()
    this.setupWebSocketHandlers()
  }

  /** Normalise a caught value to a printable message. */
  private static errorText(error: unknown): string {
    return error instanceof Error ? error.message : String(error)
  }

  /** Connect the socket, then tell the server which windows exist. */
  async start(): Promise<void> {
    logger.info('Starting BrowserOS Controller...')
    await this.wsClient.connect()
    // Report owned windows after connection is established
    await this.reportOwnedWindows()
  }

  /**
   * Send a `register_windows` message listing every window that has an ID.
   * Nothing is sent when there are none; failures are logged, never thrown.
   */
  private async reportOwnedWindows(): Promise<void> {
    try {
      const windowIds: number[] = []
      for (const win of await chrome.windows.getAll()) {
        if (win.id !== undefined) {
          windowIds.push(win.id)
        }
      }

      if (windowIds.length === 0) {
        return
      }

      this.wsClient.send({ type: 'register_windows', windowIds })
      logger.info('Reported owned windows to server', {
        windowCount: windowIds.length,
        windowIds,
      })
    } catch (error) {
      logger.warn('Failed to report owned windows', {
        error: BrowserOSController.errorText(error),
      })
    }
  }

  /** Send a `window_created` event; send failures are logged, never thrown. */
  notifyWindowCreated(windowId: number): void {
    try {
      this.wsClient.send({ type: 'window_created', windowId })
      logger.info('Sent window_created event', { windowId })
    } catch (error) {
      const message = BrowserOSController.errorText(error)
      logger.warn('Failed to send window_created event', {
        windowId,
        error: message,
      })
    }
  }

  /** Send a `window_removed` event; send failures are logged, never thrown. */
  notifyWindowRemoved(windowId: number): void {
    try {
      this.wsClient.send({ type: 'window_removed', windowId })
      logger.debug('Sent window_removed event', { windowId })
    } catch (error) {
      const message = BrowserOSController.errorText(error)
      logger.warn('Failed to send window_removed event', {
        windowId,
        error: message,
      })
    }
  }

  /** Disconnect the socket and release tracker, validator and queued responses. */
  stop(): void {
    logger.info('Stopping BrowserOS Controller...')
    this.wsClient.disconnect()
    this.requestTracker.destroy()
    this.requestValidator.destroy()
    this.responseQueue.clear()
  }

  /** Write the current {@link getStats} snapshot to the log at info level. */
  logStats(): void {
    const { connection, requests, concurrency, validator, responseQueue } =
      this.getStats()

    logger.info('=== Controller Stats ===')
    logger.info(`Connection: ${connection}`)
    logger.info(`Requests: ${JSON.stringify(requests)}`)
    logger.info(`Concurrency: ${JSON.stringify(concurrency)}`)
    logger.info(`Validator: ${JSON.stringify(validator)}`)
    logger.info(`Response Queue: ${responseQueue.size} queued`)
  }

  /** Collect a snapshot of connection status and per-component statistics. */
  getStats() {
    const connection = this.wsClient.getStatus()
    const requests = this.requestTracker.getStats()
    const concurrency = this.concurrencyLimiter.getStats()
    const validator = this.requestValidator.getStats()
    const size = this.responseQueue.size()

    return {
      connection,
      requests,
      concurrency,
      validator,
      responseQueue: { size },
    }
  }

  /** Whether the WebSocket client currently reports a live connection. */
  isConnected(): boolean {
    return this.wsClient.isConnected()
  }

  /** Send a `focused` event; send failures are logged, never thrown. */
  notifyWindowFocused(windowId?: number): void {
    try {
      this.wsClient.send({ type: 'focused', windowId })
      logger.debug('Sent focused event', { windowId })
    } catch (error) {
      const message = BrowserOSController.errorText(error)
      logger.warn('Failed to send focused event', {
        windowId,
        error: message,
      })
    }
  }

  /** Register every supported action under its protocol name. */
  private registerActions(): void {
    logger.info('Registering actions...')

    // Thin alias so each registration fits on one line; calls still happen
    // one by one, in the order written.
    const add = (...entry: Parameters<ActionRegistry['register']>): void => {
      this.actionRegistry.register(...entry)
    }

    // Diagnostics
    add('checkBrowserOS', new CheckBrowserOSAction())

    // Tabs and tab groups
    add('getActiveTab', new GetActiveTabAction())
    add('getTabs', new GetTabsAction())
    add('openTab', new OpenTabAction())
    add('closeTab', new CloseTabAction())
    add('switchTab', new SwitchTabAction())
    add('navigate', new NavigateAction())
    add('listTabGroups', new ListTabGroupsAction())
    add('groupTabs', new GroupTabsAction())
    add('updateTabGroup', new UpdateTabGroupAction())
    add('ungroupTabs', new UngroupTabsAction())

    // Windows
    add('createWindow', new CreateWindowAction())
    add('closeWindow', new CloseWindowAction())

    // Bookmarks
    add('getBookmarks', new GetBookmarksAction())
    add('createBookmark', new CreateBookmarkAction())
    add('removeBookmark', new RemoveBookmarkAction())
    add('updateBookmark', new UpdateBookmarkAction())
    add('createBookmarkFolder', new CreateBookmarkFolderAction())
    add('getBookmarkChildren', new GetBookmarkChildrenAction())
    add('moveBookmark', new MoveBookmarkAction())
    add('removeBookmarkTree', new RemoveBookmarkTreeAction())

    // History
    add('searchHistory', new SearchHistoryAction())
    add('getRecentHistory', new GetRecentHistoryAction())

    // Page interaction
    add('getInteractiveSnapshot', new GetInteractiveSnapshotAction())
    add('click', new ClickAction())
    add('inputText', new InputTextAction())
    add('clear', new ClearAction())
    add('scrollToNode', new ScrollToNodeAction())
    add('captureScreenshot', new CaptureScreenshotAction())
    add('captureScreenshotPointer', new CaptureScreenshotPointerAction())
    add('scrollDown', new ScrollDownAction())
    add('scrollUp', new ScrollUpAction())
    add('executeJavaScript', new ExecuteJavaScriptAction())
    add('sendKeys', new SendKeysAction())
    add('getPageLoadStatus', new GetPageLoadStatusAction())
    add('getSnapshot', new GetSnapshotAction())
    add('getAccessibilityTree', new GetAccessibilityTreeAction())
    add('clickCoordinates', new ClickCoordinatesAction())
    add('typeAtCoordinates', new TypeAtCoordinatesAction())

    const registered = this.actionRegistry.getAvailableActions()
    logger.info(
      `Registered ${registered.length} action(s): ${registered.join(', ')}`,
    )
  }

  /** Route socket messages and status changes to this controller. */
  private setupWebSocketHandlers(): void {
    this.wsClient.onMessage((message: ProtocolResponse) => {
      this.handleIncomingMessage(message)
    })
    this.wsClient.onStatusChange((status: ConnectionStatus) => {
      this.handleStatusChange(status)
    })
  }

  /**
   * Classify an incoming message: anything with an `action` is a request to
   * execute, anything with an `ok` flag is a server reply that is only logged,
   * and everything else is reported as unknown.
   */
  private handleIncomingMessage(message: ProtocolResponse): void {
    const incoming = message as ProtocolResponse & Partial<ProtocolRequest>

    if (incoming.action) {
      // Fire-and-forget; processRequest reports its own failures, this is a last resort.
      this.processRequest(incoming).catch((error) => {
        logger.error(
          `Unhandled error processing request ${incoming.id}: ${error}`,
        )
      })
      return
    }

    if (incoming.ok === undefined) {
      logger.warn(
        `Received unknown message format: ${JSON.stringify(incoming)}`,
      )
      return
    }

    const outcome = incoming.ok ? 'success' : 'error'
    logger.info(`Received server message: ${incoming.id} - ${outcome}`)
    if (incoming.data) {
      logger.debug(`Server data: ${JSON.stringify(incoming.data)}`)
    }
  }

  /**
   * Validate, track and execute one request under the concurrency limiter.
   * On failure an error response is sent, but only if validation got far
   * enough to yield a request ID.
   */
  private async processRequest(request: unknown): Promise<void> {
    let requestId: string | undefined

    try {
      const validated: ProtocolRequest = this.requestValidator.validate(request)
      requestId = validated.id

      this.requestTracker.start(validated.id, validated.action)

      await this.concurrencyLimiter.execute(async () => {
        this.requestTracker.markExecuting(validated.id)
        await this.executeAction(validated)
      })

      this.requestTracker.complete(validated.id)
      this.requestValidator.markComplete(validated.id)
    } catch (error) {
      const reason = BrowserOSController.errorText(error)
      logger.error(`Request processing failed: ${reason}`)

      // Without an ID there is nothing to correlate a response with.
      if (!requestId) {
        return
      }

      this.requestTracker.complete(requestId, reason)
      this.requestValidator.markComplete(requestId)
      this.sendResponse({
        id: requestId,
        ok: false,
        error: reason,
      })
    }
  }

  /** Dispatch the request to its action and send the action's result back. */
  private async executeAction(request: ProtocolRequest): Promise<void> {
    const { id, action, payload } = request
    logger.info(`Executing action: ${action} [${id}]`)

    const result = await this.actionRegistry.dispatch(action, payload)

    this.sendResponse({
      id,
      ok: result.ok,
      data: result.data,
      error: result.error,
    })

    logger.info(`Action ${result.ok ? 'succeeded' : 'failed'}: ${action} [${id}]`)
  }

  /**
   * Send a response now if connected, otherwise queue it. A response whose
   * send throws is queued as well.
   */
  private sendResponse(response: ProtocolResponse): void {
    try {
      if (!this.wsClient.isConnected()) {
        logger.warn(`Not connected. Queueing response: ${response.id}`)
        this.responseQueue.enqueue(response)
        return
      }
      this.wsClient.send(response)
    } catch (error) {
      logger.error(`Failed to send response ${response.id}: ${error}`)
      this.responseQueue.enqueue(response)
    }
  }

  /** Log every status change and flush queued responses once connected. */
  private handleStatusChange(status: ConnectionStatus): void {
    logger.info(`Connection status changed: ${status}`)

    if (status !== ConnectionStatus.CONNECTED || this.responseQueue.isEmpty()) {
      return
    }

    logger.info(`Flushing ${this.responseQueue.size()} queued responses...`)
    this.responseQueue.flush((response) => {
      this.wsClient.send(response)
    })
  }
}

View File

@@ -0,0 +1,213 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { getWebSocketPort } from '@/utils/ConfigHelper'
import { startKeepAlive, stopKeepAlive } from '@/utils/KeepAlive'
import { logger } from '@/utils/logger'
import { BrowserOSController } from './BrowserOSController'
/** Delay between two stats log lines, in milliseconds. */
const STATS_LOG_INTERVAL_MS = 30000

/** Singleton bookkeeping shared by every function in this module. */
interface ControllerState {
  /** The started controller, or null while none is running. */
  controller: BrowserOSController | null
  /** In-flight initialization, or null when no start is in progress. */
  initPromise: Promise<BrowserOSController> | null
  /** Handle of the periodic stats logger, or null when it is not running. */
  statsTimer: ReturnType<typeof setInterval> | null
}

/** `globalThis` extended with the two slots this module stores on it. */
type BrowserOSGlobals = typeof globalThis & {
  __browserosControllerState?: ControllerState
  __browserosController?: BrowserOSController | null
}

const globals = globalThis as BrowserOSGlobals

/**
 * Create a fresh state object, seed it with any controller already exposed on
 * `globalThis`, and publish it there so a later evaluation of this module
 * picks the same object up again.
 */
function createControllerState(): ControllerState {
  const fresh: ControllerState = {
    controller: globals.__browserosController ?? null,
    initPromise: null,
    statsTimer: null,
  }
  globals.__browserosControllerState = fresh
  return fresh
}

// Reuse the state already stored on globalThis when there is one.
const controllerState: ControllerState =
  globals.__browserosControllerState ?? createControllerState()
/**
 * Publish (or clear, when passed null) the controller on
 * `globalThis.__browserosController`. The same slot seeds the initial
 * `controllerState.controller` value when this module is evaluated.
 * NOTE(review): judging by the name this is for inspection from a debugger
 * console — confirm.
 */
function setDebugController(controller: BrowserOSController | null): void {
globals.__browserosController = controller
}
/**
 * Start the periodic stats logger unless one is already running. Each tick
 * logs the stats of whichever controller is current at that moment, if any.
 */
function startStatsTimer(): void {
  if (!controllerState.statsTimer) {
    const logControllerStats = (): void => {
      controllerState.controller?.logStats()
    }
    controllerState.statsTimer = setInterval(
      logControllerStats,
      STATS_LOG_INTERVAL_MS,
    )
  }
}
/** Cancel the periodic stats logger and forget its handle; no-op when it is not running. */
function stopStatsTimer(): void {
  const { statsTimer } = controllerState
  if (statsTimer) {
    clearInterval(statsTimer)
    controllerState.statsTimer = null
  }
}
/**
 * Return the running controller, starting one first if necessary.
 *
 * Callers that arrive while a start is in progress all receive the same
 * in-flight promise. A failed start clears every piece of state it touched
 * and rejects, so the next call tries again from scratch.
 *
 * @returns The started controller
 * @throws Whatever the failed start raised
 */
async function getOrCreateController(): Promise<BrowserOSController> {
  const ready = controllerState.controller
  if (ready) {
    return ready
  }

  if (!controllerState.initPromise) {
    const bootstrap = async (): Promise<BrowserOSController> => {
      try {
        await startKeepAlive()

        const created = new BrowserOSController(getWebSocketPort)
        await created.start()

        controllerState.controller = created
        setDebugController(created)
        startStatsTimer()
        return created
      } catch (error) {
        // Roll back everything a partial start may have set up.
        controllerState.controller = null
        setDebugController(null)
        stopStatsTimer()
        try {
          await stopKeepAlive()
        } catch {
          // ignore
        }
        throw error
      } finally {
        // Success or failure, the start is no longer in flight.
        controllerState.initPromise = null
      }
    }

    controllerState.initPromise = bootstrap()
  }

  const pending = controllerState.initPromise
  if (pending) {
    return pending
  }
  throw new Error('Controller init promise missing')
}
/**
 * Stop the controller (if any) and release everything this module started.
 *
 * An in-flight start is awaited first so that a controller created by it is
 * also stopped; errors from that start are ignored here. The teardown runs in
 * a `finally`, so the controller reference, debug handle, stats timer and
 * keep-alive are released even when `controller.stop()` throws.
 *
 * @param reason - Free-form label recorded in the shutdown log entry
 * @throws Whatever `controller.stop()` raised, after the teardown has run
 */
async function shutdownController(reason: string): Promise<void> {
  logger.info('Controller shutdown requested', { reason })

  const pendingInit = controllerState.initPromise
  if (pendingInit) {
    try {
      await pendingInit
    } catch {
      // ignore start errors during shutdown
    }
  }

  try {
    controllerState.controller?.stop()
  } finally {
    // One teardown path for both the "running" and the "never started" case.
    controllerState.controller = null
    setDebugController(null)
    stopStatsTimer()
    try {
      await stopKeepAlive()
    } catch {
      // ignore
    }
  }
}
/**
 * Kick off controller start-up without waiting for it. A failed start is
 * logged together with the trigger that requested it and is not rethrown.
 *
 * @param trigger - Label identifying what asked for the controller
 */
function ensureControllerRunning(trigger: string): void {
  const reportFailure = (error: unknown): void => {
    const reason =
      error instanceof Error ? error.message : JSON.stringify(error)
    logger.error('Controller failed to start', { trigger, error: reason })
  }

  getOrCreateController().catch(reportFailure)
}
// Formats a rejected notification for the log: Error → its message, anything else → JSON.
const describeRejection = (error: unknown): string =>
  error instanceof Error ? error.message : JSON.stringify(error)

logger.info('Extension loaded')

chrome.runtime.onInstalled.addListener(() => {
  logger.info('Extension installed')
})

chrome.runtime.onStartup.addListener(() => {
  logger.info('Browser startup event')
  ensureControllerRunning('runtime.onStartup')
})

// Immediately attempt to start the controller when the service worker initializes
ensureControllerRunning('service-worker-init')

// Forward focus changes, skipping the "no window focused" sentinel.
chrome.windows.onFocusChanged.addListener((windowId) => {
  if (windowId !== chrome.windows.WINDOW_ID_NONE) {
    notifyWindowFocused(windowId).catch((error) => {
      logger.warn('Failed to notify focus change', {
        windowId,
        error: describeRejection(error),
      })
    })
  }
})

// Forward window creation; windows without an ID are logged but not reported.
chrome.windows.onCreated.addListener((window) => {
  const windowId = window.id
  logger.info('Window created event received', { windowId })

  if (windowId !== undefined) {
    notifyWindowCreated(windowId).catch((error) => {
      logger.warn('Failed to notify window created', {
        windowId,
        error: describeRejection(error),
      })
    })
  }
})

// Forward window removal.
chrome.windows.onRemoved.addListener((windowId) => {
  notifyWindowRemoved(windowId).catch((error) => {
    logger.warn('Failed to notify window removed', {
      windowId,
      error: describeRejection(error),
    })
  })
})

// onSuspend may be absent, hence the optional call; shutdown is fire-and-forget.
chrome.runtime.onSuspend?.addListener(() => {
  logger.info('Extension suspending')
  void shutdownController('runtime.onSuspend')
})
/**
 * Report a focus change through the controller, starting the controller first
 * if none is running.
 *
 * @param windowId - ID of the window that gained focus
 */
async function notifyWindowFocused(windowId: number): Promise<void> {
  const activeController = await getOrCreateController()
  activeController.notifyWindowFocused(windowId)
}
/**
 * Report a newly created window through the controller, starting the
 * controller first if none is running.
 *
 * @param windowId - ID of the created window
 */
async function notifyWindowCreated(windowId: number): Promise<void> {
  const activeController = await getOrCreateController()
  activeController.notifyWindowCreated(windowId)
}
/**
 * Report a removed window through the controller, starting the controller
 * first if none is running.
 *
 * @param windowId - ID of the removed window
 */
async function notifyWindowRemoved(windowId: number): Promise<void> {
  const activeController = await getOrCreateController()
  activeController.notifyWindowRemoved(windowId)
}

View File

@@ -0,0 +1,59 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { CONTENT_LIMITS } from '@browseros/shared/constants/limits'
import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
// URL schemes allowed for the controller WebSocket.
export type WebSocketProtocol = 'ws' | 'wss'
// Shape of the WebSocket connection settings (see WEBSOCKET_CONFIG for the values).
// Only `reconnectIntervalMs` names its unit; the other timing fields are
// presumably milliseconds as well — TODO confirm against the shared TIMEOUTS constants.
export interface WebSocketConfig {
readonly protocol: WebSocketProtocol
readonly host: string
readonly path: string
// Port to fall back on; NOTE(review): presumably used when no port is configured — confirm.
readonly defaultExtensionPort: number
readonly reconnectIntervalMs: number
readonly heartbeatInterval: number
readonly heartbeatTimeout: number
readonly connectionTimeout: number
readonly requestTimeout: number
}
// Limits passed to the controller's ConcurrencyLimiter, in this order:
// (maxConcurrent, maxQueueSize).
export interface ConcurrencyConfig {
readonly maxConcurrent: number
readonly maxQueueSize: number
}
// Shape of the logger settings (see LOGGING_CONFIG for the values).
// NOTE(review): `level` is presumably the minimum severity that gets emitted
// and `prefix` a string prepended to each line — confirm in the logger.
export interface LoggingConfig {
readonly enabled: boolean
readonly level: 'debug' | 'info' | 'warn' | 'error'
readonly prefix: string
}
// WebSocket settings: unencrypted `ws` to the loopback address 127.0.0.1 at
// path `/controller`. The port and all timing values come from the shared
// constants packages.
export const WEBSOCKET_CONFIG: WebSocketConfig = {
protocol: 'ws',
host: '127.0.0.1',
path: '/controller',
defaultExtensionPort: DEFAULT_PORTS.extension,
reconnectIntervalMs: TIMEOUTS.WS_RECONNECT_INTERVAL,
heartbeatInterval: TIMEOUTS.WS_HEARTBEAT_INTERVAL,
heartbeatTimeout: TIMEOUTS.WS_HEARTBEAT_TIMEOUT,
connectionTimeout: TIMEOUTS.WS_CONNECTION_TIMEOUT,
requestTimeout: TIMEOUTS.WS_REQUEST_TIMEOUT,
}
// Concurrency limits handed to the controller's ConcurrencyLimiter.
// NOTE(review): maxConcurrent of 1 presumably means requests execute one at a
// time with the rest waiting in the limiter's queue — confirm in ConcurrencyLimiter.
export const CONCURRENCY_CONFIG: ConcurrencyConfig = {
maxConcurrent: 1,
maxQueueSize: CONTENT_LIMITS.MAX_QUEUE_SIZE,
}
// Logger settings: logging on, level 'info', empty prefix.
export const LOGGING_CONFIG: LoggingConfig = {
enabled: true,
level: 'info',
prefix: '',
}

View File

@@ -0,0 +1,60 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { z } from 'zod'
// Request schema: a message asking for an action to be run.
// `id` is described as a UUID but is only validated as a string; `action`
// must be a non-empty string; `payload` is optional and not validated here.
export const ProtocolRequestSchema = z.object({
id: z.string().describe('Request UUID'),
action: z.string().min(1).describe('Action name'),
payload: z.any().optional().describe('Action-specific data'),
})
// Response schema: the reply to a request, correlated by `id`.
// Unlike ActionResponseSchema, no relationship between `ok` and `error` is
// enforced here.
export const ProtocolResponseSchema = z.object({
id: z.string().describe('Request ID (same as request)'),
ok: z.boolean().describe('Success flag'),
data: z.any().optional().describe('Result data'),
error: z.string().optional().describe('Error message'),
})
// Action response schema (used internally by action handlers).
// The refinement ties `error` to `ok`: a successful response must not carry an
// error, and a failed response must carry a non-empty one.
export const ActionResponseSchema = z
  .object({
    ok: z.boolean().describe('Success flag'),
    data: z.any().optional().describe('Result data'),
    error: z.string().optional().describe('Error message'),
  })
  .refine(
    ({ ok, error }) => (ok ? error === undefined : Boolean(error)),
    {
      message:
        'When ok is true, error must be undefined. When ok is false, error must be provided.',
    },
  )
// Type exports: static types inferred from the schemas above, so the types
// and the runtime validation cannot drift apart.
export type ProtocolRequest = z.infer<typeof ProtocolRequestSchema>
export type ProtocolResponse = z.infer<typeof ProtocolResponseSchema>
export type ActionResponse = z.infer<typeof ActionResponseSchema>
// Connection status enum: the states a WebSocket connection is reported in.
// String-valued, so a status prints as its lowercase name when logged.
export enum ConnectionStatus {
DISCONNECTED = 'disconnected',
CONNECTING = 'connecting',
CONNECTED = 'connected',
RECONNECTING = 'reconnecting',
ERROR = 'error',
}

View File

@@ -0,0 +1,419 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
// Type definitions for chrome.browserOS API
declare namespace chrome.browserOS {
// Page load status information, delivered to the getPageLoadStatus callback.
interface PageLoadStatus {
isResourcesLoading: boolean
isDOMContentLoaded: boolean
isPageComplete: boolean
}
// Rectangle bounds (used for InteractiveNode.rect).
// NOTE(review): units and coordinate origin are not stated here — confirm.
interface Rect {
x: number
y: number
width: number
height: number
}
// Alias for backward compatibility
type BoundingRect = Rect
// Interactive element types: how an element in an interactive snapshot is classified.
type InteractiveNodeType = 'clickable' | 'typeable' | 'selectable' | 'other'
// Supported keyboard keys. This is the same set of 13 key names accepted by
// the `key` parameter of sendKeys.
type Key =
| 'Enter'
| 'Delete'
| 'Backspace'
| 'Tab'
| 'Escape'
| 'ArrowUp'
| 'ArrowDown'
| 'ArrowLeft'
| 'ArrowRight'
| 'Home'
| 'End'
| 'PageUp'
| 'PageDown'
// Interactive node in the snapshot. `nodeId` is the handle passed to the
// node-based functions in this namespace (click, inputText, clear, scrollToNode).
interface InteractiveNode {
nodeId: number
type: InteractiveNodeType
name?: string
rect?: Rect
// Free-form string attributes; only `in_viewport` is named explicitly.
attributes?: {
in_viewport?: string // "true" if visible in viewport, "false" if not visible
[key: string]: string | undefined
}
}
// Snapshot of interactive elements, delivered to the getInteractiveSnapshot callback.
interface InteractiveSnapshot {
snapshotId: number
timestamp: number
elements: InteractiveNode[]
hierarchicalStructure?: string // Hierarchical text representation with context
processingTimeMs: number // Performance metrics
}
// Options for getInteractiveSnapshot
// NOTE(review): `viewportOnly` presumably limits the snapshot to elements
// inside the viewport — confirm.
interface InteractiveSnapshotOptions {
viewportOnly?: boolean
}
// Accessibility node. `childIds` refers to other nodes by their `id`.
interface AccessibilityNode {
id: number
role: string
name?: string
value?: string
attributes?: Record<string, unknown>
childIds?: number[]
}
// Accessibility tree, delivered to the getAccessibilityTree callback.
// NOTE(review): `nodes` is keyed by string while node IDs are numbers;
// presumably the key is the stringified node id — confirm.
interface AccessibilityTree {
rootId: number
nodes: Record<string, AccessibilityNode>
}
// API functions
// All of these are callback-based and return void. Each has one overload
// with a leading `tabId` and one without.
// NOTE(review): the overloads without `tabId` presumably act on the active
// tab (captureScreenshot documents this for its own no-tabId overload) — confirm.

// Reports the load state of a page.
function getPageLoadStatus(
tabId: number,
callback: (status: PageLoadStatus) => void,
): void
function getPageLoadStatus(callback: (status: PageLoadStatus) => void): void
// Delivers the accessibility tree of a page.
function getAccessibilityTree(
tabId: number,
callback: (tree: AccessibilityTree) => void,
): void
function getAccessibilityTree(
callback: (tree: AccessibilityTree) => void,
): void
// Delivers a snapshot of a page's interactive elements; `tabId` and `options`
// are independently optional, which gives four overloads.
function getInteractiveSnapshot(
tabId: number,
options: InteractiveSnapshotOptions,
callback: (snapshot: InteractiveSnapshot) => void,
): void
function getInteractiveSnapshot(
tabId: number,
callback: (snapshot: InteractiveSnapshot) => void,
): void
function getInteractiveSnapshot(
options: InteractiveSnapshotOptions,
callback: (snapshot: InteractiveSnapshot) => void,
): void
function getInteractiveSnapshot(
callback: (snapshot: InteractiveSnapshot) => void,
): void
// Node-based actions; `nodeId` comes from an InteractiveSnapshot.
function click(tabId: number, nodeId: number, callback: () => void): void
function click(nodeId: number, callback: () => void): void
function inputText(
tabId: number,
nodeId: number,
text: string,
callback: () => void,
): void
function inputText(nodeId: number, text: string, callback: () => void): void
function clear(tabId: number, nodeId: number, callback: () => void): void
function clear(nodeId: number, callback: () => void): void
// Page scrolling.
function scrollUp(tabId: number, callback: () => void): void
function scrollUp(callback: () => void): void
function scrollDown(tabId: number, callback: () => void): void
function scrollDown(callback: () => void): void
// Scrolls to a node; the callback receives a `scrolled` flag.
// NOTE(review): presumably true when a scroll actually happened — confirm.
function scrollToNode(
tabId: number,
nodeId: number,
callback: (scrolled: boolean) => void,
): void
function scrollToNode(
nodeId: number,
callback: (scrolled: boolean) => void,
): void
// Sends one of the supported special keys (see `Key`), with or without an
// explicit tab ID. The parameter reuses the namespace's `Key` type instead of
// repeating the union inline, so the list of supported keys lives in one place.
// NOTE(review): the overload without `tabId` presumably targets the active tab — confirm.
function sendKeys(tabId: number, key: Key, callback: () => void): void
function sendKeys(key: Key, callback: () => void): void
// captureScreenshot overloads, from most to fewest arguments. In every
// overload the callback receives the image as a data URL string.
// NOTE(review): units of `thumbnailSize`, `width` and `height` are not stated
// here (presumably pixels) — confirm.

// Capture screenshot with all optional parameters
function captureScreenshot(
tabId: number,
thumbnailSize: number,
showHighlights: boolean,
width: number,
height: number,
callback: (dataUrl: string) => void,
): void
// Capture screenshot with tab ID, thumbnail size, and highlights
function captureScreenshot(
tabId: number,
thumbnailSize: number,
showHighlights: boolean,
callback: (dataUrl: string) => void,
): void
// Capture screenshot with tab ID and thumbnail size
function captureScreenshot(
tabId: number,
thumbnailSize: number,
callback: (dataUrl: string) => void,
): void
// Capture screenshot with tab ID only (backwards compatibility)
function captureScreenshot(
tabId: number,
callback: (dataUrl: string) => void,
): void
// Capture screenshot of active tab with default size
function captureScreenshot(callback: (dataUrl: string) => void): void
// Snapshot extraction types
type SnapshotType = 'text' | 'links'
// Context for snapshot extraction
type SnapshotContext = 'visible' | 'full'
// Section types based on ARIA landmarks
type SectionType =
| 'main'
| 'navigation'
| 'footer'
| 'header'
| 'article'
| 'aside'
| 'complementary'
| 'contentinfo'
| 'form'
| 'search'
| 'region'
| 'other'
// Text snapshot result for a section
interface TextSnapshotResult {
text: string
// NOTE(review): presumably the length of `text` — confirm
characterCount: number
}
// Link information
interface LinkInfo {
text: string
url: string
title?: string
attributes?: Record<string, unknown>
isExternal: boolean
}
// Links snapshot result for a section
interface LinksSnapshotResult {
links: LinkInfo[]
}
// Section with all possible snapshot results
// Both result fields are optional.
// NOTE(review): presumably only the field matching the requested SnapshotType is populated — confirm.
interface SnapshotSection {
// Declared as plain `string`, not `SectionType`.
// NOTE(review): confirm whether values are limited to SectionType.
type: string
textResult?: TextSnapshotResult
linksResult?: LinksSnapshotResult
}
// Main snapshot result
interface Snapshot {
type: SnapshotType
context: SnapshotContext
// NOTE(review): unit/epoch of `timestamp` is not visible here — confirm
timestamp: number
sections: SnapshotSection[]
processingTimeMs: number
}
// Options for getSnapshot
interface SnapshotOptions {
context?: SnapshotContext
includeSections?: SectionType[]
}
// Get a content snapshot; the result is delivered to `callback`.
// Overloads in this list: (tabId, type, options), (tabId, type), (tabId),
// (type, options) and (type). There is no callback-only overload and no
// (tabId, options) overload here.
function getSnapshot(
tabId: number,
type: SnapshotType,
options: SnapshotOptions,
callback: (snapshot: Snapshot) => void,
): void
function getSnapshot(
tabId: number,
type: SnapshotType,
callback: (snapshot: Snapshot) => void,
): void
// NOTE(review): the snapshot type used when `type` is omitted is not visible here — confirm
function getSnapshot(
tabId: number,
callback: (snapshot: Snapshot) => void,
): void
function getSnapshot(
type: SnapshotType,
options: SnapshotOptions,
callback: (snapshot: Snapshot) => void,
): void
function getSnapshot(
type: SnapshotType,
callback: (snapshot: Snapshot) => void,
): void
// Get BrowserOS version number
// The version is passed to `callback` as a string.
function getVersionNumber(callback: (version: string) => void): void
// Logs a metric event with optional properties
// Overloads cover every combination of optional `properties` and optional `callback`.
function logMetric(
eventName: string,
properties: Record<string, unknown>,
callback: () => void,
): void
function logMetric(eventName: string, callback: () => void): void
function logMetric(
eventName: string,
properties?: Record<string, unknown>,
): void
// Already covered by the overload above (its `properties` is optional)
function logMetric(eventName: string): void
// Execute JavaScript in a tab
// `callback` receives the result typed as `unknown`; callers must narrow it before use.
function executeJavaScript(
tabId: number,
code: string,
callback: (result: unknown) => void,
): void
// Same, without an explicit tab ID
function executeJavaScript(
code: string,
callback: (result: unknown) => void,
): void
// Click at specific viewport coordinates
function clickCoordinates(
tabId: number,
x: number,
y: number,
callback: () => void,
): void
// Same, without an explicit tab ID
function clickCoordinates(x: number, y: number, callback: () => void): void
// Type text at specific viewport coordinates
function typeAtCoordinates(
tabId: number,
x: number,
y: number,
text: string,
callback: () => void,
): void
// Same, without an explicit tab ID
function typeAtCoordinates(
x: number,
y: number,
text: string,
callback: () => void,
): void
// Preference object
interface PrefObject {
key: string
// NOTE(review): the set of possible `type` strings is not visible here — confirm
type: string
// Typed as `unknown`; callers must check the runtime type before use
value: unknown
}
// Get a specific preference value
// The preference is passed to `callback` as a PrefObject.
function getPref(name: string, callback: (pref: PrefObject) => void): void
// Set a specific preference value
// `callback` receives a `success` boolean.
// NOTE(review): the meaning of `pageId` is not visible here — confirm against the implementation.
function setPref(
name: string,
value: unknown,
pageId: string,
callback: (success: boolean) => void,
): void
// Same, without `pageId`
function setPref(
name: string,
value: unknown,
callback: (success: boolean) => void,
): void
// Get all preferences (filtered to browseros.* prefs)
function getAllPrefs(callback: (prefs: PrefObject[]) => void): void
}
// Ambient declarations for bulk preference access under `chrome.BrowserOS`.
// NOTE(review): the namespace is spelled `BrowserOS` (capital B) — confirm this
// matches the runtime object name exposed by the browser.
declare namespace chrome {
namespace BrowserOS {
// Read several preferences at once; `callback` receives a record of values typed as `unknown`.
// NOTE(review): presumably the record is keyed by the requested `keys` — confirm.
function getPrefs(
keys: string[],
callback: (prefs: Record<string, unknown>) => void,
): void
// Write several preferences at once; the optional `callback` receives a `success` boolean.
function setPrefs(
prefs: Record<string, unknown>,
callback?: (success: boolean) => void,
): void
}
}

View File

@@ -0,0 +1,123 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from './logger'
/** A queued unit of work plus the settle functions of the promise handed to its caller. */
interface QueuedTask<T> {
  task: () => Promise<T>
  resolve: (value: T) => void
  reject: (error: Error) => void
}

/** Point-in-time snapshot returned by {@link ConcurrencyLimiter.getStats}. */
export interface ConcurrencyStats {
  /** 1 while a task is running, otherwise 0. */
  inFlight: number
  /** Number of tasks waiting in the queue. */
  queued: number
  /** 1.0 while a task is running, otherwise 0.0. */
  utilization: number
}

/**
 * Runs asynchronous tasks strictly one at a time (mutex mode), in arrival
 * order, with a bounded waiting queue.
 *
 * `maxConcurrent` is accepted for interface compatibility only: any value
 * other than 1 just produces a warning and execution stays sequential.
 */
export class ConcurrencyLimiter {
  private isProcessing = false
  private queue: Array<QueuedTask<unknown>> = []

  /**
   * @param maxConcurrent - Requested concurrency; only used for the warning.
   * @param maxQueueSize - Maximum number of waiting tasks before `execute` rejects.
   */
  constructor(
    maxConcurrent: number,
    private maxQueueSize = 1000,
  ) {
    if (maxConcurrent !== 1) {
      logger.warn(
        `ConcurrencyLimiter: maxConcurrent=${maxConcurrent} but extension is single-threaded. ` +
          `Using mutex mode (sequential execution) to prevent race conditions.`,
      )
    }
    logger.info(
      `ConcurrencyLimiter initialized: sequential=true, queueSize=${maxQueueSize}`,
    )
  }

  /**
   * Enqueue `task` and settle with its outcome once it has run.
   *
   * @param task - Factory producing the promise to run under the mutex.
   * @returns A promise that mirrors the task's resolution or rejection.
   * @throws Error (as a rejection) when the waiting queue is already full.
   */
  async execute<T>(task: () => Promise<T>): Promise<T> {
    // Queue limit check first
    if (this.queue.length >= this.maxQueueSize) {
      logger.error(
        `Queue full (${this.maxQueueSize} requests). Rejecting request.`,
      )
      throw new Error(
        `Controller overloaded. Queue full (${this.maxQueueSize} requests). Server should slow down.`,
      )
    }

    return new Promise<T>((resolve, reject) => {
      this.queue.push({
        task,
        // The queue stores heterogeneous tasks as `unknown`; this entry's task
        // produces T, so narrowing here is sound.
        resolve: (value) => resolve(value as T),
        reject,
      })

      const status = this.isProcessing ? 'QUEUED (mutex held)' : 'IMMEDIATE'
      logger.info(
        `[MUTEX] Task arrival - Status: ${status}, Queue size now: ${this.queue.length}`,
      )

      if (!this.isProcessing) {
        this.processQueue()
      }
    })
  }

  /** Start the next queued task if the mutex is free; re-invoked after each task settles. */
  private processQueue(): void {
    if (this.isProcessing || this.queue.length === 0) {
      return
    }

    // Capture BEFORE we remove from queue to show true queue size
    const queueSizeBeforeRemoval = this.queue.length
    const item = this.queue.shift()
    if (!item) {
      return
    }
    this.isProcessing = true

    const { task, resolve, reject } = item
    logger.info(
      `[MUTEX] Acquired. Started processing (${queueSizeBeforeRemoval} task(s) were queued, ${this.queue.length} still waiting).`,
    )

    const startTime = Date.now()

    // A task may throw synchronously before returning its promise. Convert
    // that into a rejection so the mutex is always released in `finally`;
    // otherwise `isProcessing` would stay true and every later task would
    // wait forever.
    let running: Promise<unknown>
    try {
      running = Promise.resolve(task())
    } catch (error) {
      running = Promise.reject(error)
    }

    running
      .then(resolve)
      .catch(reject)
      .finally(() => {
        const duration = Date.now() - startTime
        this.isProcessing = false
        logger.info(
          `[MUTEX] Released after ${duration}ms. ${this.queue.length} task(s) remaining.`,
        )
        this.processQueue()
      })
  }

  /** Report whether a task is running and how many are waiting. */
  getStats(): ConcurrencyStats {
    return {
      inFlight: this.isProcessing ? 1 : 0,
      queued: this.queue.length,
      utilization: this.isProcessing ? 1.0 : 0.0,
    }
  }

  /** Log the current stats (for debugging). */
  logStats(): void {
    const stats = this.getStats()
    logger.info(
      `Concurrency: ${stats.inFlight} in-flight (mutex mode), ` +
        `${stats.queued} queued, ` +
        `${Math.round(stats.utilization * 100)}% utilization`,
    )
  }
}

View File

@@ -0,0 +1,37 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
/// <reference path="../types/chrome-browser-os.d.ts" />
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { WEBSOCKET_CONFIG } from '@/config/constants'
import { logger } from '@/utils/logger'
/**
 * Get the WebSocket port from BrowserOS preferences.
 *
 * Reads the `browseros.server.extension_port` preference through the BrowserOS
 * adapter and returns it when it is a usable TCP port (an integer in 1..65535).
 * Falls back to `WEBSOCKET_CONFIG.defaultExtensionPort` when the preference is
 * missing, is not a number, is out of range, or cannot be retrieved.
 *
 * @returns The port to connect to. This function never rejects.
 */
export async function getWebSocketPort(): Promise<number> {
  const fallbackPort = WEBSOCKET_CONFIG.defaultExtensionPort
  try {
    const adapter = getBrowserOSAdapter()
    const pref = await adapter.getPref('browseros.server.extension_port')
    const value: unknown = pref?.value

    if (typeof value === 'number') {
      // `typeof NaN === 'number'`, and 0, negative, fractional or >65535
      // values are not connectable ports, so validate before trusting it.
      if (Number.isInteger(value) && value >= 1 && value <= 65535) {
        logger.info(`Using port from BrowserOS preferences: ${value}`)
        return value
      }
      logger.warn(
        `Port preference value ${value} is not a valid port, using default: ${fallbackPort}`,
      )
      return fallbackPort
    }

    logger.warn(`Port preference not found, using default: ${fallbackPort}`)
    return fallbackPort
  } catch (error) {
    logger.error(
      `Failed to get port from BrowserOS preferences: ${error}, using default: ${fallbackPort}`,
    )
    return fallbackPort
  }
}

View File

@@ -0,0 +1,39 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from '@/utils/logger'
const KEEPALIVE_ALARM_NAME = 'browseros-keepalive'
// NOTE(review): stock Chrome documents a minimum alarm period of 30 seconds
// (0.5 minutes) and clamps smaller values — confirm BrowserOS honours 0.33.
const KEEPALIVE_INTERVAL_MINUTES = 0.33 // ~20 seconds

let isInitialized = false

/**
 * Alarm handler for the keep-alive alarm.
 *
 * Kept as a single module-level reference so the exact same function can be
 * unregistered in stopKeepAlive(); an inline closure could never be removed
 * and would accumulate across stop/start cycles.
 */
const handleKeepAliveAlarm = (alarm: { name: string }): void => {
  if (alarm.name === KEEPALIVE_ALARM_NAME) {
    logger.debug('KeepAlive: ping (service worker alive)')
  }
}

/**
 * Start the periodic keep-alive alarm.
 *
 * Idempotent: calling it while already started (or while a start is still in
 * flight) does nothing. Registers exactly one `onAlarm` listener.
 *
 * @throws Whatever `chrome.alarms.create` rejects with; in that case the
 * listener is removed again and the module returns to the stopped state.
 */
export async function startKeepAlive(): Promise<void> {
  if (isInitialized) {
    logger.debug('KeepAlive already started')
    return
  }

  // Claim the flag before awaiting so a concurrent call cannot register a
  // second listener while the alarm is still being created.
  isInitialized = true

  if (!chrome.alarms.onAlarm.hasListener(handleKeepAliveAlarm)) {
    chrome.alarms.onAlarm.addListener(handleKeepAliveAlarm)
  }

  try {
    await chrome.alarms.create(KEEPALIVE_ALARM_NAME, {
      periodInMinutes: KEEPALIVE_INTERVAL_MINUTES,
    })
  } catch (error) {
    // Roll back so a later startKeepAlive() can retry from a clean state.
    chrome.alarms.onAlarm.removeListener(handleKeepAliveAlarm)
    isInitialized = false
    throw error
  }

  logger.info(
    `KeepAlive started: alarm every ${KEEPALIVE_INTERVAL_MINUTES * 60}s`,
  )
}

/**
 * Stop the keep-alive alarm and unregister its listener.
 * Safe to call when keep-alive was never started.
 */
export async function stopKeepAlive(): Promise<void> {
  chrome.alarms.onAlarm.removeListener(handleKeepAliveAlarm)
  await chrome.alarms.clear(KEEPALIVE_ALARM_NAME)
  isInitialized = false
  logger.info('KeepAlive stopped')
}

View File

@@ -0,0 +1,142 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { getBrowserOSAdapter } from '@/adapters/BrowserOSAdapter'
import { logger } from '@/utils/logger'
// How long the injected pointer stays on the page before removing itself.
const POINTER_DISPLAY_DURATION_MS = 3000
// Pause between showing the pointer and letting the caller perform its action.
const POINTER_DELAY_BEFORE_ACTION_MS = 500

/**
 * PointerOverlay - Shows a visual mouse pointer overlay before actions
 *
 * Injects JavaScript to display a pointer arrow at the specified coordinates.
 * The pointer auto-removes after POINTER_DISPLAY_DURATION_MS.
 *
 * biome-ignore lint/complexity/noStaticOnlyClass: class created before biome was setup
 */
export class PointerOverlay {
  private static browserOS = getBrowserOSAdapter()

  /**
   * Show a pointer at the specified coordinates.
   *
   * Failures to inject the script are logged as warnings and swallowed, so
   * this method does not reject because of an injection error.
   *
   * @param tabId - Tab to show pointer in
   * @param x - X coordinate in viewport pixels
   * @param y - Y coordinate in viewport pixels
   * @param text - Optional label text (e.g., "Click", "Type: hello...")
   */
  static async showPointer(
    tabId: number,
    x: number,
    y: number,
    text?: string,
  ): Promise<void> {
    const pointerId = `browseros-pointer-${Date.now()}`

    // JSON.stringify yields a complete, correctly escaped JavaScript string
    // literal (quotes, backslashes, newlines, control characters). Manual
    // quote escaping left raw newlines in the literal, which made the whole
    // injected script a syntax error for multi-line labels.
    const textLabel = text
      ? `
      var label = document.createElement('div');
      label.style.cssText = 'position: absolute; top: 20px; left: 12px; background: rgba(0,0,0,0.9); color: white; padding: 4px 8px; border-radius: 4px; font-size: 12px; font-family: monospace; white-space: nowrap; box-shadow: 0 2px 4px rgba(0,0,0,0.5);';
      label.textContent = ${JSON.stringify(text)};
      shadow.appendChild(label);
      `
      : ''

    const script = `
    (function() {
      var existing = document.querySelector('browseros-pointer');
      if (existing) existing.remove();
      if (!customElements.get('browseros-pointer')) {
        customElements.define('browseros-pointer', class extends HTMLElement {
          constructor() {
            super();
            this.attachShadow({ mode: 'open' });
          }
        });
      }
      var host = document.createElement('browseros-pointer');
      host.id = '${pointerId}';
      host.style.cssText = 'position: fixed; left: ${x}px; top: ${y}px; z-index: 2147483647; pointer-events: none;';
      var shadow = host.shadowRoot;
      var arrow = document.createElement('div');
      arrow.style.cssText = 'width: 0; height: 0; border-style: solid; border-width: 0 12px 20px 12px; border-color: transparent transparent #FB6618 transparent; transform: translate(-3px, -3px) rotate(45deg); filter: drop-shadow(1px 1px 2px rgba(0,0,0,0.4));';
      shadow.appendChild(arrow);
      ${textLabel}
      document.body.appendChild(host);
      setTimeout(function() {
        var el = document.getElementById('${pointerId}');
        if (el) el.remove();
      }, ${POINTER_DISPLAY_DURATION_MS});
    })();
    `

    try {
      await PointerOverlay.browserOS.executeJavaScript(tabId, script)
      logger.debug(
        `[PointerOverlay] Showed pointer at (${x}, ${y}) in tab ${tabId}${text ? ` with label "${text}"` : ''}`,
      )
    } catch (error) {
      // Best-effort visual aid: never let a failed overlay block the action.
      logger.warn(
        `[PointerOverlay] Failed to show pointer: ${error instanceof Error ? error.message : String(error)}`,
      )
    }
  }

  /**
   * Show pointer and wait before action.
   * Resolves after POINTER_DELAY_BEFORE_ACTION_MS so the action can proceed.
   */
  static async showPointerAndWait(
    tabId: number,
    x: number,
    y: number,
    text?: string,
  ): Promise<void> {
    await PointerOverlay.showPointer(tabId, x, y, text)
    await PointerOverlay.delay(POINTER_DELAY_BEFORE_ACTION_MS)
  }

  /**
   * Calculate center coordinates from a rect, rounded to whole pixels.
   */
  static getCenterCoordinates(rect: {
    x: number
    y: number
    width: number
    height: number
  }): { x: number; y: number } {
    return {
      x: Math.round(rect.x + rect.width / 2),
      y: Math.round(rect.y + rect.height / 2),
    }
  }

  /**
   * Calculate left-center coordinates (for type actions): 10px in from the
   * rect's left edge, vertically centered, rounded to whole pixels.
   */
  static getLeftCenterCoordinates(rect: {
    x: number
    y: number
    width: number
    height: number
  }): { x: number; y: number } {
    return {
      x: Math.round(rect.x + 10),
      y: Math.round(rect.y + rect.height / 2),
    }
  }

  /** Resolve after `ms` milliseconds. */
  private static delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms))
  }
}

View File

@@ -0,0 +1,129 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { logger } from './logger'
/** Lifecycle record for one tracked request. */
export interface TrackedRequest {
  id: string
  action: string
  /** `Date.now()` at the moment `start()` was called. */
  startTime: number
  status: 'pending' | 'executing' | 'completed' | 'failed'
  /** Milliseconds from `start()` to `complete()`; set only once finished. */
  duration?: number
  /** Error text passed to `complete()`; set only for failed requests. */
  error?: string
}

/** Aggregate figures over the requests currently held by the tracker. */
export interface RequestStats {
  inFlight: number
  /** Mean duration in ms (rounded) of requests that have a duration. */
  avgDuration: number
  /** Failed / all tracked requests, rounded to two decimals. */
  errorRate: number
  totalRequests: number
}

/**
 * Tracks request lifecycles in memory for logging and statistics.
 *
 * Finished requests are evicted one minute after completion; a periodic sweep
 * additionally removes finished requests that started over five minutes ago.
 * Call `destroy()` to release all timers.
 */
export class RequestTracker {
  private requests = new Map<string, TrackedRequest>()
  private cleanupInterval: ReturnType<typeof setInterval> | null = null
  // Pending per-request eviction timers, keyed by request id, so they can be
  // cancelled when the id is reused or the tracker is destroyed.
  private evictionTimers = new Map<string, ReturnType<typeof setTimeout>>()

  constructor() {
    // Start periodic cleanup of old completed requests
    this.cleanupInterval = setInterval(() => this.cleanup(), 60000) // Every 1 minute
  }

  /** Begin tracking request `id` in the `pending` state (replaces any existing record). */
  start(id: string, action: string): void {
    // A previous request with the same id may still have an eviction timer
    // pending; cancel it so it cannot delete this new in-flight record.
    this.cancelEviction(id)
    this.requests.set(id, {
      id,
      action,
      startTime: Date.now(),
      status: 'pending',
    })
    logger.debug(`Request started: ${id} [${action}]`)
  }

  /** Move request `id` to the `executing` state; unknown ids are ignored. */
  markExecuting(id: string): void {
    const req = this.requests.get(id)
    if (req) {
      req.status = 'executing'
      logger.debug(`Request executing: ${id}`)
    }
  }

  /**
   * Finish request `id`, recording its duration. Unknown ids are ignored.
   *
   * @param id - Request identifier passed to `start()`.
   * @param error - When truthy, the request is marked `failed` with this text.
   */
  complete(id: string, error?: string): void {
    const req = this.requests.get(id)
    if (!req) {
      return
    }

    req.status = error ? 'failed' : 'completed'
    req.duration = Date.now() - req.startTime
    req.error = error
    logger.info(
      `Request ${error ? 'failed' : 'completed'}: ${id} [${req.action}] in ${req.duration}ms`,
    )

    // Schedule cleanup after 1 minute (replacing any earlier timer for this id)
    this.cancelEviction(id)
    const timer = setTimeout(() => {
      this.evictionTimers.delete(id)
      // Only evict the record this timer was created for, never a newer
      // request that happens to reuse the same id.
      if (this.requests.get(id) === req) {
        this.requests.delete(id)
      }
    }, 60000)
    this.evictionTimers.set(id, timer)
  }

  /** Requests that are still `pending` or `executing`. */
  getActiveRequests(): TrackedRequest[] {
    return Array.from(this.requests.values()).filter(RequestTracker.isActive)
  }

  /** Compute aggregate statistics over all currently tracked requests. */
  getStats(): RequestStats {
    const all = Array.from(this.requests.values())
    const inFlight = all.filter(RequestTracker.isActive).length

    const completed = all.filter(
      (r): r is typeof r & { duration: number } => r.duration !== undefined,
    )
    const avgDuration =
      completed.length > 0
        ? completed.reduce((sum, r) => sum + r.duration, 0) / completed.length
        : 0

    const failed = all.filter((r) => r.status === 'failed').length
    const errorRate = all.length > 0 ? failed / all.length : 0

    return {
      inFlight,
      avgDuration: Math.round(avgDuration),
      errorRate: Math.round(errorRate * 100) / 100,
      totalRequests: all.length,
    }
  }

  /** Active requests that started more than `timeoutMs` milliseconds ago. */
  getHungRequests(timeoutMs = 30000): TrackedRequest[] {
    const now = Date.now()
    return Array.from(this.requests.values()).filter(
      (r) => RequestTracker.isActive(r) && now - r.startTime > timeoutMs,
    )
  }

  /** True while a request has not finished. */
  private static isActive(r: TrackedRequest): boolean {
    return r.status === 'pending' || r.status === 'executing'
  }

  /** Cancel the pending eviction timer for `id`, if there is one. */
  private cancelEviction(id: string): void {
    const timer = this.evictionTimers.get(id)
    if (timer !== undefined) {
      clearTimeout(timer)
      this.evictionTimers.delete(id)
    }
  }

  private cleanup(): void {
    // Remove completed/failed requests older than 5 minutes
    const now = Date.now()
    const fiveMinutesAgo = now - 5 * 60 * 1000

    for (const [id, req] of this.requests.entries()) {
      if (
        (req.status === 'completed' || req.status === 'failed') &&
        req.startTime < fiveMinutesAgo
      ) {
        this.requests.delete(id)
      }
    }
  }

  /** Stop all timers and drop every tracked request. */
  destroy(): void {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval)
      this.cleanupInterval = null
    }
    for (const timer of this.evictionTimers.values()) {
      clearTimeout(timer)
    }
    this.evictionTimers.clear()
    this.requests.clear()
  }
}

View File

@@ -0,0 +1,78 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type { ProtocolRequest } from '@/protocol/types'
import { ProtocolRequestSchema } from '@/protocol/types'
import { logger } from './logger'
/**
 * Validates incoming protocol messages and guards against request IDs that
 * are already being processed.
 *
 * An ID is considered active from a successful `validate()` until
 * `markComplete()` is called for it, or until the periodic sweep drops it
 * after five minutes. Call `destroy()` to stop the sweep timer.
 */
export class RequestValidator {
  private activeIds = new Set<string>()
  private idTimestamps = new Map<string, number>()
  private cleanupInterval: ReturnType<typeof setInterval> | null = null

  constructor() {
    // Sweep stale IDs once a minute so the bookkeeping cannot grow forever
    this.cleanupInterval = setInterval(() => {
      this.cleanup()
    }, 60000)
  }

  /**
   * Parse `message` with the protocol schema and register its ID as active.
   *
   * @returns The parsed request.
   * @throws Whatever `ProtocolRequestSchema.parse` throws for an invalid
   * message, or an Error when the request ID is already active.
   */
  validate(message: unknown): ProtocolRequest {
    // Schema validation comes first; it throws on malformed input
    const parsed = ProtocolRequestSchema.parse(message)
    const requestId = parsed.id

    // Reject an ID that is still being processed
    const alreadyActive = this.activeIds.has(requestId)
    if (alreadyActive) {
      logger.error(`Duplicate request ID detected: ${requestId}`)
      throw new Error(
        `Duplicate request ID: ${requestId}. Already processing this request.`,
      )
    }

    // Remember the ID together with its arrival time
    this.activeIds.add(requestId)
    this.idTimestamps.set(requestId, Date.now())
    logger.debug(`Request validated: ${requestId} [${parsed.action}]`)

    return parsed
  }

  /** Release `id` so that it may be used by a later request. */
  markComplete(id: string): void {
    this.forget(id)
    logger.debug(`Request ID released: ${id}`)
  }

  /** Drop IDs registered more than five minutes ago (safety net for a missed markComplete()). */
  private cleanup(): void {
    const sweepTime = Date.now()
    const cutoff = sweepTime - 5 * 60 * 1000

    this.idTimestamps.forEach((registeredAt, id) => {
      if (registeredAt >= cutoff) {
        return
      }
      logger.warn(
        `Cleaning up stale request ID: ${id} (age: ${Math.round((sweepTime - registeredAt) / 1000)}s)`,
      )
      this.forget(id)
    })
  }

  /** Remove `id` from both bookkeeping collections. */
  private forget(id: string): void {
    this.activeIds.delete(id)
    this.idTimestamps.delete(id)
  }

  /** Number of IDs currently considered active. */
  getStats(): { activeIds: number } {
    const activeIds = this.activeIds.size
    return { activeIds }
  }

  /** Stop the periodic sweep and clear all tracked IDs. */
  destroy(): void {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval)
      this.cleanupInterval = null
    }
    this.activeIds.clear()
    this.idTimestamps.clear()
  }
}

Some files were not shown because too many files have changed in this diff Show More