feat: implement agent-sdk (#145)

* feat: agent-sdk outline

* feat: unit tests for agent-sdk

* feat: implement /sdk routes

* feat: integration test for agent-sdk with server

* feat: ENV to disable headless mode for testing

* feat: act() integration test working

* chore: refactor package/shared to have constants/ and /types separately

* feat: verify() and extract() sdk APIs

* feat: extract() use remote endpoint for extraction

* feat: verify() implemented - lazy parsing to avoid strong schema checks

* fix: remove generateStructuredOutput as not all models support it

* fix: clean-up LLM types and use zod schema

* fix: typecheck vitest error

* fix: remove directly calling GeminiAgent in sdk act()

* fix: lefthook for refactor warning

* fix: refactor routes/sdk to move business logic out
This commit is contained in:
Nikhil
2026-01-01 17:38:40 -08:00
committed by GitHub
parent 27124baccb
commit 47b9c1894d
59 changed files with 2146 additions and 85 deletions

View File

@@ -7,11 +7,12 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
- Write minimal code comments. Only add comments for non-obvious logic, complex algorithms, or critical warnings. Skip comments for self-explanatory code, obvious function names, and simple operations.
- Logger messages should not include `[prefix]` tags (e.g., `[Config]`, `[HTTP Server]`). Source tracking automatically adds file:line:function in development mode.
- Avoid magic constants scattered in the codebase. Use `@browseros/shared` for all shared configuration:
- `@browseros/shared/ports` - Port numbers (DEFAULT_PORTS, TEST_PORTS)
- `@browseros/shared/timeouts` - Timeout values (TIMEOUTS)
- `@browseros/shared/limits` - Rate limits, pagination, content limits (RATE_LIMITS, AGENT_LIMITS, etc.)
- `@browseros/shared/urls` - External service URLs (EXTERNAL_URLS)
- `@browseros/shared/paths` - File system paths (PATHS)
- `@browseros/shared/constants/ports` - Port numbers (DEFAULT_PORTS, TEST_PORTS)
- `@browseros/shared/constants/timeouts` - Timeout values (TIMEOUTS)
- `@browseros/shared/constants/limits` - Rate limits, pagination, content limits (RATE_LIMITS, AGENT_LIMITS, etc.)
- `@browseros/shared/constants/urls` - External service URLs (EXTERNAL_URLS)
- `@browseros/shared/constants/paths` - File system paths (PATHS)
- `@browseros/shared/types/logger` - Logger interface types (LoggerInterface, LogLevel)
## Project Overview
@@ -82,9 +83,13 @@ The main MCP server that exposes browser automation tools via HTTP/SSE.
- Controller tools work via the browser extension over WebSocket
### Shared (`packages/shared`)
Shared constants and configuration used by both server and extension. Avoids magic numbers.
Shared constants, types, and configuration used by both server and extension. Avoids magic numbers.
**Exports:** `@browseros/shared/ports`, `@browseros/shared/timeouts`, `@browseros/shared/limits`, `@browseros/shared/urls`, `@browseros/shared/paths`
**Structure:**
- `src/constants/` - Configuration values (ports, timeouts, limits, urls, paths)
- `src/types/` - Shared type definitions (logger)
**Exports:** `@browseros/shared/constants/*`, `@browseros/shared/types/*`
### Controller Extension (`apps/controller-ext`)
Chrome extension that receives commands from the server via WebSocket.
@@ -116,13 +121,13 @@ When creating new packages in this monorepo:
**package.json exports:** Must include both `types` and `default` for TypeScript:
```json
"exports": {
"./ports": {
"types": "./src/ports.ts",
"default": "./src/ports.ts"
"./constants/ports": {
"types": "./src/constants/ports.ts",
"default": "./src/constants/ports.ts"
},
"./logger": {
"types": "./src/logger.ts",
"default": "./src/logger.ts"
"./types/logger": {
"types": "./src/types/logger.ts",
"default": "./src/types/logger.ts"
}
}
```

View File

@@ -4,9 +4,9 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { CONTENT_LIMITS } from '@browseros/shared/limits'
import { DEFAULT_PORTS } from '@browseros/shared/ports'
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { CONTENT_LIMITS } from '@browseros/shared/constants/limits'
import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
export type WebSocketProtocol = 'ws' | 'wss'
export interface WebSocketConfig {

View File

@@ -3,7 +3,7 @@
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import type { LoggerInterface, LogLevel } from '@browseros/shared/logger'
import type { LoggerInterface, LogLevel } from '@browseros/shared/types/logger'
import { LOGGING_CONFIG } from '@/config/constants'
const LEVEL_PRIORITY: Record<LogLevel, number> = {

View File

@@ -20,3 +20,6 @@ SENTRY_DSN=
NODE_ENV=development
LOG_LEVEL=info
# Testing
BROWSEROS_TEST_HEADLESS=false

View File

@@ -15,6 +15,7 @@
"test:cdp": "bun run test:cleanup && bun --env-file=.env.development test tests/tools/cdp-based",
"test:controller": "bun run test:cleanup && bun --env-file=.env.development test tests/tools/controller-based",
"test:integration": "bun run test:cleanup && bun --env-file=.env.development test tests/server.integration.test.ts",
"test:sdk": "bun run test:cleanup && bun --env-file=.env.development test tests/sdk",
"test:cleanup": "./tests/__helpers__/cleanup.sh",
"typecheck": "tsc --noEmit"
},
@@ -47,6 +48,8 @@
"pino": "^9.6.0"
},
"devDependencies": {
"@browseros/agent-sdk": "workspace:*",
"async-mutex": "^0.5.0",
"pino-pretty": "^13.0.0",
"@types/bun": "latest",
"@types/debug": "^4.1.12",

View File

@@ -4,8 +4,8 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { AGENT_LIMITS } from '@browseros/shared/limits'
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { AGENT_LIMITS } from '@browseros/shared/constants/limits'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import {
executeToolCall,
type GeminiClient,

View File

@@ -8,7 +8,7 @@
* Factory and exports for provider-specific adapters
*/
import { AIProvider } from '../types.js'
import { LLM_PROVIDERS, type LLMProvider } from '@browseros/shared/schemas/llm'
import type { ProviderAdapter } from './base.js'
import { BaseProviderAdapter } from './base.js'
import { GoogleAdapter } from './google.js'
@@ -18,11 +18,11 @@ import { OpenRouterAdapter } from './openrouter.js'
* Create the appropriate adapter for a provider.
* Returns base adapter (no-op) for providers without special requirements.
*/
export function createProviderAdapter(provider: AIProvider): ProviderAdapter {
export function createProviderAdapter(provider: LLMProvider): ProviderAdapter {
switch (provider) {
case AIProvider.GOOGLE:
case LLM_PROVIDERS.GOOGLE:
return new GoogleAdapter()
case AIProvider.OPENROUTER:
case LLM_PROVIDERS.OPENROUTER:
return new OpenRouterAdapter()
default:
return new BaseProviderAdapter()

View File

@@ -22,13 +22,13 @@
* - Empty messages (no text, no parts) should be skipped
*/
import { beforeEach, describe, expect, it as t } from 'bun:test'
import type {
Content,
ContentUnion,
FunctionCall,
FunctionResponse,
} from '@google/genai'
import { beforeEach, describe, expect, it as t } from 'vitest'
import { BaseProviderAdapter } from '../adapters/base.js'
import type {

View File

@@ -20,9 +20,9 @@
* - Usage retrieval is ASYNC and happens AFTER stream (may fail)
*/
import { beforeEach, describe, expect, it as t } from 'bun:test'
import type { GenerateContentResponse } from '@google/genai'
import { FinishReason } from '@google/genai'
import { beforeEach, describe, expect, it as t } from 'vitest'
import { BaseProviderAdapter } from '../adapters/base.js'

View File

@@ -20,9 +20,9 @@
* - Conversion must handle invalid inputs gracefully (no throws)
*/
import { beforeEach, describe, expect, it as t } from 'bun:test'
import type { FunctionDeclaration, Schema, Tool } from '@google/genai'
import { Type } from '@google/genai'
import { beforeEach, describe, expect, it as t } from 'vitest'
import { ToolConversionStrategy } from './tool.js'

View File

@@ -9,7 +9,7 @@
* through the full VercelAIContentGenerator pipeline.
*/
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import type { Content } from '@google/genai'
import { VercelAIContentGenerator } from './index.js'
import type { VercelAIConfig } from './types.js'

View File

@@ -10,9 +10,9 @@
*/
import type { LanguageModelV2ToolResultOutput } from '@ai-sdk/provider'
import { LLMConfigSchema } from '@browseros/shared/schemas/llm'
import type { jsonSchema } from 'ai'
import { z } from 'zod'
// Vercel AI SDK
// === Vercel SDK Runtime Shapes (What We Receive) ===
@@ -201,7 +201,7 @@ export interface HonoSSEStream {
}
/**
* Supported AI providers
* Supported AI providers (enum for runtime comparisons)
*/
export enum AIProvider {
ANTHROPIC = 'anthropic',
@@ -218,22 +218,12 @@ export enum AIProvider {
/**
* Zod schema for Vercel AI adapter configuration
* Single source of truth - use z.infer for the type
* Extends shared LLMConfigSchema with agent-specific fields
*/
export const VercelAIConfigSchema = z.object({
provider: z.nativeEnum(AIProvider),
export const VercelAIConfigSchema = LLMConfigSchema.extend({
model: z.string().min(1, 'Model name is required'),
apiKey: z.string().optional(),
baseUrl: z.string().optional(),
// For BROWSEROS provider: upstream provider type from ai-gateway
upstreamProvider: z.string().optional(),
// Azure-specific
resourceName: z.string().optional(),
// AWS Bedrock-specific
region: z.string().optional(),
accessKeyId: z.string().optional(),
secretAccessKey: z.string().optional(),
sessionToken: z.string().optional(),
})
export type VercelAIConfig = z.infer<typeof VercelAIConfigSchema>

View File

@@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { EXTERNAL_URLS } from '@browseros/shared/urls'
import { EXTERNAL_URLS } from '@browseros/shared/constants/urls'
export interface StrataCreateResponse {
strataServerUrl: string

View File

@@ -0,0 +1,36 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Lightweight LLM client for text generation.
* Used by SDK verify endpoint.
*/
import type { LLMConfig } from '@browseros/shared/schemas/llm'
import type { LanguageModel, ModelMessage } from 'ai'
import { generateText } from 'ai'
import { resolveLLMConfig } from './config.js'
import { createLLMProvider } from './provider.js'
/**
 * Minimal text-generation client wrapping a single language model.
 *
 * The constructor is private; obtain instances through `LLMClient.create`,
 * which resolves the config and builds the underlying model first.
 */
export class LLMClient {
  private model: LanguageModel

  private constructor(model: LanguageModel) {
    this.model = model
  }

  /**
   * Builds a client for the given LLM config.
   *
   * @param config - Provider configuration to resolve.
   * @param browserosId - Optional id forwarded to config resolution.
   * @returns A client bound to the model created from the resolved config.
   */
  static async create(
    config: LLMConfig,
    browserosId?: string,
  ): Promise<LLMClient> {
    const resolvedConfig = await resolveLLMConfig(config, browserosId)
    return new LLMClient(createLLMProvider(resolvedConfig))
  }

  /**
   * Sends the messages to the model and returns the generated text.
   *
   * @param messages - Conversation messages passed to the model as-is.
   * @returns The `text` field of the generation result.
   */
  async generateText(messages: ModelMessage[]): Promise<string> {
    const { text } = await generateText({
      model: this.model,
      messages,
    })
    return text
  }
}

View File

@@ -0,0 +1,47 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* LLM config resolution - handles BROWSEROS provider lookup.
*/
import { LLM_PROVIDERS, type LLMConfig } from '@browseros/shared/schemas/llm'
import {
fetchBrowserOSConfig,
getLLMConfigFromProvider,
logger,
} from '../../common/index.js'
import type { ResolvedLLMConfig } from './types.js'
/**
 * Turns a user-supplied LLM config into one that is ready for provider creation.
 *
 * Non-BrowserOS providers are returned unchanged once `model` is confirmed to
 * be present. For the BrowserOS provider the model, credentials and upstream
 * provider type are looked up from the config service at
 * `BROWSEROS_CONFIG_URL`, using its 'default' provider entry.
 *
 * @param config - Config as received from the caller.
 * @param browserosId - Optional id forwarded to the BrowserOS config fetch.
 * @returns The resolved config with `model` guaranteed to be set.
 * @throws Error when `model` is missing for a non-BrowserOS provider, or when
 *   `BROWSEROS_CONFIG_URL` is unset for the BrowserOS provider.
 */
export async function resolveLLMConfig(
  config: LLMConfig,
  browserosId?: string,
): Promise<ResolvedLLMConfig> {
  const needsRemoteLookup = config.provider === LLM_PROVIDERS.BROWSEROS

  if (!needsRemoteLookup) {
    if (config.model) {
      return config as ResolvedLLMConfig
    }
    throw new Error(`model is required for ${config.provider} provider`)
  }

  const configUrl = process.env.BROWSEROS_CONFIG_URL
  if (!configUrl) {
    throw new Error(
      'BROWSEROS_CONFIG_URL environment variable is required for BrowserOS provider',
    )
  }

  logger.debug('Resolving BROWSEROS config', { configUrl, browserosId })

  const remoteConfig = await fetchBrowserOSConfig(configUrl, browserosId)
  const { modelName, apiKey, baseUrl, providerType } = getLLMConfigFromProvider(
    remoteConfig,
    'default',
  )

  // Remote values override whatever the caller supplied for these fields.
  return {
    ...config,
    model: modelName,
    apiKey,
    baseUrl,
    upstreamProvider: providerType,
  }
}

View File

@@ -0,0 +1,124 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* LLM provider creation - creates Vercel AI SDK language models.
*/
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock'
import { createAnthropic } from '@ai-sdk/anthropic'
import { createAzure } from '@ai-sdk/azure'
import { createGoogleGenerativeAI } from '@ai-sdk/google'
import { createOpenAI } from '@ai-sdk/openai'
import { createOpenAICompatible } from '@ai-sdk/openai-compatible'
import { LLM_PROVIDERS } from '@browseros/shared/schemas/llm'
import { createOpenRouter } from '@openrouter/ai-sdk-provider'
import type { LanguageModel } from 'ai'
import { logger } from '../../common/index.js'
import { createOpenRouterCompatibleFetch } from '../agent/gemini-vercel-sdk-adapter/utils/fetch.js'
import type { ResolvedLLMConfig } from './types.js'
/**
 * Builds a model through the generic OpenAI-compatible provider.
 * The API key is only included in the provider options when it is truthy.
 */
function openAICompatibleModel(
  name: string,
  baseUrl: string,
  apiKey: ResolvedLLMConfig['apiKey'],
  model: string,
): LanguageModel {
  return createOpenAICompatible({
    name,
    baseURL: baseUrl,
    ...(apiKey && { apiKey }),
  })(model)
}

/**
 * Creates the Vercel AI SDK language model for a resolved LLM config.
 *
 * Each provider validates the fields it needs before constructing the model.
 * The BrowserOS provider routes through `baseUrl` and picks the SDK client
 * from `upstreamProvider`, falling back to an OpenAI-compatible client.
 *
 * @param config - Resolved config (model already determined).
 * @returns The language model instance for `config.model`.
 * @throws Error when a required field for the chosen provider is missing, or
 *   when the provider is not recognised.
 */
export function createLLMProvider(config: ResolvedLLMConfig): LanguageModel {
  const { provider, model, apiKey, baseUrl, upstreamProvider } = config

  switch (provider) {
    case LLM_PROVIDERS.ANTHROPIC: {
      if (!apiKey) throw new Error('Anthropic provider requires apiKey')
      const anthropic = createAnthropic({ apiKey })
      return anthropic(model)
    }

    case LLM_PROVIDERS.OPENAI: {
      if (!apiKey) throw new Error('OpenAI provider requires apiKey')
      const openai = createOpenAI({ apiKey })
      return openai(model)
    }

    case LLM_PROVIDERS.GOOGLE: {
      if (!apiKey) throw new Error('Google provider requires apiKey')
      const google = createGoogleGenerativeAI({ apiKey })
      return google(model)
    }

    case LLM_PROVIDERS.OPENROUTER: {
      if (!apiKey) throw new Error('OpenRouter provider requires apiKey')
      const openRouter = createOpenRouter({
        apiKey,
        extraBody: { reasoning: {} },
        fetch: createOpenRouterCompatibleFetch(),
      })
      return openRouter(model)
    }

    case LLM_PROVIDERS.AZURE: {
      if (!apiKey || !config.resourceName) {
        throw new Error('Azure provider requires apiKey and resourceName')
      }
      const azure = createAzure({
        resourceName: config.resourceName,
        apiKey,
      })
      return azure(model)
    }

    case LLM_PROVIDERS.OLLAMA: {
      if (!baseUrl) throw new Error('Ollama provider requires baseUrl')
      return openAICompatibleModel('ollama', baseUrl, apiKey, model)
    }

    case LLM_PROVIDERS.LMSTUDIO: {
      if (!baseUrl) throw new Error('LMStudio provider requires baseUrl')
      return openAICompatibleModel('lmstudio', baseUrl, apiKey, model)
    }

    case LLM_PROVIDERS.BEDROCK: {
      if (!config.accessKeyId || !config.secretAccessKey || !config.region) {
        throw new Error(
          'Bedrock provider requires accessKeyId, secretAccessKey, and region',
        )
      }
      const bedrock = createAmazonBedrock({
        region: config.region,
        accessKeyId: config.accessKeyId,
        secretAccessKey: config.secretAccessKey,
        sessionToken: config.sessionToken,
      })
      return bedrock(model)
    }

    case LLM_PROVIDERS.BROWSEROS: {
      if (!baseUrl) throw new Error('BrowserOS provider requires baseUrl')

      // The gateway speaks the upstream provider's protocol, so pick the
      // matching SDK client and point it at the gateway base URL.
      if (upstreamProvider === LLM_PROVIDERS.OPENROUTER) {
        return createOpenRouter({
          baseURL: baseUrl,
          ...(apiKey && { apiKey }),
          fetch: createOpenRouterCompatibleFetch(),
        })(model)
      }
      if (upstreamProvider === LLM_PROVIDERS.ANTHROPIC) {
        return createAnthropic({
          baseURL: baseUrl,
          ...(apiKey && { apiKey }),
        })(model)
      }
      if (upstreamProvider === LLM_PROVIDERS.AZURE) {
        return createAzure({
          baseURL: baseUrl,
          ...(apiKey && { apiKey }),
        })(model)
      }

      logger.debug('Creating OpenAI-compatible provider for BrowserOS')
      return openAICompatibleModel('browseros', baseUrl, apiKey, model)
    }

    case LLM_PROVIDERS.OPENAI_COMPATIBLE: {
      if (!baseUrl) {
        throw new Error('OpenAI-compatible provider requires baseUrl')
      }
      return openAICompatibleModel('openai-compatible', baseUrl, apiKey, model)
    }

    default:
      throw new Error(`Unknown provider: ${provider}`)
  }
}

View File

@@ -0,0 +1,14 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Internal types for LLM client.
*/
import type { LLMConfig } from '@browseros/shared/schemas/llm'
/**
 * LLM config after resolution, as consumed by provider creation.
 * Narrows the shared LLMConfig so that `model` is always present.
 */
export interface ResolvedLLMConfig extends LLMConfig {
// Required here; resolution either validates it or fills it in.
model: string
// Provider type reported by the BrowserOS config lookup, when one was performed.
upstreamProvider?: string
}

View File

@@ -5,7 +5,7 @@
*/
import type { Database } from 'bun:sqlite'
import { RATE_LIMITS } from '@browseros/shared/limits'
import { RATE_LIMITS } from '@browseros/shared/constants/limits'
import { logger } from '../../common/index.js'

View File

@@ -5,7 +5,7 @@
import fs from 'node:fs/promises'
import os from 'node:os'
import path from 'node:path'
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import type {
Browser,
ConsoleMessage,

View File

@@ -2,7 +2,7 @@
* @license
* Copyright 2025 BrowserOS
*/
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import type { Page, Protocol } from 'puppeteer-core'
import type { CdpPage } from 'puppeteer-core/internal/cdp/Page.js'

View File

@@ -12,8 +12,8 @@
import fs from 'node:fs'
import path from 'node:path'
import { CONTENT_LIMITS } from '@browseros/shared/limits'
import type { LoggerInterface, LogLevel } from '@browseros/shared/logger'
import { CONTENT_LIMITS } from '@browseros/shared/constants/limits'
import type { LoggerInterface, LogLevel } from '@browseros/shared/types/logger'
import pino from 'pino'
const isDev = process.env.NODE_ENV === 'development'

View File

@@ -2,7 +2,7 @@
* @license
* Copyright 2025 BrowserOS
*/
import { EXTERNAL_URLS } from '@browseros/shared/urls'
import { EXTERNAL_URLS } from '@browseros/shared/constants/urls'
import { PostHog } from 'posthog-node'
const POSTHOG_API_KEY = process.env.POSTHOG_API_KEY

View File

@@ -3,7 +3,7 @@
* Copyright 2025 BrowserOS
*/
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import type { WebSocket } from 'ws'
import { WebSocketServer } from 'ws'
import type { Logger } from '../common/index.js'

View File

@@ -2,7 +2,7 @@
* @license
* Copyright 2025 BrowserOS
*/
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import type { Context } from '../tools/controller-based/index.js'

View File

@@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { PATHS } from '@browseros/shared/paths'
import { PATHS } from '@browseros/shared/constants/paths'
import { Hono } from 'hono'
import { stream } from 'hono/streaming'
import { AIProvider } from '../../agent/agent/gemini-vercel-sdk-adapter/types.js'

View File

@@ -0,0 +1,176 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* SDK Routes - REST API for @browseros/agent-sdk
*/
import { LLM_PROVIDERS } from '@browseros/shared/schemas/llm'
import { Hono } from 'hono'
import { BrowserService } from '../services/sdk/browser.js'
import { ChatService } from '../services/sdk/chat.js'
import { ExtractService } from '../services/sdk/extract.js'
import {
type ActRequest,
ActRequestSchema,
type ExtractRequest,
ExtractRequestSchema,
type NavRequest,
NavRequestSchema,
type SdkDeps,
SdkError,
type VerifyRequest,
VerifyRequestSchema,
} from '../services/sdk/types.js'
import { VerifyService } from '../services/sdk/verify.js'
import type { Env } from '../types.js'
import { validateRequest } from '../utils/validation.js'
/**
 * Normalizes a caught value into an SdkError.
 * SdkError instances pass through untouched (keeping their status code);
 * anything else becomes a default-status SdkError carrying the original
 * Error message, or `fallbackMessage` for non-Error throwables.
 */
function toSdkError(error: unknown, fallbackMessage: string): SdkError {
  if (error instanceof SdkError) {
    return error
  }
  return new SdkError(error instanceof Error ? error.message : fallbackMessage)
}

/**
 * Builds the Hono sub-app serving the SDK REST endpoints.
 *
 * Routes (all POST, bodies validated against the matching request schema):
 * - `/nav`     - navigate a tab/window to a URL.
 * - `/act`     - run a natural-language instruction through the chat endpoint.
 * - `/extract` - extract structured data from the active tab's page content.
 * - `/verify`  - check an expectation against the active tab via an LLM.
 *
 * Failures are reported as `{ error: { message } }` with the SdkError status.
 *
 * @param deps - Server port, logger and optional BrowserOS id.
 * @returns The configured Hono router.
 */
export function createSdkRoutes(deps: SdkDeps) {
  const { port, logger, browserosId } = deps

  const mcpServerUrl = `http://127.0.0.1:${port}/mcp`
  const browserService = new BrowserService(mcpServerUrl)
  const chatService = new ChatService(port)
  const extractService = new ExtractService()
  const verifyService = new VerifyService()

  const sdk = new Hono<Env>()

  sdk.post('/nav', validateRequest(NavRequestSchema), async (c) => {
    const { url, tabId, windowId } = c.get('validatedBody') as NavRequest
    logger.info('SDK nav request', { url, tabId, windowId })

    try {
      await browserService.navigate(url, tabId, windowId)
      return c.json({ success: true })
    } catch (error) {
      const err = toSdkError(error, 'Navigation failed')
      logger.error('SDK nav error', { url, error: err.message })
      return c.json(
        { error: { message: err.message } },
        err.statusCode as 400 | 500,
      )
    }
  })

  sdk.post('/act', validateRequest(ActRequestSchema), async (c) => {
    const { instruction, context, windowId, llm } = c.get(
      'validatedBody',
    ) as ActRequest
    logger.info('SDK act request', { instruction, windowId })

    // Default to the BrowserOS provider when the caller supplies no LLM config.
    const llmConfig = llm ?? { provider: LLM_PROVIDERS.BROWSEROS }
    if (llmConfig.provider !== LLM_PROVIDERS.BROWSEROS && !llmConfig.model) {
      return c.json(
        { error: { message: 'model is required for non-browseros providers' } },
        400,
      )
    }

    try {
      await chatService.executeAction({
        instruction,
        context,
        windowId,
        llmConfig,
      })
      return c.json({ success: true, steps: [] })
    } catch (error) {
      const err = toSdkError(error, 'Action execution failed')
      logger.error('SDK act error', { instruction, error: err.message })
      return c.json(
        { error: { message: err.message } },
        err.statusCode as 400 | 500,
      )
    }
  })

  sdk.post('/extract', validateRequest(ExtractRequestSchema), async (c) => {
    const { instruction, schema, context } = c.get(
      'validatedBody',
    ) as ExtractRequest
    logger.info('SDK extract request', { instruction })

    try {
      const { tabId } = await browserService.getActiveTab()
      const content = await browserService.getPageContent(tabId)
      const data = await extractService.extract({
        instruction,
        schema,
        content,
        context,
      })
      return c.json({ data })
    } catch (error) {
      const err = toSdkError(error, 'Extraction failed')
      logger.error('SDK extract error', { instruction, error: err.message })
      return c.json(
        { error: { message: err.message } },
        err.statusCode as 400 | 500,
      )
    }
  })

  sdk.post('/verify', validateRequest(VerifyRequestSchema), async (c) => {
    const { expectation, context, llm } = c.get(
      'validatedBody',
    ) as VerifyRequest
    logger.info('SDK verify request', { expectation })

    const llmConfig = llm ?? { provider: LLM_PROVIDERS.BROWSEROS }

    try {
      const { tabId } = await browserService.getActiveTab()
      // Screenshot and text are independent reads of the same tab.
      const [screenshot, pageContent] = await Promise.all([
        browserService.getScreenshot(tabId),
        browserService.getPageContent(tabId),
      ])
      const result = await verifyService.verify({
        expectation,
        screenshot,
        pageContent,
        context,
        llmConfig,
        browserosId,
      })
      return c.json(result)
    } catch (error) {
      const err = toSdkError(error, 'Verification failed')
      logger.error('SDK verify error', { expectation, error: err.message })
      return c.json(
        { error: { message: err.message } },
        err.statusCode as 400 | 500,
      )
    }
  })

  return sdk
}

View File

@@ -20,6 +20,7 @@ import { health } from './routes/health.js'
import { createKlavisRoutes } from './routes/klavis.js'
import { createMcpRoutes } from './routes/mcp.js'
import { createProviderRoutes } from './routes/provider.js'
import { createSdkRoutes } from './routes/sdk.js'
import type { Env, HttpServerConfig } from './types.js'
import { defaultCorsConfig } from './utils/cors.js'
@@ -80,6 +81,14 @@ export function createHttpServer(config: HttpServerConfig) {
rateLimiter,
}),
)
.route(
'/sdk',
createSdkRoutes({
port,
logger: log,
browserosId,
}),
)
// Error handler
app.onError((err, c) => {

View File

@@ -0,0 +1,87 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Browser Service - MCP-based browser operations for SDK
*/
import {
callMcpTool,
getImageContent,
getTextContent,
} from '../../utils/mcp-client.js'
import type { ActiveTab, PageContent, Screenshot } from './types.js'
import { SdkError } from './types.js'
/**
 * Browser operations used by the SDK routes, implemented as calls to MCP
 * tools on the server at `mcpServerUrl`.
 */
export class BrowserService {
  private mcpServerUrl: string

  constructor(mcpServerUrl: string) {
    this.mcpServerUrl = mcpServerUrl
  }

  /**
   * Returns the structured result of `browser_get_active_tab`.
   * @throws SdkError when the tool errors or yields no truthy `tabId`.
   */
  async getActiveTab(): Promise<ActiveTab> {
    const { isError, structuredContent } = await callMcpTool<ActiveTab>(
      this.mcpServerUrl,
      'browser_get_active_tab',
      {},
    )

    if (isError || !structuredContent?.tabId) {
      throw new SdkError('Failed to get active tab')
    }
    return structuredContent
  }

  /**
   * Fetches the text content of a tab, preferring structured content and
   * falling back to the first text item of the tool result.
   * @throws SdkError (500) when the tool errors, (400) when no content exists.
   */
  async getPageContent(tabId: number): Promise<string> {
    const toolResult = await callMcpTool<PageContent>(
      this.mcpServerUrl,
      'browser_get_page_content',
      { tabId, type: 'text' },
    )
    if (toolResult.isError) {
      throw new SdkError('Failed to get page content')
    }

    const structuredText = toolResult.structuredContent?.content
    const pageText = structuredText ? structuredText : getTextContent(toolResult)
    if (pageText) {
      return pageText
    }
    throw new SdkError('No content found on page', 400)
  }

  /**
   * Captures a medium-size screenshot of a tab.
   * @throws SdkError when the tool errors or returns no usable image item.
   */
  async getScreenshot(tabId: number): Promise<Screenshot> {
    const toolResult = await callMcpTool(
      this.mcpServerUrl,
      'browser_get_screenshot',
      { tabId, size: 'medium' },
    )
    if (toolResult.isError) {
      throw new SdkError('Failed to capture screenshot')
    }

    const screenshot = getImageContent(toolResult)
    if (screenshot) {
      return screenshot
    }
    throw new SdkError('Screenshot not available')
  }

  /**
   * Navigates to `url`; `tabId` / `windowId` are forwarded only when truthy.
   * @throws SdkError with the tool's text output (or a generic message).
   */
  async navigate(
    url: string,
    tabId?: number,
    windowId?: number,
  ): Promise<void> {
    const toolArgs: Record<string, unknown> = { url }
    if (tabId) {
      toolArgs.tabId = tabId
    }
    if (windowId) {
      toolArgs.windowId = windowId
    }

    const toolResult = await callMcpTool(
      this.mcpServerUrl,
      'browser_navigate',
      toolArgs,
    )
    if (toolResult.isError) {
      throw new SdkError(getTextContent(toolResult) || 'Navigation failed')
    }
  }
}

View File

@@ -0,0 +1,75 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Chat Service - Executes actions via /chat endpoint
*/
import type { LLMConfig } from '@browseros/shared/schemas/llm'
import { SdkError } from './types.js'
/** Input for ChatService.executeAction. */
export interface ExecuteActionOptions {
// Natural-language instruction; becomes the chat message.
instruction: string
// When present, appended to the message as pretty-printed JSON.
context?: Record<string, unknown>
// When truthy, sent to the chat endpoint as browserContext.windowId.
windowId?: number
// Provider settings forwarded field-by-field in the chat request body.
llmConfig: LLMConfig
}
/**
 * Executes SDK actions by posting to the local `/chat` endpoint and waiting
 * for its streamed response to finish.
 */
export class ChatService {
  private chatUrl: string

  constructor(port: number) {
    this.chatUrl = `http://127.0.0.1:${port}/chat`
  }

  /**
   * Runs one instruction as a fresh chat conversation and resolves once the
   * response stream has been fully consumed and the session deleted.
   *
   * @param options - Instruction, optional context/window and LLM config.
   * @throws SdkError when the chat endpoint answers with a non-OK status
   *   (carrying the response text and, for 4xx/5xx, the response status).
   * @throws Whatever the stream read or the session cleanup request throws.
   */
  async executeAction(options: ExecuteActionOptions): Promise<void> {
    const { instruction, context, windowId, llmConfig } = options

    let message = instruction
    if (context) {
      message = `${instruction}\n\nContext:\n${JSON.stringify(context, null, 2)}`
    }

    const conversationId = crypto.randomUUID()

    const response = await fetch(this.chatUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        conversationId,
        message,
        provider: llmConfig.provider,
        model: llmConfig.model ?? 'default',
        apiKey: llmConfig.apiKey,
        baseUrl: llmConfig.baseUrl,
        resourceName: llmConfig.resourceName,
        region: llmConfig.region,
        accessKeyId: llmConfig.accessKeyId,
        secretAccessKey: llmConfig.secretAccessKey,
        sessionToken: llmConfig.sessionToken,
        browserContext: windowId ? { windowId } : undefined,
      }),
    })

    if (!response.ok) {
      const errorText = await response.text()
      throw new SdkError(
        errorText || 'Chat request failed',
        response.status >= 400 && response.status < 600 ? response.status : 500,
      )
    }

    try {
      await this.drainStream(response)
    } catch (error) {
      // The stream broke mid-way: still try to remove the session so it does
      // not linger, but report the stream failure rather than a cleanup one.
      try {
        await this.deleteSession(conversationId)
      } catch {
        // Intentionally ignored; the original error below is the useful one.
      }
      throw error
    }

    await this.deleteSession(conversationId)
  }

  /** Reads the SSE response body to completion, discarding the chunks. */
  private async drainStream(response: Response): Promise<void> {
    const reader = response.body?.getReader()
    if (!reader) return

    while (true) {
      const { done } = await reader.read()
      if (done) break
    }
  }

  /** Asks the chat endpoint to delete the conversation's session. */
  private async deleteSession(conversationId: string): Promise<void> {
    await fetch(`${this.chatUrl}/${conversationId}`, { method: 'DELETE' })
  }
}

View File

@@ -0,0 +1,56 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Extract Service - Structured data extraction via remote service
*/
import { EXTERNAL_URLS } from '@browseros/shared/constants/urls'
import { SdkError } from './types.js'
/** Input for ExtractService.extract; all fields are sent in the request body. */
export interface ExtractOptions {
// What to extract, in natural language.
instruction: string
// Caller-provided schema object describing the desired output; forwarded as-is.
schema: Record<string, unknown>
// Page text to extract from.
content: string
// Optional extra data forwarded alongside the instruction.
context?: Record<string, unknown>
}
/** Shape the success response body is cast to; only `data` is read. */
export interface ExtractResult {
data: unknown
}
/**
 * Client for the remote extraction endpoint
 * (`<CODEGEN_SERVICE>/api/extract`).
 */
export class ExtractService {
  private serviceUrl: string = `${EXTERNAL_URLS.CODEGEN_SERVICE}/api/extract`

  /**
   * Posts the extraction request and returns the `data` field of the reply.
   *
   * @param options - Instruction, schema, page content and optional context.
   * @returns The extracted data as returned by the service.
   * @throws SdkError on a non-OK response, using the body's `error` string
   *   when available and the response status when it is in the 4xx/5xx range.
   */
  async extract(options: ExtractOptions): Promise<unknown> {
    const requestBody = JSON.stringify({
      instruction: options.instruction,
      schema: options.schema,
      content: options.content,
      context: options.context,
    })

    const response = await fetch(this.serviceUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: requestBody,
    })

    if (response.ok) {
      const payload = (await response.json()) as ExtractResult
      return payload.data
    }

    // Error bodies may not be JSON; fall back to an empty object then.
    const errorBody = (await response.json().catch(() => ({}))) as {
      error?: string
    }
    const message = errorBody.error || 'Extraction service failed'
    const isErrorStatus = response.status >= 400 && response.status < 600
    throw new SdkError(message, isErrorStatus ? response.status : 500)
  }
}

View File

@@ -0,0 +1,78 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* SDK Types - Type definitions and request schemas for SDK services
*/
import { LLMConfigSchema } from '@browseros/shared/schemas/llm'
import { z } from 'zod'
import type { Logger } from '../../../common/index.js'
// Request validation schemas
/** Request body schema for navigation: a valid URL plus optional tab/window ids. */
export const NavRequestSchema = z.object({
url: z.string().url(),
tabId: z.number().optional(),
windowId: z.number().optional(),
})
/**
 * Request body schema for actions: a non-empty instruction with optional
 * context, step limit, window id and LLM config.
 */
export const ActRequestSchema = z.object({
instruction: z.string().min(1),
context: z.record(z.unknown()).optional(),
// NOTE(review): accepted by validation; confirm that a consumer actually reads it.
maxSteps: z.number().optional(),
windowId: z.number().optional(),
llm: LLMConfigSchema.optional(),
})
/** Request body schema for extraction: instruction, required schema object, optional context. */
export const ExtractRequestSchema = z.object({
instruction: z.string().min(1),
schema: z.record(z.unknown()),
context: z.record(z.unknown()).optional(),
})
/** Request body schema for verification: non-empty expectation, optional context and LLM config. */
export const VerifyRequestSchema = z.object({
expectation: z.string().min(1),
context: z.record(z.unknown()).optional(),
llm: LLMConfigSchema.optional(),
})
// Static types inferred from the schemas above, so the two cannot drift apart.
export type NavRequest = z.infer<typeof NavRequestSchema>
export type ActRequest = z.infer<typeof ActRequestSchema>
export type ExtractRequest = z.infer<typeof ExtractRequestSchema>
export type VerifyRequest = z.infer<typeof VerifyRequestSchema>
// Shared types
/** Dependencies handed to the SDK route factory. */
export interface SdkDeps {
// Port of the local HTTP server the SDK services call back into.
port: number
logger: Logger
// Optional BrowserOS id, forwarded when resolving LLM configuration.
browserosId?: string
}
/** Structured description of a browser tab. */
export interface ActiveTab {
tabId: number
url: string
title: string
windowId: number
}
/** Structured page-content payload: the page text under `content`. */
export interface PageContent {
content: string
}
/** Image payload: encoded image bytes plus their MIME type. */
export interface Screenshot {
// Embedded into a `data:<mimeType>;base64,<data>` URL by the verify service.
data: string
mimeType: string
}
/**
 * Error raised by SDK services, carrying the HTTP status to respond with.
 * `statusCode` defaults to 500 when not supplied.
 */
export class SdkError extends Error {
  public statusCode: number

  constructor(message: string, statusCode: number = 500) {
    super(message)
    this.statusCode = statusCode
    this.name = 'SdkError'
  }
}

View File

@@ -0,0 +1,78 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Verify Service - Page verification via LLM
*/
import type { LLMConfig } from '@browseros/shared/schemas/llm'
import type { ModelMessage } from 'ai'
import { LLMClient } from '../../../agent/llm/client.js'
import type { Screenshot } from './types.js'
/** Input for VerifyService.verify. */
export interface VerifyOptions {
// Statement about the page that the LLM is asked to confirm or reject.
expectation: string
// Image of the page, sent to the model as a base64 data URL.
screenshot: Screenshot
// Page text appended to the prompt.
pageContent: string
// When present, appended to the prompt as pretty-printed JSON.
context?: Record<string, unknown>
llmConfig: LLMConfig
// Forwarded to LLMClient.create.
browserosId?: string
}
/** Outcome of a verification: the parsed verdict and the model's explanation. */
export interface VerifyResult {
success: boolean
reason: string
}
/**
 * Verifies an expectation about the current page by showing an LLM the page
 * screenshot and text, then parsing a SUCCESS/FAILURE verdict from its reply.
 */
export class VerifyService {
  /**
   * Asks the model whether `expectation` holds for the supplied page state.
   *
   * The reply is parsed leniently rather than against a strict schema:
   * `success` is true only when the trimmed reply starts with the whole word
   * SUCCESS (case-insensitive). `reason` is the reply with a leading
   * SUCCESS/FAILURE word, and any separator punctuation after it, removed.
   *
   * @param options - Expectation, page screenshot/text, context and LLM config.
   * @returns The parsed verdict and explanation.
   * @throws Whatever LLM client creation or text generation throws.
   */
  async verify(options: VerifyOptions): Promise<VerifyResult> {
    const {
      expectation,
      screenshot,
      pageContent,
      context,
      llmConfig,
      browserosId,
    } = options

    const client = await LLMClient.create(llmConfig, browserosId)

    let textPrompt = `Verify this expectation about the current page:
${expectation}
Look at the screenshot and page content. Determine if the expectation is met.
Your response MUST start with exactly one of these words:
- SUCCESS - if the expectation is met
- FAILURE - if the expectation is NOT met
Then explain your reasoning.`

    if (context) {
      textPrompt += `\n\nAdditional context:\n${JSON.stringify(context, null, 2)}`
    }
    textPrompt += `\n\nPage text content:\n${pageContent}`

    const imageUrl = `data:${screenshot.mimeType};base64,${screenshot.data}`
    const messages: ModelMessage[] = [
      {
        role: 'user',
        content: [
          { type: 'image', image: imageUrl },
          { type: 'text', text: textPrompt },
        ],
      },
    ]

    const response = await client.generateText(messages)
    const trimmed = response.trim()

    const success = /^SUCCESS\b/i.test(trimmed)
    // Same word-boundary rule as the verdict test, so a reply starting with
    // e.g. "SUCCESSFULLY" is not cut in half. Separators that models commonly
    // put after the verdict ("SUCCESS - ...", "FAILURE: ...") are dropped too.
    const reason = trimmed
      .replace(/^(SUCCESS|FAILURE)\b[\s:;.,!\-–—]*/i, '')
      .trim()

    return { success, reason }
  }
}

View File

@@ -0,0 +1,60 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Internal MCP client for SDK routes.
* Provides typed access to MCP tool results with structured content.
*/
import { Client } from '@modelcontextprotocol/sdk/client/index.js'
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'
/**
 * One entry of an MCP tool result's `content` array.
 */
export interface McpContent {
// Discriminator: getTextContent() looks for 'text', getImageContent() for 'image'.
type: 'text' | 'image'
// Text payload; getTextContent() returns it for the first 'text' item.
text?: string
// Image payload; getImageContent() requires both `data` and `mimeType` to be set.
// NOTE(review): presumably base64-encoded — confirm against the tool that produces it.
data?: string
mimeType?: string
}
/**
 * Shape that callMcpTool() casts the MCP client's callTool() response to.
 * The cast is unchecked: nothing in this file validates the response at runtime.
 */
export interface McpToolResult<T = Record<string, unknown>> {
content: McpContent[]
// Tool-specific structured payload, typed by the caller through T.
structuredContent?: T
// NOTE(review): presumably set when the tool call failed — confirm; callMcpTool() does not inspect it.
isError?: boolean
}
/**
 * Invokes a single MCP tool over a one-shot streamable-HTTP connection.
 *
 * A fresh client and transport are created for every call, and the transport
 * is closed afterwards regardless of whether connecting or the call succeeded.
 *
 * @param serverUrl - URL of the MCP endpoint; must be parseable by `new URL()`.
 * @param name - Tool name to invoke.
 * @param args - Tool arguments; defaults to an empty object.
 * @returns The tool response, cast (without validation) to `McpToolResult<T>`.
 */
export async function callMcpTool<T = Record<string, unknown>>(
  serverUrl: string,
  name: string,
  args: Record<string, unknown> = {},
): Promise<McpToolResult<T>> {
  const clientInfo = { name: 'browseros-sdk-internal', version: '1.0.0' }
  const client = new Client(clientInfo)
  const endpoint = new URL(serverUrl)
  const transport = new StreamableHTTPClientTransport(endpoint)

  try {
    await client.connect(transport)
    const toolResult = await client.callTool({ name, arguments: args })
    return toolResult as McpToolResult<T>
  } finally {
    await transport.close()
  }
}
/**
 * Returns the text of the first `text` content item in a tool result.
 *
 * @returns That item's `text`, or an empty string when there is no text item
 *   or the first text item carries no `text`.
 */
export function getTextContent<T>(result: McpToolResult<T>): string {
  for (const item of result.content) {
    if (item.type === 'text') {
      // Only the first text item is considered, even if it has no text.
      return item.text ?? ''
    }
  }
  return ''
}
/**
 * Returns the payload of the first `image` content item in a tool result.
 *
 * @returns `{ data, mimeType }` of the first image item, or `undefined` when
 *   there is no image item or that first item lacks a non-empty `data` or
 *   `mimeType`.
 */
export function getImageContent<T>(
  result: McpToolResult<T>,
): { data: string; mimeType: string } | undefined {
  for (const item of result.content) {
    if (item.type !== 'image') continue
    // Only the first image item is considered; later ones are never inspected.
    const { data, mimeType } = item
    return data && mimeType ? { data, mimeType } : undefined
  }
  return undefined
}

View File

@@ -8,7 +8,7 @@
import fs from 'node:fs'
import path from 'node:path'
import { RATE_LIMITS } from '@browseros/shared/limits'
import { RATE_LIMITS } from '@browseros/shared/constants/limits'
import { RateLimiter } from './agent/index.js'
import {
ensureBrowserConnected,

View File

@@ -2,7 +2,7 @@
* @license
* Copyright 2025 BrowserOS
*/
import { TIMEOUTS } from '@browseros/shared/timeouts'
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
import type { ElementHandle } from 'puppeteer-core'
import z from 'zod'

View File

@@ -179,6 +179,9 @@ export const getPageContent = defineTool<z.ZodRawShape, Context, Response>({
response.appendResponseLine('')
response.appendResponseLine('='.repeat(60))
// Add structured content for programmatic access
response.addStructuredContent('content', fullContent)
} catch (error) {
const errorMessage =
error instanceof Error ? error.message : String(error)

View File

@@ -4,7 +4,7 @@
*/
import { isUtf8 } from 'node:buffer'
import { CONTENT_LIMITS } from '@browseros/shared/limits'
import { CONTENT_LIMITS } from '@browseros/shared/constants/limits'
import type { HTTPRequest, HTTPResponse } from 'puppeteer-core'

View File

@@ -2,7 +2,7 @@
* @license
* Copyright 2025 BrowserOS
*/
import { PAGINATION } from '@browseros/shared/limits'
import { PAGINATION } from '@browseros/shared/constants/limits'
export interface PaginationOptions {
pageSize?: number

View File

@@ -69,14 +69,16 @@ export async function spawnBrowser(
const tempUserDataDir = mkdtempSync(join(tmpdir(), 'browseros-test-'))
console.log(`Created temp profile: ${tempUserDataDir}`)
const headless = process.env.BROWSEROS_TEST_HEADLESS === 'true'
console.log(`Starting BrowserOS on CDP port ${config.cdpPort}...`)
const process = spawn(
const browserProcess = spawn(
config.binaryPath,
[
'--use-mock-keychain',
'--show-component-extension-options',
'--enable-logging=stderr',
'--headless=new',
...(headless ? ['--headless=new'] : []),
`--user-data-dir=${tempUserDataDir}`,
`--remote-debugging-port=${config.cdpPort}`,
`--browseros-mcp-port=${config.serverPort}`,
@@ -88,17 +90,17 @@ export async function spawnBrowser(
},
)
process.stdout?.on('data', (_data) => {
browserProcess.stdout?.on('data', (_data) => {
// Uncomment for debugging
// console.log(`[BROWSER] ${_data.toString().trim()}`)
})
process.stderr?.on('data', (_data) => {
browserProcess.stderr?.on('data', (_data) => {
// Uncomment for debugging
// console.log(`[BROWSER] ${_data.toString().trim()}`)
})
process.on('error', (error) => {
browserProcess.on('error', (error) => {
console.error('Failed to start BrowserOS:', error)
})
@@ -106,7 +108,7 @@ export async function spawnBrowser(
await waitForCdp(config.cdpPort)
console.log('CDP is ready')
browserState = { process, tempUserDataDir, config }
browserState = { process: browserProcess, tempUserDataDir, config }
return browserState
}

View File

@@ -6,6 +6,12 @@
* Use setup.ts:ensureBrowserOS() for the full test environment.
*/
import { type ChildProcess, spawn } from 'node:child_process'
import { dirname, resolve } from 'node:path'
// Server entrypoint resolved relative to this helper file rather than the
// working directory, so spawnServer() can pass it to `bun` from any cwd.
// NOTE(review): relies on `import.meta.path`, which is Bun-specific — confirm this helper only runs under Bun.
const SERVER_ENTRYPOINT_PATH = resolve(
dirname(import.meta.path),
'../../src/index.ts',
)
export interface ServerConfig {
cdpPort: number
@@ -65,7 +71,7 @@ export async function spawnServer(config: ServerConfig): Promise<ServerState> {
const process = spawn(
'bun',
[
'apps/server/src/index.ts',
SERVER_ENTRYPOINT_PATH,
'--cdp-port',
config.cdpPort.toString(),
'--server-port',
@@ -75,7 +81,6 @@ export async function spawnServer(config: ServerConfig): Promise<ServerState> {
],
{
stdio: ['ignore', 'pipe', 'pipe'],
cwd: globalThis.process.cwd(),
env: { ...globalThis.process.env, NODE_ENV: 'test' },
},
)

View File

@@ -5,7 +5,7 @@
* Unified test environment orchestrator.
* Ensures server + browser + extension are all ready.
*/
import { TEST_PORTS } from '@browseros/shared/ports'
import { TEST_PORTS } from '@browseros/shared/constants/ports'
import {
type BrowserConfig,

View File

@@ -0,0 +1,164 @@
/**
* @license
* Copyright 2025 BrowserOS
*
* Integration tests for @browseros/agent-sdk
* Tests the SDK against a real BrowserOS server.
*/
import { beforeAll, describe, it } from 'bun:test'
import assert from 'node:assert'
import { Agent } from '@browseros/agent-sdk'
import {
ensureBrowserOS,
type TestEnvironmentConfig,
} from '../__helpers__/setup.js'
// Shared test environment; assigned once by the beforeAll hook below.
let config: TestEnvironmentConfig

const ENVIRONMENT_SETUP_TIMEOUT_MS = 60000

beforeAll(async () => {
  const environment = await ensureBrowserOS()
  config = environment
}, ENVIRONMENT_SETUP_TIMEOUT_MS)
/** Builds an Agent pointed at the local test server's port from `config`. */
function createAgent(): Agent {
  const url = `http://127.0.0.1:${config.serverPort}`
  return new Agent({ url })
}
// End-to-end checks of nav(), act(), extract() and verify() through createAgent().
describe('Agent SDK Integration', () => {
  describe('nav()', () => {
    it('navigates to a URL successfully', async () => {
      const agent = createAgent()
      const result = await agent.nav('https://google.com')

      console.log('\n=== nav() Response ===')
      console.log(JSON.stringify(result, null, 2))

      assert.ok(result.success, 'Navigation should succeed')
    }, 30000)

    it('navigates to a data URL', async () => {
      const agent = createAgent()
      const result = await agent.nav('data:text/html,<h1>Test Page</h1>')

      console.log('\n=== nav() Data URL Response ===')
      console.log(JSON.stringify(result, null, 2))

      assert.ok(result.success, 'Navigation to data URL should succeed')
    }, 30000)

    it('emits progress events', async () => {
      const agent = createAgent()
      const events: unknown[] = []
      agent.onProgress((event) => events.push(event))

      await agent.nav('https://example.com')

      console.log('\n=== Progress Events ===')
      console.log(JSON.stringify(events, null, 2))

      assert.ok(events.length > 0, 'Should emit progress events')
      assert.strictEqual(
        (events[0] as { type: string }).type,
        'nav',
        'First event should be nav type',
      )
    }, 30000)

    it('handles invalid URL gracefully', async () => {
      const agent = createAgent()

      // assert.rejects fails the test when nav() resolves. Wrapping
      // assert.fail() in try/catch cannot do that: the AssertionError it
      // throws is itself an Error, so the catch branch would accept it and
      // the test would pass no matter what nav() did.
      await assert.rejects(
        agent.nav('not-a-valid-url'),
        (error: unknown) => error instanceof Error,
        'Invalid URL should be rejected with an Error',
      )
      console.log('✓ Invalid URL rejected as expected')
    }, 30000)
  })

  describe('act()', () => {
    it('clicks a button on a test page', async () => {
      const agent = createAgent()

      // Navigate to a simple test page with a button
      await agent.nav(
        'data:text/html,<button id="btn" onclick="this.textContent=\'Clicked!\'">Click me</button>',
      )

      const result = await agent.act('click the button')

      console.log('\n=== act() Response ===')
      console.log(JSON.stringify(result, null, 2))

      assert.ok(result.success, 'Action should succeed')
    }, 60000)

    it('emits progress events during action', async () => {
      const agent = createAgent()
      const events: unknown[] = []
      agent.onProgress((event) => events.push(event))

      await agent.nav('data:text/html,<h1>Test</h1>')
      await agent.act('describe what you see')

      console.log('\n=== act() Progress Events ===')
      console.log(JSON.stringify(events, null, 2))

      const actEvents = events.filter(
        (e) => (e as { type: string }).type === 'act',
      )
      assert.ok(actEvents.length > 0, 'Should emit act progress events')
    }, 60000)
  })

  describe('extract()', () => {
    it('extracts structured data from page', async () => {
      const { z } = await import('zod')
      const agent = createAgent()

      await agent.nav(
        'data:text/html,<h1>Welcome to My Site</h1><p>This is a test page.</p>',
      )

      const result = await agent.extract('get the page title', {
        schema: z.object({ title: z.string() }),
      })

      console.log('\n=== extract() Response ===')
      console.log(JSON.stringify(result, null, 2))

      assert.ok(result.data, 'Should return extracted data')
      assert.ok(
        typeof result.data.title === 'string',
        'Title should be a string',
      )
    }, 60000)
  })

  describe('verify()', () => {
    it('verifies page state', async () => {
      const agent = createAgent()

      await agent.nav('data:text/html,<h1>Hello World</h1>')

      const result = await agent.verify(
        'the page contains a heading that says Hello World',
      )

      console.log('\n=== verify() Response ===')
      console.log(JSON.stringify(result, null, 2))

      // Only the response shape is asserted; the verdict itself comes from an LLM.
      assert.ok(
        typeof result.success === 'boolean',
        'Should return success boolean',
      )
      assert.ok(
        typeof result.reason === 'string',
        'Should return reason string',
      )
    }, 60000)
  })
})

View File

@@ -141,10 +141,12 @@
"zod": "^3.24.2",
},
"devDependencies": {
"@browseros/agent-sdk": "workspace:*",
"@types/bun": "latest",
"@types/debug": "^4.1.12",
"@types/node": "^24.3.3",
"@types/ws": "^8.5.13",
"async-mutex": "^0.5.0",
"pino-pretty": "^13.0.0",
"puppeteer": "24.23.0",
"typescript": "^5.9.2",
@@ -153,6 +155,17 @@
"chrome-devtools-mcp": "latest",
},
},
"packages/agent-sdk": {
"name": "@browseros/agent-sdk",
"version": "0.0.1",
"dependencies": {
"@browseros/shared": "workspace:*",
"zod-to-json-schema": "^3.24.1",
},
"peerDependencies": {
"zod": "^3.x",
},
},
"packages/shared": {
"name": "@browseros/shared",
"version": "0.0.1",
@@ -275,6 +288,8 @@
"@braintree/sanitize-url": ["@braintree/sanitize-url@7.1.1", "", {}, "sha512-i1L7noDNxtFyL5DmZafWy1wRVhGehQmzZaz1HiN5e7iylJMSZR7ekOV7NsIqa5qBldlLrsKv4HbgFUVlQrz8Mw=="],
"@browseros/agent-sdk": ["@browseros/agent-sdk@workspace:packages/agent-sdk"],
"@browseros/server": ["@browseros/server@workspace:apps/server"],
"@browseros/shared": ["@browseros/shared@workspace:packages/shared"],
@@ -3241,7 +3256,7 @@
"zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="],
"zod-to-json-schema": ["zod-to-json-schema@3.24.6", "", { "peerDependencies": { "zod": "^3.24.1" } }, "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg=="],
"zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="],
"zustand": ["zustand@4.5.7", "", { "dependencies": { "use-sync-external-store": "^1.2.2" }, "peerDependencies": { "@types/react": ">=16.8", "immer": ">=9.0.6", "react": ">=16.8" }, "optionalPeers": ["@types/react", "immer", "react"] }, "sha512-CHOUy7mu3lbD6o6LJLfllpjkzhHXSBlX8B9+qPddUsIfeF5S/UZ5q0kmCsnRqT1UHFQZchNFDDzMbQsuesHWlw=="],
@@ -3257,6 +3272,8 @@
"@ai-sdk/ui-utils/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
"@ai-sdk/ui-utils/zod-to-json-schema": ["zod-to-json-schema@3.24.6", "", { "peerDependencies": { "zod": "^3.24.1" } }, "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg=="],
"@aklinker1/rollup-plugin-visualizer/open": ["open@8.4.2", "", { "dependencies": { "define-lazy-prop": "^2.0.0", "is-docker": "^2.1.1", "is-wsl": "^2.2.0" } }, "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ=="],
"@aklinker1/rollup-plugin-visualizer/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="],
@@ -3333,8 +3350,6 @@
"@modelcontextprotocol/sdk/zod": ["zod@4.2.1", "", {}, "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw=="],
"@modelcontextprotocol/sdk/zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="],
"@openrouter/sdk/zod": ["zod@4.1.12", "", {}, "sha512-JInaHOamG8pt5+Ey8kGmdcAcg3OL9reK8ltczgHTAwNhMys/6ThXHityHxVV2p3fkw/c+MAvBHFVYHFZDmjMCQ=="],
"@opentelemetry/exporter-logs-otlp-grpc/@opentelemetry/core": ["@opentelemetry/core@2.0.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-MaZk9SJIDgo1peKevlbhP6+IwIiNPNmswNL4AF0WaQJLbHXjr9SrZMgS12+iqr9ToV4ZVosCcc0f8Rg67LXjxw=="],
@@ -3801,6 +3816,8 @@
"@google/gemini-cli-core/@modelcontextprotocol/sdk/zod": ["zod@3.24.3", "", {}, "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg=="],
"@google/gemini-cli-core/@modelcontextprotocol/sdk/zod-to-json-schema": ["zod-to-json-schema@3.24.6", "", { "peerDependencies": { "zod": "^3.24.1" } }, "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg=="],
"@google/gemini-cli-core/glob/jackspeak": ["jackspeak@3.4.3", "", { "dependencies": { "@isaacs/cliui": "^8.0.2" }, "optionalDependencies": { "@pkgjs/parseargs": "^0.11.0" } }, "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw=="],
"@google/gemini-cli-core/glob/minimatch": ["minimatch@9.0.5", "", { "dependencies": { "brace-expansion": "^2.0.1" } }, "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow=="],

View File

@@ -21,6 +21,20 @@ pre-commit:
run: npx @biomejs/biome check --write --no-errors-on-unmatched --files-ignore-unknown=true --colors=off {staged_files}
stage_fixed: true
# Advisory size check for staged TypeScript files: echoes a warning for each
# file longer than 400 lines. The script contains no explicit `exit`, so it only reports.
# NOTE(review): `[[ … ]]` is a bash/zsh construct — confirm the shell lefthook uses supports it.
file-length:
glob: "*.{ts,tsx}"
exclude: "*.{test,spec,d}.ts|*.{test,spec}.tsx|**/__tests__/**|**/tests/**|**/*.generated.*"
run: |
for file in {staged_files}; do
if [[ -f "$file" ]]; then
lines=$(wc -l < "$file" | tr -d ' ')
if [[ $lines -gt 400 ]]; then
echo "⚠️ Warning: $file has $lines lines (threshold: 400)"
echo " Consider splitting this file if it has multiple responsibilities."
fi
fi
done
pre-push:
commands:
branch-name:

View File

@@ -0,0 +1,22 @@
{
"name": "@browseros/agent-sdk",
"version": "0.0.1",
"type": "module",
"scripts": {
"test": "bun test",
"typecheck": "tsc --noEmit"
},
"exports": {
".": {
"types": "./src/index.ts",
"default": "./src/index.ts"
}
},
"dependencies": {
"@browseros/shared": "workspace:*",
"zod-to-json-schema": "^3.24.1"
},
"peerDependencies": {
"zod": "^3.x"
}
}

View File

@@ -0,0 +1,166 @@
import { zodToJsonSchema } from 'zod-to-json-schema'
import {
ActionError,
type AgentSDKError,
ConnectionError,
ExtractionError,
NavigationError,
VerificationError,
} from './errors.js'
import type {
ActOptions,
ActResult,
AgentOptions,
ExtractOptions,
ExtractResult,
LLMConfig,
NavOptions,
NavResult,
ProgressEvent,
VerifyOptions,
VerifyResult,
} from './types.js'
/**
 * HTTP client for the server's `/sdk/*` endpoints.
 *
 * Every public operation emits exactly one progress event and then POSTs a
 * JSON body to the matching endpoint. Transport failures surface as
 * `ConnectionError`; non-2xx responses surface as the operation's own error
 * class carrying the HTTP status code.
 */
export class Agent {
  private readonly baseUrl: string
  private readonly llmConfig?: LLMConfig
  private progressCallback?: (event: ProgressEvent) => void

  /**
   * @param options - Server URL (one trailing slash is removed), optional LLM
   *   configuration forwarded with act/extract/verify, and an optional
   *   initial progress callback.
   */
  constructor(options: AgentOptions) {
    const { url, llm, onProgress } = options
    this.baseUrl = url.replace(/\/$/, '')
    this.llmConfig = llm
    this.progressCallback = onProgress
  }

  /** Replaces the progress callback; only one callback is kept at a time. */
  onProgress(callback: (event: ProgressEvent) => void): void {
    this.progressCallback = callback
  }

  /** Forwards an event to the progress callback, if one is registered. */
  private emit(event: ProgressEvent): void {
    if (this.progressCallback) {
      this.progressCallback(event)
    }
  }

  /**
   * Reads the server-provided error message from a failed response, falling
   * back to a generic status message when the body is not JSON or carries no
   * `error.message`.
   */
  private async describeFailure(response: Response): Promise<string> {
    const fallback = `Request failed with status ${response.status}`
    try {
      const payload = await response.json()
      return payload.error?.message ? payload.error.message : fallback
    } catch {
      // Body was not valid JSON (or not an object): keep the generic message.
      return fallback
    }
  }

  /**
   * POSTs `body` as JSON to `endpoint` and returns the parsed response.
   *
   * @throws ConnectionError when building or sending the request fails.
   * @throws ErrorClass when the server answers with a non-2xx status.
   */
  private async request<T>(
    endpoint: string,
    body: Record<string, unknown>,
    ErrorClass: new (message: string, statusCode?: number) => AgentSDKError,
  ): Promise<T> {
    const url = `${this.baseUrl}${endpoint}`

    let response: Response
    try {
      // Serialization stays inside the try so its failures are reported the
      // same way as network failures.
      response = await fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
      })
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error)
      throw new ConnectionError(`Failed to connect to server: ${detail}`, url)
    }

    if (response.ok) {
      return response.json() as Promise<T>
    }

    const failureMessage = await this.describeFailure(response)
    throw new ErrorClass(failureMessage, response.status)
  }

  /**
   * Navigates via `/sdk/nav`.
   *
   * @param url - Destination URL, sent as-is.
   * @param options - Extra fields spread into the request body after `url`.
   * @throws NavigationError on a non-2xx response.
   */
  async nav(url: string, options?: NavOptions): Promise<NavResult> {
    this.emit({
      type: 'nav',
      message: `Navigating to ${url}`,
      metadata: { url },
    })

    const payload = { url, ...options }
    return this.request<NavResult>('/sdk/nav', payload, NavigationError)
  }

  /**
   * Runs a natural-language instruction via `/sdk/act`.
   *
   * @throws ActionError on a non-2xx response.
   */
  async act(instruction: string, options?: ActOptions): Promise<ActResult> {
    this.emit({
      type: 'act',
      message: instruction,
      metadata: { instruction },
    })

    const payload = {
      instruction,
      context: options?.context,
      maxSteps: options?.maxSteps,
      windowId: options?.windowId,
      llm: this.llmConfig,
    }
    return this.request<ActResult>('/sdk/act', payload, ActionError)
  }

  /**
   * Extracts structured data via `/sdk/extract`. The Zod schema is converted
   * to JSON Schema before it is sent.
   *
   * @throws ExtractionError on a non-2xx response.
   */
  async extract<T>(
    instruction: string,
    options: ExtractOptions<T>,
  ): Promise<ExtractResult<T>> {
    this.emit({
      type: 'extract',
      message: instruction,
      metadata: { instruction },
    })

    const payload = {
      instruction,
      schema: zodToJsonSchema(options.schema),
      context: options.context,
      llm: this.llmConfig,
    }
    return this.request<ExtractResult<T>>(
      '/sdk/extract',
      payload,
      ExtractionError,
    )
  }

  /**
   * Checks an expectation via `/sdk/verify`.
   *
   * @throws VerificationError on a non-2xx response.
   */
  async verify(
    expectation: string,
    options?: VerifyOptions,
  ): Promise<VerifyResult> {
    this.emit({
      type: 'verify',
      message: expectation,
      metadata: { expectation },
    })

    const payload = {
      expectation,
      context: options?.context,
      llm: this.llmConfig,
    }
    return this.request<VerifyResult>('/sdk/verify', payload, VerificationError)
  }
}

View File

@@ -0,0 +1,45 @@
/**
 * Base class for every error thrown by the SDK.
 *
 * `code` is a stable machine-readable identifier; `statusCode` is the HTTP
 * status when the error originated from a server response.
 */
export class AgentSDKError extends Error {
  constructor(
    message: string,
    public readonly code: string,
    public readonly statusCode?: number,
  ) {
    super(message)
    this.name = 'AgentSDKError'
  }
}

/**
 * The request never produced an HTTP response.
 *
 * `url` is the address that could not be reached; it was previously accepted
 * by the constructor but discarded.
 */
export class ConnectionError extends AgentSDKError {
  constructor(
    message: string,
    public readonly url: string,
  ) {
    super(message, 'CONNECTION_ERROR')
    this.name = 'ConnectionError'
  }
}

/** Non-2xx response for a navigation request. */
export class NavigationError extends AgentSDKError {
  constructor(message: string, statusCode?: number) {
    super(message, 'NAVIGATION_ERROR', statusCode)
    this.name = 'NavigationError'
  }
}

/** Non-2xx response for an action request. */
export class ActionError extends AgentSDKError {
  constructor(message: string, statusCode?: number) {
    super(message, 'ACTION_ERROR', statusCode)
    this.name = 'ActionError'
  }
}

/** Non-2xx response for an extraction request. */
export class ExtractionError extends AgentSDKError {
  constructor(message: string, statusCode?: number) {
    super(message, 'EXTRACTION_ERROR', statusCode)
    this.name = 'ExtractionError'
  }
}

/** Non-2xx response for a verification request. */
export class VerificationError extends AgentSDKError {
  constructor(message: string, statusCode?: number) {
    super(message, 'VERIFICATION_ERROR', statusCode)
    this.name = 'VerificationError'
  }
}

View File

@@ -0,0 +1,26 @@
export { Agent } from './agent.js'
export {
ActionError,
AgentSDKError,
ConnectionError,
ExtractionError,
NavigationError,
VerificationError,
} from './errors.js'
export type {
ActOptions,
ActResult,
ActStep,
AgentOptions,
ExtractOptions,
ExtractResult,
LLMConfig,
LLMProvider,
NavOptions,
NavResult,
ProgressEvent,
ProgressEventType,
ToolCall,
VerifyOptions,
VerifyResult,
} from './types.js'

View File

@@ -0,0 +1,74 @@
import type { LLMConfig } from '@browseros/shared/schemas/llm'
import type { ZodSchema } from 'zod'
export type { LLMConfig, LLMProvider } from '@browseros/shared/schemas/llm'
/**
 * Constructor options for Agent.
 */
export interface AgentOptions {
// Server base URL; the Agent constructor strips one trailing slash before appending `/sdk/...` paths.
url: string
// Sent as the `llm` field of act/extract/verify request bodies (not of nav).
llm?: LLMConfig
// Initial progress callback; Agent.onProgress() replaces it.
onProgress?: (event: ProgressEvent) => void
}
/**
 * Optional fields for Agent.nav(); they are spread into the `/sdk/nav` request body after `url`.
 * NOTE(review): presumably these select the tab/window to navigate — the semantics are decided by the server; confirm there.
 */
export interface NavOptions {
tabId?: number
windowId?: number
}
/**
 * Optional fields for Agent.act(); each is forwarded under the same name in the `/sdk/act` request body.
 * NOTE(review): the meaning of `maxSteps` and `windowId` is defined by the server — confirm there.
 */
export interface ActOptions {
context?: Record<string, unknown>
maxSteps?: number
windowId?: number
}
/**
 * Options for Agent.extract(). T is the type of `data` in the returned ExtractResult.
 */
export interface ExtractOptions<T> {
// Converted with zodToJsonSchema() and sent as the `schema` field of the request.
schema: ZodSchema<T>
// Forwarded unchanged as the `context` field of the request.
context?: Record<string, unknown>
}
/**
 * Options for Agent.verify().
 */
export interface VerifyOptions {
// Forwarded unchanged as the `context` field of the `/sdk/verify` request.
context?: Record<string, unknown>
}
/**
 * Kinds of progress events.
 * The Agent methods emit 'nav', 'act', 'extract' and 'verify' (one per call, before the request is sent).
 * NOTE(review): no emitter for 'error' or 'done' is visible in the Agent class — confirm whether they are reserved for later use.
 */
export type ProgressEventType =
| 'nav'
| 'act'
| 'extract'
| 'verify'
| 'error'
| 'done'
/**
 * Payload passed to the progress callback.
 */
export interface ProgressEvent {
type: ProgressEventType
// Human-readable text: the instruction/expectation itself, or "Navigating to <url>" for nav.
message: string
// Operation input echoed back: `{ url }`, `{ instruction }` or `{ expectation }` for the events Agent emits.
metadata?: Record<string, unknown>
}
/**
 * Response body of `/sdk/nav` as typed by Agent.nav(); the JSON is cast to this shape without runtime validation.
 */
export interface NavResult {
success: boolean
}
/**
 * Response body of `/sdk/act` as typed by Agent.act(); the JSON is cast to this shape without runtime validation.
 */
export interface ActResult {
success: boolean
steps: ActStep[]
}
/**
 * One entry of ActResult.steps.
 * NOTE(review): presumably one agent reasoning turn with the tools it invoked — confirm against the server's /sdk/act handler.
 */
export interface ActStep {
thought?: string
toolCalls?: ToolCall[]
}
/**
 * One entry of ActStep.toolCalls: a tool name and the arguments it was called with.
 * NOTE(review): `result` and `error` are presumably mutually exclusive outcomes — confirm against the server.
 */
export interface ToolCall {
name: string
args: Record<string, unknown>
result?: unknown
error?: string
}
/**
 * Response body of `/sdk/extract` as typed by Agent.extract().
 * The JSON is cast to this shape; the SDK does not re-validate `data` against the Zod schema.
 */
export interface ExtractResult<T> {
data: T
}
/**
 * Response body of `/sdk/verify` as typed by Agent.verify().
 * A `success` of false is a normal result, not an error: Agent.verify() throws only on non-2xx responses.
 */
export interface VerifyResult {
success: boolean
reason: string
}

View File

@@ -0,0 +1,606 @@
import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test'
import { z } from 'zod'
import { zodToJsonSchema } from 'zod-to-json-schema'
import { Agent } from '../../src/agent.js'
import {
ActionError,
ConnectionError,
ExtractionError,
NavigationError,
VerificationError,
} from '../../src/errors.js'
import type { ProgressEvent } from '../../src/types.js'
// Base URL handed to the Agent under test; the tests replace globalThis.fetch with a mock before making requests.
const TEST_URL = 'http://localhost:9222'
/**
 * Builds a mock `fetch` that resolves to a minimal Response-like object.
 *
 * @param response - Value that the fake response's `json()` resolves to.
 * @param status - HTTP status to report; `ok` is true for 200-299.
 */
function mockFetch(response: unknown, status = 200) {
  const ok = status >= 200 && status < 300
  return mock(() => {
    const fakeResponse = {
      ok,
      status,
      json: () => Promise.resolve(response),
    } as Response
    return Promise.resolve(fakeResponse)
  })
}
/** Builds a mock `fetch` whose every call rejects with `error`. */
function mockFetchError(error: Error) {
  const rejectWithError = () => Promise.reject(error)
  return mock(rejectWithError)
}
describe('Agent', () => {
let originalFetch: typeof globalThis.fetch
beforeEach(() => {
originalFetch = globalThis.fetch
})
afterEach(() => {
globalThis.fetch = originalFetch
})
describe('constructor', () => {
it('creates agent with url', () => {
const agent = new Agent({ url: TEST_URL })
expect(agent).toBeDefined()
})
it('creates agent with url and llm config', () => {
const agent = new Agent({
url: TEST_URL,
llm: { provider: 'openai', model: 'gpt-4o', apiKey: 'sk-test' },
})
expect(agent).toBeDefined()
})
it('strips trailing slash from url', () => {
const fetchMock = mockFetch({ success: true })
globalThis.fetch = fetchMock
const agent = new Agent({ url: 'http://localhost:9222/' })
agent.nav('https://example.com')
expect(fetchMock).toHaveBeenCalledWith(
'http://localhost:9222/sdk/nav',
expect.any(Object),
)
})
})
describe('nav()', () => {
it('sends correct request to /sdk/nav', async () => {
const fetchMock = mockFetch({ success: true })
globalThis.fetch = fetchMock
const agent = new Agent({ url: TEST_URL })
await agent.nav('https://example.com')
expect(fetchMock).toHaveBeenCalledWith('http://localhost:9222/sdk/nav', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url: 'https://example.com' }),
})
})
it('includes tabId and windowId options', async () => {
const fetchMock = mockFetch({ success: true })
globalThis.fetch = fetchMock
const agent = new Agent({ url: TEST_URL })
await agent.nav('https://example.com', { tabId: 123, windowId: 456 })
expect(fetchMock).toHaveBeenCalledWith('http://localhost:9222/sdk/nav', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
url: 'https://example.com',
tabId: 123,
windowId: 456,
}),
})
})
it('returns NavResult on success', async () => {
globalThis.fetch = mockFetch({ success: true })
const agent = new Agent({ url: TEST_URL })
const result = await agent.nav('https://example.com')
expect(result).toEqual({ success: true })
})
it('throws NavigationError on failure', async () => {
globalThis.fetch = mockFetch(
{ error: { message: 'Navigation failed' } },
500,
)
const agent = new Agent({ url: TEST_URL })
await expect(agent.nav('https://example.com')).rejects.toThrow(
NavigationError,
)
})
it('throws ConnectionError when fetch fails', async () => {
globalThis.fetch = mockFetchError(new Error('Network error'))
const agent = new Agent({ url: TEST_URL })
await expect(agent.nav('https://example.com')).rejects.toThrow(
ConnectionError,
)
})
it('emits nav progress event', async () => {
globalThis.fetch = mockFetch({ success: true })
const events: ProgressEvent[] = []
const agent = new Agent({
url: TEST_URL,
onProgress: (e) => events.push(e),
})
await agent.nav('https://example.com')
expect(events).toHaveLength(1)
expect(events[0]).toEqual({
type: 'nav',
message: 'Navigating to https://example.com',
metadata: { url: 'https://example.com' },
})
})
})
describe('act()', () => {
it('sends correct request to /sdk/act', async () => {
const fetchMock = mockFetch({ success: true, steps: [] })
globalThis.fetch = fetchMock
const agent = new Agent({ url: TEST_URL })
await agent.act('click the button')
expect(fetchMock).toHaveBeenCalledWith('http://localhost:9222/sdk/act', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
instruction: 'click the button',
context: undefined,
maxSteps: undefined,
windowId: undefined,
llm: undefined,
}),
})
})
it('includes context, maxSteps, and windowId options', async () => {
const fetchMock = mockFetch({ success: true, steps: [] })
globalThis.fetch = fetchMock
const agent = new Agent({ url: TEST_URL })
await agent.act('search for item', {
context: { query: 'headphones' },
maxSteps: 5,
windowId: 789,
})
expect(fetchMock).toHaveBeenCalledWith('http://localhost:9222/sdk/act', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
instruction: 'search for item',
context: { query: 'headphones' },
maxSteps: 5,
windowId: 789,
llm: undefined,
}),
})
})
it('includes llm config from constructor', async () => {
const fetchMock = mockFetch({ success: true, steps: [] })
globalThis.fetch = fetchMock
const llmConfig = {
provider: 'openai' as const,
model: 'gpt-4o',
apiKey: 'sk-test',
}
const agent = new Agent({ url: TEST_URL, llm: llmConfig })
await agent.act('click the button')
expect(fetchMock).toHaveBeenCalledWith('http://localhost:9222/sdk/act', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
instruction: 'click the button',
context: undefined,
maxSteps: undefined,
windowId: undefined,
llm: llmConfig,
}),
})
})
it('returns ActResult on success', async () => {
const mockResult = {
success: true,
steps: [
{
thought: 'I need to click the button',
toolCalls: [{ name: 'browser_click', args: { nodeId: 1 } }],
},
],
}
globalThis.fetch = mockFetch(mockResult)
const agent = new Agent({ url: TEST_URL })
const result = await agent.act('click the button')
expect(result).toEqual(mockResult)
})
it('throws ActionError on failure', async () => {
globalThis.fetch = mockFetch({ error: { message: 'Action failed' } }, 500)
const agent = new Agent({ url: TEST_URL })
await expect(agent.act('click the button')).rejects.toThrow(ActionError)
})
it('emits act progress event', async () => {
globalThis.fetch = mockFetch({ success: true, steps: [] })
const events: ProgressEvent[] = []
const agent = new Agent({
url: TEST_URL,
onProgress: (e) => events.push(e),
})
await agent.act('click the button')
expect(events).toHaveLength(1)
expect(events[0]).toEqual({
type: 'act',
message: 'click the button',
metadata: { instruction: 'click the button' },
})
})
})
describe('extract()', () => {
const productSchema = z.object({
name: z.string(),
price: z.number(),
})
it('sends correct request with JSON Schema to /sdk/extract', async () => {
const fetchMock = mockFetch({ data: { name: 'Test', price: 99 } })
globalThis.fetch = fetchMock
const agent = new Agent({ url: TEST_URL })
await agent.extract('get product info', { schema: productSchema })
const expectedJsonSchema = zodToJsonSchema(productSchema)
expect(fetchMock).toHaveBeenCalledWith(
'http://localhost:9222/sdk/extract',
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
instruction: 'get product info',
schema: expectedJsonSchema,
context: undefined,
llm: undefined,
}),
},
)
})
it('includes context option', async () => {
const fetchMock = mockFetch({ data: { name: 'Test', price: 99 } })
globalThis.fetch = fetchMock
const agent = new Agent({ url: TEST_URL })
await agent.extract('get product info', {
schema: productSchema,
context: { format: 'USD' },
})
const expectedJsonSchema = zodToJsonSchema(productSchema)
expect(fetchMock).toHaveBeenCalledWith(
'http://localhost:9222/sdk/extract',
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
instruction: 'get product info',
schema: expectedJsonSchema,
context: { format: 'USD' },
llm: undefined,
}),
},
)
})
it('includes llm config from constructor', async () => {
const fetchMock = mockFetch({ data: { name: 'Test', price: 99 } })
globalThis.fetch = fetchMock
const llmConfig = {
provider: 'anthropic' as const,
model: 'claude-3',
apiKey: 'key',
}
const agent = new Agent({ url: TEST_URL, llm: llmConfig })
await agent.extract('get product info', { schema: productSchema })
const expectedJsonSchema = zodToJsonSchema(productSchema)
expect(fetchMock).toHaveBeenCalledWith(
'http://localhost:9222/sdk/extract',
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
instruction: 'get product info',
schema: expectedJsonSchema,
context: undefined,
llm: llmConfig,
}),
},
)
})
it('returns ExtractResult on success', async () => {
const mockData = { name: 'Headphones', price: 99.99 }
globalThis.fetch = mockFetch({ data: mockData })
const agent = new Agent({ url: TEST_URL })
const result = await agent.extract('get product info', {
schema: productSchema,
})
expect(result).toEqual({ data: mockData })
})
it('throws ExtractionError on failure', async () => {
globalThis.fetch = mockFetch(
{ error: { message: 'Extraction failed' } },
422,
)
const agent = new Agent({ url: TEST_URL })
await expect(
agent.extract('get product info', { schema: productSchema }),
).rejects.toThrow(ExtractionError)
})
it('emits extract progress event', async () => {
globalThis.fetch = mockFetch({ data: { name: 'Test', price: 99 } })
const events: ProgressEvent[] = []
const agent = new Agent({
url: TEST_URL,
onProgress: (e) => events.push(e),
})
await agent.extract('get product info', { schema: productSchema })
expect(events).toHaveLength(1)
expect(events[0]).toEqual({
type: 'extract',
message: 'get product info',
metadata: { instruction: 'get product info' },
})
})
})
describe('verify()', () => {
  const verifyEndpoint = 'http://localhost:9222/sdk/verify'

  // Fetch init expected for a verify() call carrying the given JSON payload.
  const jsonPost = (payload: Record<string, unknown>) => ({
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })

  it('sends correct request to /sdk/verify', async () => {
    const fetchSpy = mockFetch({ success: true, reason: 'Element visible' })
    globalThis.fetch = fetchSpy

    const agent = new Agent({ url: TEST_URL })
    await agent.verify('search results are visible')

    expect(fetchSpy).toHaveBeenCalledWith(
      verifyEndpoint,
      jsonPost({
        expectation: 'search results are visible',
        context: undefined,
        llm: undefined,
      }),
    )
  })

  it('includes context option', async () => {
    const fetchSpy = mockFetch({ success: true, reason: 'Element visible' })
    globalThis.fetch = fetchSpy

    const agent = new Agent({ url: TEST_URL })
    await agent.verify('price is correct', { context: { expected: 99.99 } })

    expect(fetchSpy).toHaveBeenCalledWith(
      verifyEndpoint,
      jsonPost({
        expectation: 'price is correct',
        context: { expected: 99.99 },
        llm: undefined,
      }),
    )
  })

  it('includes llm config from constructor', async () => {
    const fetchSpy = mockFetch({ success: true, reason: 'Verified' })
    globalThis.fetch = fetchSpy
    const llmConfig = { provider: 'google' as const, model: 'gemini-pro' }

    const agent = new Agent({ url: TEST_URL, llm: llmConfig })
    await agent.verify('page loaded')

    expect(fetchSpy).toHaveBeenCalledWith(
      verifyEndpoint,
      jsonPost({
        expectation: 'page loaded',
        context: undefined,
        llm: llmConfig,
      }),
    )
  })

  it('returns VerifyResult on success', async () => {
    const serverReply = { success: true, reason: 'Search results found' }
    globalThis.fetch = mockFetch(serverReply)

    const agent = new Agent({ url: TEST_URL })
    const outcome = await agent.verify('search results are visible')

    expect(outcome).toEqual({ success: true, reason: 'Search results found' })
  })

  it('returns VerifyResult with success=false when verification fails', async () => {
    const serverReply = { success: false, reason: 'No search results found' }
    globalThis.fetch = mockFetch(serverReply)

    const agent = new Agent({ url: TEST_URL })
    const outcome = await agent.verify('search results are visible')

    // A failed verification is a normal result, not a thrown error.
    expect(outcome).toEqual({
      success: false,
      reason: 'No search results found',
    })
  })

  it('throws VerificationError on server error', async () => {
    const errorBody = { error: { message: 'Verification failed' } }
    globalThis.fetch = mockFetch(errorBody, 500)

    const agent = new Agent({ url: TEST_URL })
    const pending = agent.verify('search results are visible')

    await expect(pending).rejects.toThrow(VerificationError)
  })

  it('emits verify progress event', async () => {
    const received: ProgressEvent[] = []
    globalThis.fetch = mockFetch({ success: true, reason: 'Verified' })

    const agent = new Agent({
      url: TEST_URL,
      onProgress: (event) => received.push(event),
    })
    await agent.verify('page loaded')

    const expectedEvent = {
      type: 'verify',
      message: 'page loaded',
      metadata: { expectation: 'page loaded' },
    }
    expect(received).toHaveLength(1)
    expect(received[0]).toEqual(expectedEvent)
  })
})
describe('onProgress()', () => {
  it('allows setting progress callback after construction', async () => {
    const received: ProgressEvent[] = []
    globalThis.fetch = mockFetch({ success: true })

    const agent = new Agent({ url: TEST_URL })
    agent.onProgress((event) => received.push(event))
    await agent.nav('https://example.com')

    expect(received).toHaveLength(1)
    const [first] = received
    expect(first.type).toBe('nav')
  })

  it('replaces previous callback', async () => {
    const fromConstructor: ProgressEvent[] = []
    const fromSetter: ProgressEvent[] = []
    globalThis.fetch = mockFetch({ success: true })

    const agent = new Agent({
      url: TEST_URL,
      onProgress: (event) => fromConstructor.push(event),
    })
    agent.onProgress((event) => fromSetter.push(event))
    await agent.nav('https://example.com')

    // Only the most recently registered callback should see the event.
    expect(fromConstructor).toHaveLength(0)
    expect(fromSetter).toHaveLength(1)
  })
})
describe('error handling', () => {
  // Settles to the rejection reason. If the call unexpectedly fulfils, the
  // fulfilled value is returned instead and fails the instanceOf assertion,
  // so these tests can no longer pass vacuously when nothing is thrown.
  const rejectionOf = (pending: Promise<unknown>): Promise<unknown> =>
    pending.catch((reason: unknown) => reason)

  it('includes status code in error', async () => {
    globalThis.fetch = mockFetch({ error: { message: 'Not found' } }, 404)
    const agent = new Agent({ url: TEST_URL })

    const error = await rejectionOf(agent.nav('https://example.com'))

    expect(error).toBeInstanceOf(NavigationError)
    expect((error as NavigationError).statusCode).toBe(404)
  })

  it('extracts error message from response body', async () => {
    globalThis.fetch = mockFetch(
      { error: { message: 'Custom error message' } },
      400,
    )
    const agent = new Agent({ url: TEST_URL })

    const error = await rejectionOf(agent.nav('https://example.com'))

    expect(error).toBeInstanceOf(NavigationError)
    expect((error as NavigationError).message).toBe('Custom error message')
  })

  it('uses default error message when body parse fails', async () => {
    // Response whose json() rejects, simulating an unparseable error body.
    globalThis.fetch = mock(() =>
      Promise.resolve({
        ok: false,
        status: 500,
        json: () => Promise.reject(new Error('Invalid JSON')),
      } as Response),
    )
    const agent = new Agent({ url: TEST_URL })

    const error = await rejectionOf(agent.nav('https://example.com'))

    expect(error).toBeInstanceOf(NavigationError)
    expect((error as NavigationError).message).toBe(
      'Request failed with status 500',
    )
  })
})
})

View File

@@ -0,0 +1,7 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"rootDir": "src"
},
"include": ["src/**/*"]
}

View File

@@ -6,29 +6,33 @@
"typecheck": "tsc --noEmit"
},
"exports": {
"./ports": {
"types": "./src/ports.ts",
"default": "./src/ports.ts"
"./constants/ports": {
"types": "./src/constants/ports.ts",
"default": "./src/constants/ports.ts"
},
"./timeouts": {
"types": "./src/timeouts.ts",
"default": "./src/timeouts.ts"
"./constants/timeouts": {
"types": "./src/constants/timeouts.ts",
"default": "./src/constants/timeouts.ts"
},
"./limits": {
"types": "./src/limits.ts",
"default": "./src/limits.ts"
"./constants/limits": {
"types": "./src/constants/limits.ts",
"default": "./src/constants/limits.ts"
},
"./urls": {
"types": "./src/urls.ts",
"default": "./src/urls.ts"
"./constants/urls": {
"types": "./src/constants/urls.ts",
"default": "./src/constants/urls.ts"
},
"./paths": {
"types": "./src/paths.ts",
"default": "./src/paths.ts"
"./constants/paths": {
"types": "./src/constants/paths.ts",
"default": "./src/constants/paths.ts"
},
"./logger": {
"types": "./src/logger.ts",
"default": "./src/logger.ts"
"./types/logger": {
"types": "./src/types/logger.ts",
"default": "./src/types/logger.ts"
},
"./schemas/llm": {
"types": "./src/schemas/llm.ts",
"default": "./src/schemas/llm.ts"
}
}
}

View File

@@ -9,4 +9,5 @@
/**
 * External service URLs, keyed by service.
 * `as const` keeps each value typed as its exact string literal.
 */
export const EXTERNAL_URLS = {
KLAVIS_PROXY: 'https://llm.browseros.com/klavis',
POSTHOG_DEFAULT: 'https://us.i.posthog.com',
CODEGEN_SERVICE: 'https://browseros-codegen.fly.dev',
} as const

View File

@@ -0,0 +1,64 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Shared LLM configuration Zod schemas - single source of truth.
* Use z.infer<> for TypeScript types.
*/
import { z } from 'zod'
/**
 * String identifiers for each supported LLM provider.
 *
 * Reference these members (e.g. `LLM_PROVIDERS.OPENAI`) instead of raw string
 * literals, for example in type-safe `switch` statements. `as const` keeps
 * every value typed as its exact literal rather than `string`.
 */
export const LLM_PROVIDERS = {
ANTHROPIC: 'anthropic',
OPENAI: 'openai',
GOOGLE: 'google',
OPENROUTER: 'openrouter',
AZURE: 'azure',
OLLAMA: 'ollama',
LMSTUDIO: 'lmstudio',
BEDROCK: 'bedrock',
BROWSEROS: 'browseros',
OPENAI_COMPATIBLE: 'openai-compatible',
} as const
/**
 * Zod enum of supported LLM providers, built from the `LLM_PROVIDERS` values.
 *
 * Members are listed explicitly, so a provider added to `LLM_PROVIDERS` must
 * also be added here before this schema accepts it.
 */
export const LLMProviderSchema = z.enum([
LLM_PROVIDERS.ANTHROPIC,
LLM_PROVIDERS.OPENAI,
LLM_PROVIDERS.GOOGLE,
LLM_PROVIDERS.OPENROUTER,
LLM_PROVIDERS.AZURE,
LLM_PROVIDERS.OLLAMA,
LLM_PROVIDERS.LMSTUDIO,
LLM_PROVIDERS.BEDROCK,
LLM_PROVIDERS.BROWSEROS,
LLM_PROVIDERS.OPENAI_COMPATIBLE,
])
/** Union of the provider identifier strings accepted by `LLMProviderSchema`. */
export type LLMProvider = z.infer<typeof LLMProviderSchema>
/**
 * LLM configuration schema.
 * Used by SDK endpoints and agent configuration.
 *
 * Only `provider` is required; every other field is an optional string.
 */
export const LLMConfigSchema = z.object({
provider: LLMProviderSchema,
model: z.string().optional(),
apiKey: z.string().optional(),
baseUrl: z.string().optional(),
// Azure-specific
resourceName: z.string().optional(),
// AWS Bedrock-specific
region: z.string().optional(),
accessKeyId: z.string().optional(),
secretAccessKey: z.string().optional(),
sessionToken: z.string().optional(),
})
/** Configuration object type inferred from `LLMConfigSchema`. */
export type LLMConfig = z.infer<typeof LLMConfigSchema>