mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-17 02:25:57 +00:00
feat: cdp keep alive and exit strategy (#365)
* feat: cdp keep alive and exit strategy * fix: self-kill if health checks don't arrive * fix: timer cdp race * fix: stale this
This commit is contained in:
@@ -4,10 +4,36 @@
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
import { EXIT_CODES } from '@browseros/shared/constants/exit-codes'
|
||||
import { Hono } from 'hono'
|
||||
import type { Browser } from '../../browser/browser'
|
||||
import { logger } from '../../lib/logger'
|
||||
|
||||
const HEALTH_CHECK_TIMEOUT = 5 * 60 * 1000 // 5 minutes
|
||||
|
||||
interface HealthDeps {
|
||||
browser?: Browser
|
||||
}
|
||||
|
||||
export function createHealthRoute(deps: HealthDeps = {}) {
|
||||
let watchdogTimer: ReturnType<typeof setTimeout> | null = null
|
||||
|
||||
function resetWatchdog() {
|
||||
if (watchdogTimer) clearTimeout(watchdogTimer)
|
||||
watchdogTimer = setTimeout(() => {
|
||||
logger.error(
|
||||
'No health check received in 5 minutes, Chromium may be gone — exiting',
|
||||
)
|
||||
process.exit(EXIT_CODES.GENERAL_ERROR)
|
||||
}, HEALTH_CHECK_TIMEOUT)
|
||||
}
|
||||
|
||||
// Start the watchdog on creation
|
||||
resetWatchdog()
|
||||
|
||||
export function createHealthRoute() {
|
||||
return new Hono().get('/', (c) => {
|
||||
return c.json({ status: 'ok' })
|
||||
resetWatchdog()
|
||||
const cdpConnected = deps.browser?.isCdpConnected() ?? true
|
||||
return c.json({ status: 'ok', cdpConnected })
|
||||
})
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ export async function createHttpServer(config: HttpServerConfig) {
|
||||
|
||||
const app = new Hono<Env>()
|
||||
.use('/*', cors(defaultCorsConfig))
|
||||
.route('/health', createHealthRoute())
|
||||
.route('/health', createHealthRoute({ browser }))
|
||||
.route(
|
||||
'/shutdown',
|
||||
createShutdownRoute({ onShutdown: onShutdown ?? (() => {}) }),
|
||||
|
||||
@@ -13,6 +13,7 @@ import type { CdpTarget, CdpBackend as ICdpBackend } from './types'
|
||||
interface PendingRequest {
|
||||
resolve: (value: unknown) => void
|
||||
reject: (reason: Error) => void
|
||||
timer: ReturnType<typeof setTimeout>
|
||||
}
|
||||
|
||||
// biome-ignore lint/correctness/noUnusedVariables: declaration merging adds ProtocolApi properties to the class
|
||||
@@ -28,6 +29,7 @@ class CdpBackend implements ICdpBackend {
|
||||
private reconnecting = false
|
||||
private eventHandlers = new Map<string, ((params: unknown) => void)[]>()
|
||||
private sessionCache = new Map<string, ProtocolApi>()
|
||||
private keepaliveTimer: ReturnType<typeof setInterval> | null = null
|
||||
|
||||
constructor(config: { port: number }) {
|
||||
this.port = config.port
|
||||
@@ -44,6 +46,7 @@ class CdpBackend implements ICdpBackend {
|
||||
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
await this.attemptConnect()
|
||||
this.startKeepalive()
|
||||
return
|
||||
} catch (error) {
|
||||
const msg = error instanceof Error ? error.message : String(error)
|
||||
@@ -84,6 +87,8 @@ class CdpBackend implements ICdpBackend {
|
||||
}
|
||||
|
||||
ws.onclose = () => {
|
||||
// Guard against stale onclose from a replaced socket
|
||||
if (this.ws !== ws) return
|
||||
this.connected = false
|
||||
this.ws = null
|
||||
if (opened) this.handleUnexpectedClose()
|
||||
@@ -97,16 +102,85 @@ class CdpBackend implements ICdpBackend {
|
||||
})
|
||||
}
|
||||
|
||||
private handleUnexpectedClose(): void {
|
||||
private startKeepalive(): void {
|
||||
this.stopKeepalive()
|
||||
|
||||
const interval = TIMEOUTS.CDP_KEEPALIVE_INTERVAL
|
||||
const timeout = TIMEOUTS.CDP_KEEPALIVE_TIMEOUT
|
||||
|
||||
this.keepaliveTimer = setInterval(async () => {
|
||||
if (!this.ws || !this.connected || this.disconnecting) return
|
||||
|
||||
let timeoutId: ReturnType<typeof setTimeout> | undefined
|
||||
try {
|
||||
await Promise.race([
|
||||
this.rawSend('Browser.getVersion'),
|
||||
new Promise((_, reject) => {
|
||||
timeoutId = setTimeout(
|
||||
() => reject(new Error('CDP keepalive timeout')),
|
||||
timeout,
|
||||
)
|
||||
}),
|
||||
])
|
||||
clearTimeout(timeoutId)
|
||||
} catch {
|
||||
clearTimeout(timeoutId)
|
||||
logger.warn('CDP keepalive failed, connection may be dead')
|
||||
this.handleDeadConnection()
|
||||
}
|
||||
}, interval)
|
||||
}
|
||||
|
||||
private stopKeepalive(): void {
|
||||
if (this.keepaliveTimer) {
|
||||
clearInterval(this.keepaliveTimer)
|
||||
this.keepaliveTimer = null
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Force-close a zombie WebSocket that stopped responding but never
|
||||
* fired onclose. This triggers the normal reconnection path.
|
||||
*/
|
||||
private handleDeadConnection(): void {
|
||||
if (this.disconnecting || this.reconnecting) return
|
||||
|
||||
this.stopKeepalive()
|
||||
|
||||
if (this.ws) {
|
||||
try {
|
||||
this.ws.close()
|
||||
} catch {
|
||||
// Already dead, ignore
|
||||
}
|
||||
this.ws = null
|
||||
}
|
||||
this.connected = false
|
||||
this.handleUnexpectedClose()
|
||||
}
|
||||
|
||||
private handleUnexpectedClose(): void {
|
||||
if (this.disconnecting) return
|
||||
|
||||
// Allow re-entry if a previous reconnection already finished.
|
||||
// The old guard `if (this.reconnecting) return` caused permanent
|
||||
// death when a freshly reconnected socket closed again before
|
||||
// the .finally() callback reset the flag.
|
||||
if (this.reconnecting) {
|
||||
logger.warn(
|
||||
'CDP closed again while reconnecting — will retry after current attempt',
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
this.stopKeepalive()
|
||||
this.rejectPendingRequests()
|
||||
|
||||
logger.error(
|
||||
'CDP WebSocket closed unexpectedly, attempting reconnection...',
|
||||
)
|
||||
this.reconnecting = true
|
||||
this.reconnectOrCrash().finally(() => {
|
||||
this.reconnectWithRetries().finally(() => {
|
||||
this.reconnecting = false
|
||||
})
|
||||
}
|
||||
@@ -114,20 +188,24 @@ class CdpBackend implements ICdpBackend {
|
||||
private rejectPendingRequests(): void {
|
||||
const error = new Error('CDP connection lost')
|
||||
for (const request of this.pending.values()) {
|
||||
clearTimeout(request.timer)
|
||||
request.reject(error)
|
||||
}
|
||||
this.pending.clear()
|
||||
}
|
||||
|
||||
private async reconnectOrCrash(): Promise<void> {
|
||||
const maxRetries = CDP_LIMITS.CONNECT_MAX_RETRIES
|
||||
const retryDelay = TIMEOUTS.CDP_CONNECT_RETRY_DELAY
|
||||
private async reconnectWithRetries(): Promise<void> {
|
||||
const maxRetries = CDP_LIMITS.RECONNECT_MAX_RETRIES
|
||||
const delay = TIMEOUTS.CDP_RECONNECT_DELAY
|
||||
|
||||
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
||||
if (this.disconnecting) return
|
||||
|
||||
try {
|
||||
logger.info(`CDP reconnection attempt ${attempt}/${maxRetries}...`)
|
||||
await Bun.sleep(retryDelay)
|
||||
await Bun.sleep(delay)
|
||||
await this.attemptConnect()
|
||||
this.startKeepalive()
|
||||
logger.info('CDP reconnected successfully')
|
||||
return
|
||||
} catch (error) {
|
||||
@@ -146,11 +224,13 @@ class CdpBackend implements ICdpBackend {
|
||||
|
||||
async disconnect(): Promise<void> {
|
||||
this.disconnecting = true
|
||||
this.stopKeepalive()
|
||||
if (this.ws) {
|
||||
this.ws.close()
|
||||
this.ws = null
|
||||
this.connected = false
|
||||
}
|
||||
this.rejectPendingRequests()
|
||||
}
|
||||
|
||||
isConnected(): boolean {
|
||||
@@ -203,8 +283,24 @@ class CdpBackend implements ICdpBackend {
|
||||
|
||||
const ws = this.ws
|
||||
return new Promise<unknown>((resolve, reject) => {
|
||||
this.pending.set(id, { resolve, reject })
|
||||
ws.send(JSON.stringify(message))
|
||||
const timer = setTimeout(() => {
|
||||
this.pending.delete(id)
|
||||
reject(new Error(`CDP request timeout: ${method} (id=${id})`))
|
||||
}, TIMEOUTS.CDP_REQUEST_TIMEOUT)
|
||||
|
||||
this.pending.set(id, { resolve, reject, timer })
|
||||
|
||||
try {
|
||||
ws.send(JSON.stringify(message))
|
||||
} catch (err) {
|
||||
clearTimeout(timer)
|
||||
this.pending.delete(id)
|
||||
const msg = err instanceof Error ? err.message : String(err)
|
||||
reject(new Error(`CDP send failed: ${msg}`))
|
||||
|
||||
// send() failure likely means the socket is dead
|
||||
this.handleDeadConnection()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -237,6 +333,7 @@ class CdpBackend implements ICdpBackend {
|
||||
if (message.id !== undefined) {
|
||||
const pending = this.pending.get(message.id)
|
||||
if (pending) {
|
||||
clearTimeout(pending.timer)
|
||||
this.pending.delete(message.id)
|
||||
if (message.error) {
|
||||
pending.reject(new Error(`CDP error: ${message.error.message}`))
|
||||
|
||||
@@ -93,6 +93,10 @@ export class Browser {
|
||||
this.setupEventHandlers()
|
||||
}
|
||||
|
||||
isCdpConnected(): boolean {
|
||||
return this.cdp.isConnected()
|
||||
}
|
||||
|
||||
private setupEventHandlers(): void {
|
||||
this.cdp.Target.on('detachedFromTarget', (params) => {
|
||||
if (params.sessionId) {
|
||||
|
||||
@@ -27,6 +27,7 @@ export const PAGINATION = {
|
||||
|
||||
export const CDP_LIMITS = {
|
||||
CONNECT_MAX_RETRIES: 3,
|
||||
RECONNECT_MAX_RETRIES: 3,
|
||||
} as const
|
||||
|
||||
export const CONTENT_LIMITS = {
|
||||
|
||||
@@ -22,6 +22,10 @@ export const TIMEOUTS = {
|
||||
// CDP connection
|
||||
CDP_CONNECT: 10_000,
|
||||
CDP_CONNECT_RETRY_DELAY: 1_000,
|
||||
CDP_RECONNECT_DELAY: 5_000,
|
||||
CDP_KEEPALIVE_INTERVAL: 30_000,
|
||||
CDP_KEEPALIVE_TIMEOUT: 10_000,
|
||||
CDP_REQUEST_TIMEOUT: 60_000,
|
||||
|
||||
// External API calls
|
||||
KLAVIS_FETCH: 30_000,
|
||||
|
||||
Reference in New Issue
Block a user