Compare commits

...

8 Commits

Author SHA1 Message Date
Nikhil Sonti
bcf0e6f990 test(openclaw): align serialization mock with image check 2026-04-30 11:23:49 -07:00
Nikhil Sonti
d21befc509 fix(openclaw): address review feedback 2026-04-30 11:22:39 -07:00
Nikhil Sonti
b355b88433 fix(server): satisfy process lock error override 2026-04-30 11:21:55 -07:00
Nikhil Sonti
673ac0ad68 test(openclaw): cover lifecycle race recovery 2026-04-30 11:21:55 -07:00
Nikhil Sonti
114c3c3796 fix(openclaw): reconcile fixed gateway container startup 2026-04-30 11:21:55 -07:00
Nikhil Sonti
a32a073d43 feat(openclaw): serialize lifecycle across processes 2026-04-30 11:21:20 -07:00
Nikhil Sonti
054056017f feat(container): add container name reconciliation helpers 2026-04-30 11:21:19 -07:00
Nikhil Sonti
fc014c37b8 feat(server): add shared process lock helper 2026-04-30 11:19:24 -07:00
13 changed files with 858 additions and 26 deletions

View File

@@ -108,6 +108,7 @@
"klavis": "^2.15.0",
"pino": "^9.6.0",
"posthog-node": "^4.17.0",
"proper-lockfile": "^4.1.2",
"puppeteer-core": "24.23.0",
"ws": "^8.18.0",
"zod": "^3.24.2",
@@ -117,6 +118,7 @@
"@types/bun": "1.3.5",
"@types/debug": "^4.1.12",
"@types/node": "^24.3.3",
"@types/proper-lockfile": "^4.1.4",
"@types/sinon": "^21.0.0",
"@types/ws": "^8.5.13",
"async-mutex": "^0.5.0",

View File

@@ -15,18 +15,26 @@ import type {
ContainerCommandResult,
ContainerSpec,
LogFn,
WaitForContainerNameReleaseOptions,
} from '../../../lib/container'
import { isContainerNameInUse } from '../../../lib/container'
import { logger } from '../../../lib/logger'
import {
GUEST_VM_STATE,
hostPathToGuest,
type VmRuntime,
} from '../../../lib/vm'
import { ContainerNameInUseError } from '../../../lib/vm/errors'
const GATEWAY_CONTAINER_HOME = '/home/node'
const GATEWAY_STATE_DIR = `${GATEWAY_CONTAINER_HOME}/.openclaw`
const GUEST_OPENCLAW_HOME = `${GUEST_VM_STATE}/openclaw`
const GATEWAY_NPM_PREFIX = `${GATEWAY_CONTAINER_HOME}/.npm-global`
const CREATE_CONTAINER_MAX_ATTEMPTS = 3
const OPENCLAW_NAME_RELEASE_WAIT: WaitForContainerNameReleaseOptions = {
timeoutMs: 10_000,
intervalMs: 100,
}
// Prepend user-installed bin so tools like `claude` / `gemini` CLI that
// are installed via npm into the mounted home are discoverable by
// OpenClaw's child-process spawns (no login shell is involved).
@@ -121,10 +129,9 @@ export class ContainerRuntime {
input: GatewayContainerSpec,
onLog?: LogFn,
): Promise<void> {
await this.removeGatewayContainer(onLog)
const image = await this.ensureGatewayImageLoaded(onLog)
const container = await this.buildGatewayContainerSpec(input, image)
await this.shell.createContainer(container, onLog)
await this.createContainerWithNameReconcile(container, onLog)
await this.shell.startContainer(container.name)
}
@@ -208,10 +215,11 @@ export class ContainerRuntime {
onLog?: LogFn,
): Promise<number> {
const setupContainerName = `${OPENCLAW_GATEWAY_CONTAINER_NAME}-setup`
await this.shell.removeContainer(setupContainerName, { force: true }, onLog)
await this.removeContainerAndWait(setupContainerName, onLog)
const image = await this.ensureGatewayImageLoaded(onLog)
const setupArgs = command[0] === 'node' ? command.slice(1) : command
const createResult = await this.shell.runCommand(
const createResult = await this.runSetupCreateWithNameReconcile(
setupContainerName,
[
'create',
'--name',
@@ -252,10 +260,74 @@ export class ContainerRuntime {
}
private async removeGatewayContainer(onLog?: LogFn): Promise<void> {
await this.shell.removeContainer(
OPENCLAW_GATEWAY_CONTAINER_NAME,
{ force: true },
onLog,
await this.removeContainerAndWait(OPENCLAW_GATEWAY_CONTAINER_NAME, onLog)
}
/**
 * Create a fixed-name container, clearing any stale holder of the name first.
 *
 * Every attempt removes the named container and waits for the name to be
 * released before calling create. Only a ContainerNameInUseError is retried,
 * and only until CREATE_CONTAINER_MAX_ATTEMPTS attempts have been made; any
 * other error, or a name collision on the final attempt, is rethrown as-is.
 */
private async createContainerWithNameReconcile(
  container: ContainerSpec,
  onLog?: LogFn,
): Promise<void> {
  for (let attempt = 1; ; attempt++) {
    await this.removeContainerAndWait(container.name, onLog)
    try {
      await this.shell.createContainer(container, onLog)
      return
    } catch (err) {
      const canRetry =
        err instanceof ContainerNameInUseError &&
        attempt < CREATE_CONTAINER_MAX_ATTEMPTS
      if (!canRetry) {
        throw err
      }
      logger.warn('OpenClaw container name still in use; retrying create', {
        containerName: container.name,
        attempt,
        maxAttempts: CREATE_CONTAINER_MAX_ATTEMPTS,
      })
    }
  }
}
/**
 * Run the setup container's create command, retrying on name collisions.
 *
 * The command result is returned untouched when it succeeded, when it failed
 * for a reason other than a name collision, or when the attempt budget
 * (CREATE_CONTAINER_MAX_ATTEMPTS) is used up. Between attempts the setup
 * container is removed and its name is awaited to be released.
 */
private async runSetupCreateWithNameReconcile(
  setupContainerName: string,
  createArgs: string[],
  onLog?: LogFn,
): Promise<ContainerCommandResult> {
  for (let attempt = 1; ; attempt++) {
    const result = await this.shell.runCommand(createArgs, onLog)
    // stderr is only inspected for failed commands.
    const nameCollision =
      result.exitCode !== 0 && isContainerNameInUse(result.stderr)
    if (!nameCollision || attempt >= CREATE_CONTAINER_MAX_ATTEMPTS) {
      return result
    }
    logger.warn(
      'OpenClaw setup container name still in use; retrying create',
      {
        containerName: setupContainerName,
        attempt,
        maxAttempts: CREATE_CONTAINER_MAX_ATTEMPTS,
      },
    )
    await this.removeContainerAndWait(setupContainerName, onLog)
  }
}
/**
 * Force-remove a container, then block until its name no longer resolves
 * (bounded by OPENCLAW_NAME_RELEASE_WAIT).
 */
private async removeContainerAndWait(
  containerName: string,
  onLog?: LogFn,
): Promise<void> {
  const { shell } = this
  const removeOptions = { force: true }
  await shell.removeContainer(containerName, removeOptions, onLog)
  await shell.waitForContainerNameRelease(
    containerName,
    OPENCLAW_NAME_RELEASE_WAIT,
  )
}

View File

@@ -10,6 +10,7 @@
import { existsSync } from 'node:fs'
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import {
OPENCLAW_CONTAINER_HOME,
OPENCLAW_GATEWAY_CONTAINER_PORT,
@@ -18,6 +19,7 @@ import {
import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
import { getOpenClawDir } from '../../../lib/browseros-dir'
import { logger } from '../../../lib/logger'
import { withProcessLock } from '../../../lib/process-lock'
import {
type AgentLiveStatus,
type AgentSessionState,
@@ -1012,10 +1014,16 @@ export class OpenClawService {
if (persistedPort !== null) {
this.setPort(persistedPort)
}
if (await this.isGatewayAvailable(this.hostPort)) {
const currentPortReady = await this.isGatewayPortReady(this.hostPort)
if (
currentPortReady &&
(await this.isGatewayAuthenticated(this.hostPort))
) {
return
}
const hostPort = await allocateGatewayPort(this.openclawDir)
const hostPort = await allocateGatewayPort(this.openclawDir, {
excludePort: currentPortReady ? this.hostPort : undefined,
})
if (hostPort !== this.hostPort) {
logProgress?.(`Allocated OpenClaw gateway host port ${hostPort}`)
logger.info('Allocated OpenClaw gateway host port', { hostPort })
@@ -1025,7 +1033,10 @@ export class OpenClawService {
private async isGatewayAvailable(hostPort: number): Promise<boolean> {
if (!(await this.isGatewayPortReady(hostPort))) return false
return this.isGatewayAuthenticated(hostPort)
}
private async isGatewayAuthenticated(hostPort: number): Promise<boolean> {
if (!this.tokenLoaded) {
logger.debug(
'OpenClaw gateway port is ready before auth token is loaded',
@@ -1512,8 +1523,14 @@ export class OpenClawService {
})
await previous.catch(() => undefined)
try {
logger.debug('OpenClaw lifecycle operation started', { operation })
return await fn()
return await withProcessLock(
'openclaw-lifecycle',
{ lockDir: join(this.openclawDir, '.locks') },
async () => {
logger.debug('OpenClaw lifecycle operation started', { operation })
return await fn()
},
)
} finally {
release()
}

View File

@@ -16,6 +16,7 @@ import { OPENCLAW_GATEWAY_CONTAINER_PORT } from '@browseros/shared/constants/ope
import { getOpenClawStateDir } from './openclaw-env'
const RUNTIME_STATE_FILE = 'runtime-state.json'
const MAX_TCP_PORT = 65_535
interface RuntimeState {
gatewayPort: number
@@ -26,7 +27,7 @@ function readForcedGatewayPort(): number | null {
if (!raw) return null
const parsed = Number.parseInt(raw, 10)
if (!Number.isInteger(parsed) || parsed <= 0 || parsed > 65535) {
if (!Number.isInteger(parsed) || parsed <= 0 || parsed > MAX_TCP_PORT) {
return null
}
return parsed
@@ -49,7 +50,7 @@ export async function readPersistedGatewayPort(
typeof parsed.gatewayPort === 'number' &&
Number.isInteger(parsed.gatewayPort) &&
parsed.gatewayPort > 0 &&
parsed.gatewayPort <= 65535
parsed.gatewayPort <= MAX_TCP_PORT
) {
return parsed.gatewayPort
}
@@ -82,14 +83,26 @@ function isPortAvailable(port: number): Promise<boolean> {
})
}
/**
 * Scan upward from `startPort` for the first port accepted by
 * `isPortAvailable`.
 *
 * @param startPort First port to try.
 * @param excludePort Optional port that must never be returned, even when it
 *   is available.
 * @returns The first acceptable port at or above `startPort`.
 * @throws Error when the scan runs past MAX_TCP_PORT without finding one.
 */
async function findAvailablePort(
  startPort: number,
  excludePort?: number,
): Promise<number> {
  let port = startPort
  // The exclusion is checked first so the excluded port is never probed.
  while (port === excludePort || !(await isPortAvailable(port))) {
    port++
    if (port > MAX_TCP_PORT) {
      throw new Error(
        `No available OpenClaw gateway port found from ${startPort}`,
      )
    }
  }
  return port
}
/** Optional knobs for allocateGatewayPort. */
export interface AllocateGatewayPortOptions {
/** Port that must not be chosen, neither as the persisted port nor by the scan. */
excludePort?: number
}
/**
* Pick a host port for the gateway container and persist it. Prefers the
* previously persisted port when it's still bindable; otherwise scans
@@ -97,6 +110,7 @@ async function findAvailablePort(startPort: number): Promise<number> {
*/
export async function allocateGatewayPort(
openclawDir: string,
opts: AllocateGatewayPortOptions = {},
): Promise<number> {
const forcedPort = readForcedGatewayPort()
if (forcedPort !== null) {
@@ -105,10 +119,17 @@ export async function allocateGatewayPort(
}
const persisted = await readPersistedGatewayPort(openclawDir)
if (persisted !== null && (await isPortAvailable(persisted))) {
if (
persisted !== null &&
persisted !== opts.excludePort &&
(await isPortAvailable(persisted))
) {
return persisted
}
const port = await findAvailablePort(OPENCLAW_GATEWAY_CONTAINER_PORT)
const port = await findAvailablePort(
OPENCLAW_GATEWAY_CONTAINER_PORT,
opts.excludePort,
)
await writePersistedGatewayPort(openclawDir, port)
return port
}

View File

@@ -4,9 +4,20 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { ContainerCliError } from '../vm/errors'
import {
ContainerCliError,
ContainerNameInUseError,
ContainerNameReleaseTimeoutError,
} from '../vm/errors'
import { LimaCli } from '../vm/lima-cli'
import type { ContainerSpec, LogFn, MountSpec, PortMapping } from './types'
import type {
ContainerInfo,
ContainerSpec,
LogFn,
MountSpec,
PortMapping,
WaitForContainerNameReleaseOptions,
} from './types'
export function buildNerdctlCommand(args: string[]): string[] {
return ['nerdctl', ...args]
@@ -58,7 +69,18 @@ export class ContainerCli {
}
/**
 * Create (but do not start) a container from `spec`.
 *
 * The create command is issued exactly once. A non-zero exit whose stderr
 * matches a name collision is surfaced as ContainerNameInUseError so callers
 * can reconcile the name and retry; every other failure becomes the generic
 * command error.
 *
 * @throws ContainerNameInUseError when the runtime reports the name as taken.
 */
async createContainer(spec: ContainerSpec, onLog?: LogFn): Promise<void> {
  const args = buildCreateArgs(spec)
  const result = await this.runCommand(args, onLog)
  if (result.exitCode === 0) return
  if (isContainerNameInUse(result.stderr)) {
    throw new ContainerNameInUseError(
      spec.name,
      `nerdctl ${args.join(' ')}`,
      result.exitCode,
      result.stderr.trim(),
    )
  }
  throw this.commandError(args, result)
}
async startContainer(name: string, onLog?: LogFn): Promise<void> {
@@ -84,6 +106,36 @@ export class ContainerCli {
throw this.commandError(args, result)
}
/**
 * Look up a container by name.
 *
 * @returns The parsed container summary, or `null` when the runtime reports
 *   that no such container exists.
 * @throws The generic command error for any other inspect failure.
 */
async inspectContainer(name: string): Promise<ContainerInfo | null> {
  const inspectArgs = ['container', 'inspect', '--format', '{{json .}}', name]
  const outcome = await this.runCommand(inspectArgs)
  if (outcome.exitCode !== 0) {
    if (!isNoSuchContainer(outcome.stderr)) {
      throw this.commandError(inspectArgs, outcome)
    }
    return null
  }
  return parseContainerInfo(outcome.stdout, name)
}
/**
 * Poll `inspectContainer` until the name stops resolving.
 *
 * @param name Container name to watch.
 * @param opts `timeoutMs` (default 5000) caps the total wait; `intervalMs`
 *   (default 100) is the pause between polls, shortened to fit the time left.
 * @throws ContainerNameReleaseTimeoutError when the name still resolves once
 *   the timeout has elapsed.
 */
async waitForContainerNameRelease(
  name: string,
  opts: WaitForContainerNameReleaseOptions = {},
): Promise<void> {
  const timeoutMs = opts.timeoutMs ?? 5_000
  const intervalMs = opts.intervalMs ?? 100
  const began = Date.now()
  const elapsedMs = (): number => Date.now() - began

  while (elapsedMs() <= timeoutMs) {
    const stillPresent = await this.inspectContainer(name)
    if (!stillPresent) return

    const budgetMs = timeoutMs - elapsedMs()
    if (budgetMs <= 0) break
    await Bun.sleep(Math.min(intervalMs, budgetMs))
  }

  throw new ContainerNameReleaseTimeoutError(name, timeoutMs)
}
async exec(name: string, cmd: string[], onLog?: LogFn): Promise<number> {
const result = await this.runCommand(['exec', name, ...cmd], onLog)
return result.exitCode
@@ -198,12 +250,65 @@ function mountArg(mount: MountSpec): string {
return `${mount.source}:${mount.target}${mount.readonly ? ':ro' : ''}`
}
/**
 * Turn the stdout of a JSON-formatted container inspect into a ContainerInfo.
 *
 * Only the first non-blank line is parsed. The payload may be a single object
 * or an array (whose first element is used). Fields that are missing or not
 * non-empty strings come back as `null`; the name falls back to
 * `fallbackName`.
 *
 * @throws Error when stdout has no non-blank line; JSON.parse errors propagate.
 */
function parseContainerInfo(
  stdout: string,
  fallbackName: string,
): ContainerInfo {
  let payload: string | undefined
  for (const rawLine of stdout.trim().split('\n')) {
    const candidate = rawLine.trim()
    if (candidate) {
      payload = candidate
      break
    }
  }
  if (!payload) {
    throw new Error(`nerdctl container inspect returned empty output`)
  }

  const decoded = JSON.parse(payload) as unknown
  const first = Array.isArray(decoded) ? decoded[0] : decoded
  const root = isRecord(first) ? first : {}
  const configSection = isRecord(root.Config) ? root.Config : {}
  const stateSection = isRecord(root.State) ? root.State : {}

  // Leading slashes are dropped from the reported name.
  const reportedName = stringValue(root.Name)
  const name =
    reportedName === null ? fallbackName : reportedName.replace(/^\/+/, '')

  const status = stringValue(stateSection.Status) ?? stringValue(root.Status)

  // Prefer the explicit boolean; otherwise infer from the status text.
  let running: boolean | null
  if (typeof stateSection.Running === 'boolean') {
    running = stateSection.Running
  } else if (status) {
    running = status.toLowerCase() === 'running'
  } else {
    running = null
  }

  return {
    id: stringValue(root.ID) ?? stringValue(root.Id),
    name,
    image: stringValue(configSection.Image) ?? stringValue(root.Image),
    status,
    running,
  }
}
/**
 * Report whether stderr says the named container does not exist.
 *
 * Matching is case-insensitive and deliberately limited to
 * "no such container" / "container not found": a bare "not found" also
 * appears in unrelated failures (for example a missing network interface),
 * which must not be mistaken for an absent container.
 */
function isNoSuchContainer(stderr: string): boolean {
  const lower = stderr.toLowerCase()
  return (
    lower.includes('no such container') || lower.includes('container not found')
  )
}
/**
 * Report whether stderr describes a container-name collision.
 *
 * Case-insensitive. Matches either the phrase "name is already in use", or a
 * message containing both "name-store error" and "already used".
 */
export function isContainerNameInUse(stderr: string): boolean {
  const text = stderr.toLowerCase()
  const nameStoreCollision =
    text.includes('name-store error') && text.includes('already used')
  if (nameStoreCollision) return true
  return text.includes('name is already in use')
}
/** Join lines with newlines and a trailing newline; an empty list yields ''. */
function linesToOutput(lines: string[]): string {
  return lines.length > 0 ? lines.join('\n').concat('\n') : ''
}
/** Type guard: true for any non-null value whose typeof is 'object' (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) return false
  return typeof value === 'object'
}
/** Return `value` when it is a non-empty string; otherwise `null`. */
function stringValue(value: unknown): string | null {
  if (typeof value !== 'string') return null
  return value.length > 0 ? value : null
}

View File

@@ -38,6 +38,19 @@ export interface ContainerSpec {
command?: string[]
}
/** Summary of one container as reported by a container inspect call. */
export interface ContainerInfo {
/** Container ID, or null when the inspect output carries none. */
id: string | null
/** Container name. */
name: string
/** Image reference, or null when the inspect output carries none. */
image: string | null
/** Status text (e.g. 'running', 'exited'), or null when not reported. */
status: string | null
/** Whether the container is running; null when it cannot be determined. */
running: boolean | null
}
/** Timing options for waiting until a container name stops resolving. */
export interface WaitForContainerNameReleaseOptions {
/** Maximum total time to wait, in milliseconds. */
timeoutMs?: number
/** Pause between successive checks, in milliseconds. */
intervalMs?: number
}
export interface LogLine {
stream: 'stdout' | 'stderr'
line: string

View File

@@ -0,0 +1,130 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
import { mkdir } from 'node:fs/promises'
import { join } from 'node:path'
import lockfile from 'proper-lockfile'
const DEFAULT_STALE_MS = 60_000
const DEFAULT_UPDATE_MS = 15_000
const DEFAULT_TIMEOUT_MS = 120_000
const DEFAULT_RETRY_MIN_TIMEOUT_MS = 100
const DEFAULT_RETRY_MAX_TIMEOUT_MS = 1_000
/** Options controlling where a process lock lives and how acquisition retries. */
export interface ProcessLockOptions {
/** Directory that holds the lock entry; created recursively if missing. */
lockDir: string
/** Passed to proper-lockfile as `stale`; defaults to DEFAULT_STALE_MS (60s). */
staleMs?: number
/** Passed to proper-lockfile as `update`; defaults to DEFAULT_UPDATE_MS (15s). */
updateMs?: number
/** Total time allowed for acquisition; defaults to DEFAULT_TIMEOUT_MS (120s). */
timeoutMs?: number
/** Lower bound of the delay between attempts; defaults to 100ms. */
retryMinTimeoutMs?: number
/** Upper bound (exclusive) of the randomized delay; defaults to 1000ms. */
retryMaxTimeoutMs?: number
/** When false, every retry waits exactly the minimum delay; defaults to true. */
randomize?: boolean
}
/**
 * Raised when a process lock could not be acquired within its timeout.
 * Carries the lock name, its path, the timeout used, and (optionally) the
 * last error seen while trying.
 */
export class ProcessLockTimeoutError extends Error {
  public readonly lockName: string
  public readonly lockPath: string
  public readonly timeoutMs: number
  public override readonly cause?: unknown

  constructor(
    lockName: string,
    lockPath: string,
    timeoutMs: number,
    cause?: unknown,
  ) {
    super(
      `Timed out acquiring process lock "${lockName}" at ${lockPath} after ${timeoutMs}ms`,
    )
    this.lockName = lockName
    this.lockPath = lockPath
    this.timeoutMs = timeoutMs
    this.cause = cause
    this.name = 'ProcessLockTimeoutError'
  }
}
/**
 * Acquire the named cross-process lock, run `fn`, and release the lock
 * whether `fn` resolves or rejects.
 *
 * @returns Whatever `fn` resolves to.
 * @throws Acquisition errors, `fn`'s rejection, or a release failure.
 */
export async function withProcessLock<T>(
  name: string,
  options: ProcessLockOptions,
  fn: () => Promise<T>,
): Promise<T> {
  const unlock = await acquireProcessLock(name, options)
  let outcome: T
  try {
    outcome = await fn()
  } catch (failure) {
    await unlock()
    throw failure
  }
  await unlock()
  return outcome
}
/** Build the lock path `<lockDir>/<sanitized name>.lock`. */
export function resolveProcessLockPath(lockDir: string, name: string): string {
  const fileName = `${sanitizeLockName(name)}.lock`
  return join(lockDir, fileName)
}
/**
 * Acquire the named lock, retrying while another holder has it.
 *
 * Each attempt calls proper-lockfile with its own retries disabled; this
 * function sleeps between attempts so the total wait never exceeds
 * `timeoutMs`. Errors other than ELOCKED are rethrown immediately.
 *
 * @returns The release function for the acquired lock.
 * @throws ProcessLockTimeoutError when the lock is still held at the deadline.
 */
async function acquireProcessLock(
  name: string,
  options: ProcessLockOptions,
): Promise<() => Promise<void>> {
  await mkdir(options.lockDir, { recursive: true })
  const lockPath = resolveProcessLockPath(options.lockDir, name)

  const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS
  const retryMinTimeoutMs =
    options.retryMinTimeoutMs ?? DEFAULT_RETRY_MIN_TIMEOUT_MS
  const retryMaxTimeoutMs =
    options.retryMaxTimeoutMs ?? DEFAULT_RETRY_MAX_TIMEOUT_MS
  const lockSettings = {
    lockfilePath: lockPath,
    realpath: false,
    stale: options.staleMs ?? DEFAULT_STALE_MS,
    update: options.updateMs ?? DEFAULT_UPDATE_MS,
    // The wrapper owns retry/backoff so acquisition respects timeoutMs.
    retries: 0,
  }

  const began = Date.now()
  const elapsedMs = (): number => Date.now() - began
  let lastError: unknown

  while (elapsedMs() <= timeoutMs) {
    try {
      return await lockfile.lock(lockPath, lockSettings)
    } catch (err) {
      if (!isLockedError(err)) throw err
      lastError = err
    }

    const budgetMs = timeoutMs - elapsedMs()
    if (budgetMs <= 0) break
    const delayMs = nextRetryDelay(
      retryMinTimeoutMs,
      retryMaxTimeoutMs,
      options.randomize,
    )
    await Bun.sleep(Math.min(budgetMs, delayMs))
  }

  throw new ProcessLockTimeoutError(name, lockPath, timeoutMs, lastError)
}
/**
 * Reduce a lock name to a safe file-name stem.
 *
 * Surrounding whitespace is trimmed, every run of characters outside
 * `[a-zA-Z0-9._-]` becomes a single '-', and leading/trailing dots and dashes
 * are stripped.
 *
 * @throws Error when nothing is left after sanitizing.
 */
function sanitizeLockName(name: string): string {
  const dashed = name.trim().replace(/[^a-zA-Z0-9._-]+/g, '-')
  const stem = dashed.replace(/^[.-]+|[.-]+$/g, '')
  if (stem.length === 0) {
    throw new Error('Process lock name must not be empty')
  }
  return stem
}
/** True when `err` is an object whose `code` property equals 'ELOCKED'. */
function isLockedError(err: unknown): boolean {
  if (typeof err !== 'object' || err === null) return false
  if (!('code' in err)) return false
  return (err as { code?: unknown }).code === 'ELOCKED'
}
/**
 * Pick the delay before the next lock attempt.
 *
 * Returns `minTimeoutMs` when the range is empty or randomization is off;
 * otherwise `minTimeoutMs` plus a random whole-number offset below the width
 * of the range.
 */
function nextRetryDelay(
  minTimeoutMs: number,
  maxTimeoutMs: number,
  randomize = true,
): number {
  const useFixedDelay = maxTimeoutMs <= minTimeoutMs || !randomize
  if (useFixedDelay) return minTimeoutMs

  const jitterWindow = maxTimeoutMs - minTimeoutMs
  const jitter = Math.floor(Math.random() * jitterWindow)
  return minTimeoutMs + jitter
}

View File

@@ -30,8 +30,36 @@ export class ContainerCliError extends VmError {
command: string,
public readonly exitCode: number,
public readonly stderr: string,
message = `${command} failed with exit code ${exitCode}: ${stderr}`,
) {
super(`${command} failed with exit code ${exitCode}: ${stderr}`)
super(message)
}
}
/**
 * A container CLI failure caused by the requested container name already
 * being taken. Exposes the contested name alongside the base error's fields.
 */
export class ContainerNameInUseError extends ContainerCliError {
  public readonly containerName: string

  constructor(
    containerName: string,
    command: string,
    exitCode: number,
    stderr: string,
  ) {
    super(
      command,
      exitCode,
      stderr,
      `${command} failed because container name "${containerName}" is already in use: ${stderr}`,
    )
    this.containerName = containerName
  }
}
/**
 * Raised when a container name was still resolvable after waiting
 * `timeoutMs` milliseconds for it to be released.
 */
export class ContainerNameReleaseTimeoutError extends VmError {
  public readonly containerName: string
  public readonly timeoutMs: number

  constructor(containerName: string, timeoutMs: number) {
    super(
      `Timed out waiting ${timeoutMs}ms for container name "${containerName}" to be released`,
    )
    this.containerName = containerName
    this.timeoutMs = timeoutMs
  }
}

View File

@@ -9,8 +9,10 @@ import {
OPENCLAW_IMAGE,
} from '@browseros/shared/constants/openclaw'
import { ContainerRuntime } from '../../../../src/api/services/openclaw/container-runtime'
import { ContainerNameInUseError } from '../../../../src/lib/vm/errors'
const PROJECT_DIR = '/tmp/openclaw'
const OPENCLAW_NAME_RELEASE_WAIT = { timeoutMs: 10_000, intervalMs: 100 }
const defaultSpec = {
hostPort: 18789,
hostHome: '/Users/me/.browseros/vm/openclaw',
@@ -36,6 +38,10 @@ describe('ContainerRuntime', () => {
{ force: true },
undefined,
)
expect(deps.shell.waitForContainerNameRelease).toHaveBeenCalledWith(
OPENCLAW_GATEWAY_CONTAINER_NAME,
OPENCLAW_NAME_RELEASE_WAIT,
)
expect(deps.loader.ensureAgentImageLoaded).toHaveBeenCalledWith(
'openclaw',
undefined,
@@ -68,6 +74,62 @@ describe('ContainerRuntime', () => {
)
})
it('reconciles and retries when gateway create reports name-in-use', async () => {
const deps = createDeps()
deps.shell.createContainer = mock(async () => {
if (deps.shell.createContainer.mock.calls.length === 1) {
throw new ContainerNameInUseError(
OPENCLAW_GATEWAY_CONTAINER_NAME,
'nerdctl create',
1,
`name-store error\nname "${OPENCLAW_GATEWAY_CONTAINER_NAME}" is already used`,
)
}
})
const runtime = new ContainerRuntime({
vm: deps.vm,
shell: deps.shell,
loader: deps.loader,
projectDir: PROJECT_DIR,
})
await runtime.startGateway(defaultSpec)
expect(deps.shell.createContainer).toHaveBeenCalledTimes(2)
expect(deps.shell.removeContainer).toHaveBeenCalledTimes(2)
expect(deps.shell.waitForContainerNameRelease).toHaveBeenCalledTimes(2)
expect(deps.shell.startContainer).toHaveBeenCalledWith(
OPENCLAW_GATEWAY_CONTAINER_NAME,
)
})
it('bounds gateway create retries when the name stays in use', async () => {
const deps = createDeps()
deps.shell.createContainer = mock(async () => {
throw new ContainerNameInUseError(
OPENCLAW_GATEWAY_CONTAINER_NAME,
'nerdctl create',
1,
`name-store error\nname "${OPENCLAW_GATEWAY_CONTAINER_NAME}" is already used`,
)
})
const runtime = new ContainerRuntime({
vm: deps.vm,
shell: deps.shell,
loader: deps.loader,
projectDir: PROJECT_DIR,
})
await expect(runtime.startGateway(defaultSpec)).rejects.toBeInstanceOf(
ContainerNameInUseError,
)
expect(deps.shell.createContainer).toHaveBeenCalledTimes(3)
expect(deps.shell.removeContainer).toHaveBeenCalledTimes(3)
expect(deps.shell.waitForContainerNameRelease).toHaveBeenCalledTimes(3)
expect(deps.shell.startContainer).not.toHaveBeenCalled()
})
it('uses OPENCLAW_IMAGE as a direct image override', async () => {
const previous = process.env.OPENCLAW_IMAGE
process.env.OPENCLAW_IMAGE = 'localhost/openclaw:test'
@@ -152,6 +214,45 @@ describe('ContainerRuntime', () => {
{ force: true },
undefined,
)
expect(deps.shell.waitForContainerNameRelease).toHaveBeenCalledWith(
`${OPENCLAW_GATEWAY_CONTAINER_NAME}-setup`,
OPENCLAW_NAME_RELEASE_WAIT,
)
})
it('reconciles and retries when setup create reports name-in-use', async () => {
const deps = createDeps()
let setupCreateCount = 0
deps.shell.runCommand = mock(async (args: string[]) => {
if (args[0] === 'create') {
setupCreateCount += 1
if (setupCreateCount === 1) {
return {
exitCode: 1,
stdout: '',
stderr: `name-store error\nname "${OPENCLAW_GATEWAY_CONTAINER_NAME}-setup" is already used`,
}
}
}
return { exitCode: 0, stdout: '', stderr: '' }
})
const runtime = new ContainerRuntime({
vm: deps.vm,
shell: deps.shell,
loader: deps.loader,
projectDir: PROJECT_DIR,
})
await expect(
runtime.runGatewaySetupCommand(
['node', 'dist/index.js', 'agents', 'list', '--json'],
defaultSpec,
),
).resolves.toBe(0)
expect(setupCreateCount).toBe(2)
expect(deps.shell.waitForContainerNameRelease).toHaveBeenCalledTimes(2)
expect(deps.shell.removeContainer).toHaveBeenCalledTimes(3)
})
it('tails and fetches gateway logs through the new transport', async () => {
@@ -257,6 +358,7 @@ function createDeps() {
stopContainer: mock(async () => {}),
removeContainer: mock(async () => {}),
containerImageRef: mock(async () => OPENCLAW_IMAGE),
waitForContainerNameRelease: mock(async () => {}),
exec: mock(async () => 0),
runCommand: mock(
async (_args: string[], onLog?: (line: string) => void) => {

View File

@@ -737,6 +737,77 @@ describe('OpenClawService', () => {
expect(probe).toHaveBeenCalledTimes(2)
})
it('serializes start across service instances sharing an OpenClaw dir', async () => {
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
await mkdir(join(tempDir, '.openclaw'), { recursive: true })
await writeFile(
join(tempDir, '.openclaw', 'openclaw.json'),
JSON.stringify({
gateway: {
auth: {
token: 'cli-token',
},
},
}),
)
let gatewayReady = false
let releaseStartGateway!: () => void
let notifyStartGatewayEntered!: () => void
const startGatewayEntered = new Promise<void>((resolve) => {
notifyStartGatewayEntered = resolve
})
const unblockStartGateway = new Promise<void>((resolve) => {
releaseStartGateway = resolve
})
const firstEnsureReady = mock(async () => {})
const secondEnsureReady = mock(async () => {})
const startGateway = mock(async () => {
notifyStartGatewayEntered()
await unblockStartGateway
gatewayReady = true
})
const waitForReady = mock(async () => true)
const probe = mock(async () => {})
const firstService = new OpenClawService() as MutableOpenClawService
const secondService = new OpenClawService() as MutableOpenClawService
firstService.openclawDir = tempDir
secondService.openclawDir = tempDir
firstService.runtime = {
ensureReady: firstEnsureReady,
isReady: async () => gatewayReady,
isGatewayCurrent: async () => true,
startGateway,
waitForReady,
}
secondService.runtime = {
ensureReady: secondEnsureReady,
isReady: async () => gatewayReady,
isGatewayCurrent: async () => true,
startGateway,
waitForReady,
}
firstService.cliClient = { probe }
secondService.cliClient = { probe }
mockGatewayAuth()
const firstStart = firstService.start()
await startGatewayEntered
const secondStart = secondService.start()
await Bun.sleep(25)
const secondEnteredBeforeFirstFinished = secondEnsureReady.mock.calls.length
releaseStartGateway()
await Promise.all([firstStart, secondStart])
expect(secondEnteredBeforeFirstFinished).toBe(0)
expect(firstEnsureReady).toHaveBeenCalledTimes(1)
expect(secondEnsureReady).toHaveBeenCalledTimes(1)
expect(startGateway).toHaveBeenCalledTimes(1)
expect(waitForReady).toHaveBeenCalledTimes(1)
expect(probe).toHaveBeenCalledTimes(2)
})
it('does not restart a ready gateway when start is called again', async () => {
tempDir = await mkdtemp(join(tmpdir(), 'openclaw-service-'))
await mkdir(join(tempDir, '.openclaw'), { recursive: true })

View File

@@ -4,10 +4,20 @@
*/
import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'
import {
chmod,
mkdir,
mkdtemp,
readFile,
rm,
writeFile,
} from 'node:fs/promises'
import { join } from 'node:path'
import { ContainerCli } from '../../../src/lib/container/container-cli'
import { ContainerCliError } from '../../../src/lib/vm/errors'
import {
ContainerCliError,
ContainerNameInUseError,
} from '../../../src/lib/vm/errors'
import { fakeSsh } from '../../__helpers__/fake-ssh'
describe('ContainerCli', () => {
@@ -163,6 +173,92 @@ describe('ContainerCli', () => {
)
})
it('inspects a container by name', async () => {
const sshPath = await fakeSsh(
{
stdout: JSON.stringify({
ID: 'abc123',
Name: 'gateway',
Config: { Image: 'openclaw:v1' },
State: { Status: 'running', Running: true },
}),
},
logPath,
)
const cli = await createCli(sshPath, tempDir)
await expect(cli.inspectContainer('gateway')).resolves.toEqual({
id: 'abc123',
name: 'gateway',
image: 'openclaw:v1',
status: 'running',
running: true,
})
await expect(readFile(logPath, 'utf8')).resolves.toContain(
"lima-browseros-vm 'nerdctl' 'container' 'inspect' '--format' '{{json .}}' 'gateway'",
)
})
it('returns null when inspected containers are absent', async () => {
const sshPath = await fakeSsh(
{ stderr: 'no such container', exit: 1 },
logPath,
)
const cli = await createCli(sshPath, tempDir)
await expect(cli.inspectContainer('gateway')).resolves.toBeNull()
})
it('does not treat unrelated not found errors as absent containers', async () => {
const sshPath = await fakeSsh(
{ stderr: 'network interface not found', exit: 1 },
logPath,
)
const cli = await createCli(sshPath, tempDir)
await expect(cli.inspectContainer('gateway')).rejects.toBeInstanceOf(
ContainerCliError,
)
})
it('waits until a container name is no longer resolvable', async () => {
const sshPath = await fakeSshContainerExistsThenMissing(tempDir, logPath)
const cli = await createCli(sshPath, tempDir)
await expect(
cli.waitForContainerNameRelease('gateway', {
timeoutMs: 500,
intervalMs: 5,
}),
).resolves.toBeUndefined()
const inspectCalls = (await readFile(logPath, 'utf8'))
.split('\n')
.filter((line) => line.includes("'container' 'inspect'"))
expect(inspectCalls).toHaveLength(2)
})
it('classifies create name-store collisions as name-in-use errors', async () => {
const sshPath = await fakeSsh(
{
stderr:
'name-store error\nname "gateway" is already used by ID "abc123"',
exit: 1,
},
logPath,
)
const cli = await createCli(sshPath, tempDir)
const error = await cli
.createContainer({ name: 'gateway', image: 'openclaw:v1' })
.catch((err) => err)
expect(error).toBeInstanceOf(ContainerNameInUseError)
expect(error.containerName).toBe('gateway')
expect(error.stderr).toContain('name "gateway" is already used')
})
it('tolerates removal when the container is already absent', async () => {
const sshPath = await fakeSsh(
{ stderr: 'no such container', exit: 1 },
@@ -215,3 +311,31 @@ function sshConfigPath(tempDir: string): string {
function sshPrefix(configPath: string): string {
return `ARGS:-F ${configPath} lima-browseros-vm`
}
/**
 * Write an executable fake `ssh` script whose answer changes between calls.
 *
 * The script appends its arguments to `logPath` and keeps an invocation
 * counter in a file under `tempDir`. The first invocation prints inspect JSON
 * for an exited container and exits 0; every later invocation prints
 * "no such container" to stderr and exits 1.
 *
 * @returns The path of the generated script.
 */
async function fakeSshContainerExistsThenMissing(
tempDir: string,
logPath: string,
): Promise<string> {
const path = join(tempDir, 'ssh-container-exists-then-missing')
// Counter file lets the script remember how many times it has been invoked.
const counterPath = join(tempDir, 'ssh-container-exists-then-missing.count')
const body = `#!/usr/bin/env bash
set -u
echo "ARGS:$*" >> "${logPath}"
count="$(cat "${counterPath}" 2>/dev/null || echo 0)"
next=$((count + 1))
printf '%s' "$next" > "${counterPath}"
case "$count" in
0)
printf '{"ID":"abc123","Name":"gateway","Config":{"Image":"openclaw:v1"},"State":{"Status":"exited","Running":false}}'
exit 0
;;
*)
echo "no such container" >&2
exit 1
;;
esac
`
await writeFile(path, body)
// Mark the script executable so it can stand in for the ssh binary.
await chmod(path, 0o755)
return path
}

View File

@@ -0,0 +1,129 @@
/**
* @license
* Copyright 2025 BrowserOS
*/
import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
import { mkdtemp, readdir, rm } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import {
ProcessLockTimeoutError,
resolveProcessLockPath,
withProcessLock,
} from '../../src/lib/process-lock'
describe('process-lock', () => {
let tempDir: string
let lockDir: string
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'process-lock-'))
lockDir = join(tempDir, '.locks')
})
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true })
})
it('serializes concurrent callers for the same lock name', async () => {
const events: string[] = []
let releaseFirst!: () => void
const firstMayFinish = new Promise<void>((resolve) => {
releaseFirst = resolve
})
const first = withProcessLock(
'openclaw-lifecycle',
{ lockDir },
async () => {
events.push('first:start')
await firstMayFinish
events.push('first:end')
},
)
while (!events.includes('first:start')) await Bun.sleep(1)
const second = withProcessLock(
'openclaw-lifecycle',
{
lockDir,
retryMinTimeoutMs: 5,
retryMaxTimeoutMs: 5,
},
async () => {
events.push('second')
},
)
await Bun.sleep(25)
expect(events).toEqual(['first:start'])
releaseFirst()
await Promise.all([first, second])
expect(events).toEqual(['first:start', 'first:end', 'second'])
})
it('releases the lock when the callback throws', async () => {
await expect(
withProcessLock('openclaw-lifecycle', { lockDir }, async () => {
throw new Error('boom')
}),
).rejects.toThrow('boom')
await expect(
withProcessLock('openclaw-lifecycle', { lockDir }, async () => 'ok'),
).resolves.toBe('ok')
})
it('fails with a structured timeout error when acquisition takes too long', async () => {
let releaseFirst!: () => void
const firstMayFinish = new Promise<void>((resolve) => {
releaseFirst = resolve
})
const first = withProcessLock(
'openclaw-lifecycle',
{ lockDir },
async () => {
await firstMayFinish
},
)
await Bun.sleep(10)
try {
await expect(
withProcessLock(
'openclaw-lifecycle',
{
lockDir,
timeoutMs: 25,
retryMinTimeoutMs: 5,
retryMaxTimeoutMs: 5,
},
async () => undefined,
),
).rejects.toBeInstanceOf(ProcessLockTimeoutError)
} finally {
releaseFirst()
await first
}
})
it('sanitizes lock names into the lock directory', async () => {
const path = resolveProcessLockPath(lockDir, '../OpenClaw Lifecycle!')
expect(path).toBe(join(lockDir, 'OpenClaw-Lifecycle.lock'))
await withProcessLock(
'../OpenClaw Lifecycle!',
{ lockDir },
async () => undefined,
)
const entries = await readdir(lockDir)
expect(entries).not.toContain('..')
})
})

View File

@@ -196,6 +196,7 @@
"klavis": "^2.15.0",
"pino": "^9.6.0",
"posthog-node": "^4.17.0",
"proper-lockfile": "^4.1.2",
"puppeteer-core": "24.23.0",
"ws": "^8.18.0",
"zod": "^3.24.2",
@@ -205,6 +206,7 @@
"@types/bun": "1.3.5",
"@types/debug": "^4.1.12",
"@types/node": "^24.3.3",
"@types/proper-lockfile": "^4.1.4",
"@types/sinon": "^21.0.0",
"@types/ws": "^8.5.13",
"async-mutex": "^0.5.0",
@@ -1829,12 +1831,16 @@
"@types/pg-pool": ["@types/pg-pool@2.0.7", "", { "dependencies": { "@types/pg": "*" } }, "sha512-U4CwmGVQcbEuqpyju8/ptOKg6gEC+Tqsvj2xS9o1g71bUh8twxnC6ZL5rZKCsGN0iyH0CwgUyc9VR5owNQF9Ng=="],
"@types/proper-lockfile": ["@types/proper-lockfile@4.1.4", "", { "dependencies": { "@types/retry": "*" } }, "sha512-uo2ABllncSqg9F1D4nugVl9v93RmjxF6LJzQLMLDdPaXCUIDPeOJ21Gbqi43xNKzBi/WQ0Q0dICqufzQbMjipQ=="],
"@types/react": ["@types/react@19.2.9", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-Lpo8kgb/igvMIPeNV2rsYKTgaORYdO1XGVZ4Qz3akwOj0ySGYMPlQWa8BaLn0G63D1aSaAQ5ldR06wCpChQCjA=="],
"@types/react-dom": ["@types/react-dom@19.2.3", "", { "peerDependencies": { "@types/react": "^19.2.0" } }, "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ=="],
"@types/request": ["@types/request@2.48.13", "", { "dependencies": { "@types/caseless": "*", "@types/node": "*", "@types/tough-cookie": "*", "form-data": "^2.5.5" } }, "sha512-FGJ6udDNUCjd19pp0Q3iTiDkwhYup7J8hpMW9c4k53NrccQFFWKRho6hvtPPEhnXWKvukfwAlB6DbDz4yhH5Gg=="],
"@types/retry": ["@types/retry@0.12.5", "", {}, "sha512-3xSjTp3v03X/lSQLkczaN9UIEwJMoMCA1+Nb5HfbJEQWogdeQIyVtTvxPXDQjZ5zws8rFQfVfRdz03ARihPJgw=="],
"@types/sinon": ["@types/sinon@21.0.0", "", { "dependencies": { "@types/sinonjs__fake-timers": "*" } }, "sha512-+oHKZ0lTI+WVLxx1IbJDNmReQaIsQJjN2e7UUrJHEeByG7bFeKJYsv1E75JxTQ9QKJDp21bAa/0W2Xo4srsDnw=="],
"@types/sinonjs__fake-timers": ["@types/sinonjs__fake-timers@15.0.1", "", {}, "sha512-Ko2tjWJq8oozHzHV+reuvS5KYIRAokHnGbDwGh/J64LntgpbuylF74ipEL24HCyRjf9FOlBiBHWBR1RlVKsI1w=="],
@@ -3569,6 +3575,8 @@
"prop-types": ["prop-types@15.8.1", "", { "dependencies": { "loose-envify": "^1.4.0", "object-assign": "^4.1.1", "react-is": "^16.13.1" } }, "sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg=="],
"proper-lockfile": ["proper-lockfile@4.1.2", "", { "dependencies": { "graceful-fs": "^4.2.4", "retry": "^0.12.0", "signal-exit": "^3.0.2" } }, "sha512-TjNPblN4BwAWMXU8s9AEz4JmQxnD1NNL7bNOY/AKUzyamc379FWASUhc/K1pL2noVb+XmZKLL68cjzLsiOAMaA=="],
"property-information": ["property-information@7.1.0", "", {}, "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ=="],
"proto-list": ["proto-list@1.2.4", "", {}, "sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA=="],
@@ -3829,6 +3837,8 @@
"restore-cursor": ["restore-cursor@5.1.0", "", { "dependencies": { "onetime": "^7.0.0", "signal-exit": "^4.1.0" } }, "sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA=="],
"retry": ["retry@0.12.0", "", {}, "sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow=="],
"retry-request": ["retry-request@7.0.2", "", { "dependencies": { "@types/request": "^2.48.8", "extend": "^3.0.2", "teeny-request": "^9.0.0" } }, "sha512-dUOvLMJ0/JJYEn8NrpOaGNE7X3vpI5XlZS/u0ANjqtcZVKnIxP7IgCFwrKTxENw29emmwug53awKtaMm4i9g5w=="],
"reusify": ["reusify@1.1.0", "", {}, "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw=="],
@@ -3921,7 +3931,7 @@
"side-channel-weakmap": ["side-channel-weakmap@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.5", "object-inspect": "^1.13.3", "side-channel-map": "^1.0.1" } }, "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A=="],
"signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
"signal-exit": ["signal-exit@3.0.7", "", {}, "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ=="],
"signedsource": ["signedsource@1.0.0", "", {}, "sha512-6+eerH9fEnNmi/hyM1DXcRK3pWdoMQtlkQ+ns0ntzunjKqp5i3sKCc80ym8Fib3iaYhdJUOPdhlJWj1tvge2Ww=="],
@@ -4491,6 +4501,8 @@
"@hono/zod-validator/zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="],
"@inquirer/core/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
"@inquirer/core/wrap-ansi": ["wrap-ansi@6.2.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA=="],
"@isaacs/cliui/string-width": ["string-width@5.1.2", "", { "dependencies": { "eastasianwidth": "^0.2.0", "emoji-regex": "^9.2.2", "strip-ansi": "^7.0.1" } }, "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA=="],
@@ -4885,6 +4897,8 @@
"eventid/uuid": ["uuid@8.3.2", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg=="],
"execa/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
"express/cookie": ["cookie@0.7.2", "", {}, "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w=="],
"extract-zip/get-stream": ["get-stream@5.2.0", "", { "dependencies": { "pump": "^3.0.0" } }, "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA=="],
@@ -4895,6 +4909,8 @@
"find-up/path-exists": ["path-exists@4.0.0", "", {}, "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w=="],
"foreground-child/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
"form-data/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="],
"fx-runner/commander": ["commander@2.9.0", "", { "dependencies": { "graceful-readlink": ">= 1.0.0" } }, "sha512-bmkUukX8wAOjHdN26xj5c4ctEV22TQ7dQYhSmuckKhToXrkUn0iIaolHdIxYYqD55nhpSPA9zPQ1yP57GdXP2A=="],
@@ -5051,6 +5067,8 @@
"read-pkg/type-fest": ["type-fest@4.41.0", "", {}, "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA=="],
"restore-cursor/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
"roarr/sprintf-js": ["sprintf-js@1.1.3", "", {}, "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="],
"sinon/diff": ["diff@8.0.3", "", {}, "sha512-qejHi7bcSD4hQAZE0tNAawRK1ZtafHDmMTMkrrIGgSLl7hTnQHmKCeB45xAcbfTqK2zowkM3j3bHt/4b/ARbYQ=="],