feat(eval): wire BrowserOS MCP into performance grader

Performance grader now connects to the live BrowserOS instance the agent just used (still on the task page during Phase 3 grading) and can verify state-change claims via mcp__browseros__* tools; only read-only inspection tools are whitelisted in allowedTools. System prompt teaches per-axis usage and asks the grader to limit itself to roughly 2-3 live calls per task (a prompt-level budget, not enforced in code). Adds mind2web-e2e-perf suite (10 online-mind2web tasks, Bedrock Opus 4.6) for smoke-testing the new path.
2026-05-14 08:03:58 +00:00 · 2026-05-05 22:43:41 +05:30
20 changed files with 484 additions and 927 deletions
--- a/packages/browseros-agent/apps/eval/configs/suites/mind2web-e2e-perf.json
+++ b/packages/browseros-agent/apps/eval/configs/suites/mind2web-e2e-perf.json
@@ -0,0 +1,28 @@
+{
+  "id": "mind2web-e2e-perf",
+  "agent": {
+    "type": "single",
+    "provider": "bedrock",
+    "model": "global.anthropic.claude-opus-4-6-v1",
+    "region": "AWS_REGION",
+    "accessKeyId": "AWS_ACCESS_KEY_ID",
+    "secretAccessKey": "AWS_SECRET_ACCESS_KEY",
+    "supportsImages": true
+  },
+  "dataset": "../../data/mind2web_e2e_test.jsonl",
+  "num_workers": 2,
+  "restart_server_per_task": true,
+  "browseros": {
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": false
+  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
+  "timeout_ms": 600000
+}
--- a/packages/browseros-agent/apps/eval/src/graders/performance/axes.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/performance/axes.ts
@@ -41,11 +41,34 @@ export const DEFAULT_AXES: AxisDefinition[] = [

 export const PERFORMANCE_SYSTEM_PROMPT = `You are a performance evaluator for a browser automation agent. You will score how well the agent executed a web task across multiple axes.

-## Data Files
+## Data Sources

-You have two data sources in your working directory:
+You have three sources of evidence: the local artifacts (messages.jsonl, screenshots) AND, when available, the **live BrowserOS browser** the agent just used (still on the task page — the run finishes by navigating to about:blank only after grading).

-### 1. messages.jsonl
+### Live browser access (mcp__browseros__*)
+The BrowserOS instance the agent just used is **still running and still on the task page** (the eval pipeline only navigates to about:blank after grading completes). You can inspect that live state via MCP — this is ground truth that no artifact can match.
+
+Available tools (READ-ONLY — never click, type, or navigate):
+- \`mcp__browseros__get_active_page\` — current URL + title. Cheap; call first to confirm the page hasn't changed.
+- \`mcp__browseros__list_pages\` — all open tabs (catches multi-tab tasks).
+- \`mcp__browseros__get_page_content\` — page as clean markdown. Best for reading prose, prices, lists.
+- \`mcp__browseros__get_page_links\` — all links on the page (verify the agent actually navigated where it claimed).
+- \`mcp__browseros__take_snapshot\` — interactive-element snapshot (verify form fields, buttons in their final state).
+- \`mcp__browseros__get_dom\` / \`mcp__browseros__search_dom\` — DOM inspection for specific selectors/strings.
+- \`mcp__browseros__take_screenshot\` — fresh screenshot of current state. More reliable than the last numbered screenshot if the agent's final action didn't trigger a capture.
+- \`mcp__browseros__get_console_logs\` — runtime errors the agent may have missed.
+
+**When to use the live browser (per axis):**
+- **task_completion** — the highest-value use. If the agent claims "submitted the form" or "added X to cart", call \`get_active_page\` (correct URL?) and \`get_page_content\` or \`take_snapshot\` (success state visible? cart shows the item?). If the answer cites specific data, \`search_dom\` for that value confirms it's actually present on the final page.
+- **error_recovery** — \`get_console_logs\` reveals runtime errors the agent didn't surface. A "completed" run with red console errors is suspicious.
+- **efficiency** — usually unnecessary; messages.jsonl already shows the call sequence.
+- **reasoning_quality / speed / autonomy** — usually unnecessary; derive from the message stream.
+
+**Budget:** prefer artifacts first. Reach for MCP only when artifacts are inconclusive (blurry screenshot, claim not in DOM logs, ambiguous final state, or you need to confirm a state-changing claim). Cap yourself at ~2-3 MCP calls per task. Never use MCP to drive the browser — these are verification reads only.
+
+### Local artifacts
+
+#### messages.jsonl
 The raw event stream — one JSON object per line with a "type" field.

 **Event types you care about:**
@@ -56,7 +79,7 @@ The raw event stream — one JSON object per line with a "type" field.
 **Event types to handle carefully:**
 - "tool-output-available" — Tool output. The "output" field contains FULL PAGE DOM CONTENT — hundreds of interactive elements, entire page text, etc. These lines are 5-50KB each. NEVER read them in bulk. However, you CAN and SHOULD use Grep to search within these lines for specific keywords when screenshots alone can't verify a claim. For example, if the task asks "find the price of X" and the screenshot is unclear, grep messages.jsonl for the product name or price value to confirm the agent actually saw it in the DOM.

-### 2. screenshots/ directory
+#### screenshots/ directory
 Numbered PNG screenshots (1.png, 2.png, ...) captured after each tool execution.

 ## Browser Tool Reference
@@ -102,6 +125,13 @@ When the agent's final answer contains specific data (prices, names, dates, coun
 - Task asks "extract the email address" → grep for the email pattern
 This is the most reliable way to verify whether the agent actually found the data it claims, since screenshots may be blurry, truncated, or missing the relevant section.

+**Step 5: Cross-check against the live browser (when artifacts are inconclusive)**
+If the answer relies on a side-effect ("submitted", "added to cart", "logged in", "filled the form") OR if Step 4 grep can't find the claimed value, fall through to mcp__browseros__ tools. Typical pattern:
+1. \`mcp__browseros__get_active_page\` — does the URL match the expected post-action page?
+2. \`mcp__browseros__get_page_content\` or \`mcp__browseros__search_dom\` — is the success indicator (confirmation message, cart item, updated value) actually present?
+3. If suspicious, \`mcp__browseros__get_console_logs\` to spot silent failures.
+Stop after 2-3 calls — this is verification, not exploration.
+
 ## How to View Screenshots

 You have {screenshot_count} screenshots. View 3-5 strategically:
--- a/packages/browseros-agent/apps/eval/src/graders/performance/performance-grader.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/performance/performance-grader.ts
@@ -83,6 +83,7 @@ export class PerformanceGrader implements Grader {
        systemPrompt,
        userPrompt,
        input.outputDir,
+        input.mcpUrl,
      )
      if (response) {
        await writeGraderJsonArtifact(
@@ -185,11 +186,39 @@ export class PerformanceGrader implements Grader {
    systemPrompt: string,
    userPrompt: string,
    outputDir: string,
+    mcpUrl?: string,
  ): Promise<AgentResult | null> {
    const taskId = outputDir.split('/').pop() ?? outputDir
-    console.log(`Perf grader ${taskId}: Starting (model=${this.model})`)
+    console.log(
+      `Perf grader ${taskId}: Starting (model=${this.model}, mcp=${mcpUrl ? 'on' : 'off'})`,
+    )
    const startMs = Date.now()

+    const allowedTools = ['Read', 'Glob', 'Grep']
+    const mcpServers: Record<
+      string,
+      { type: 'http'; url: string; headers?: Record<string, string> }
+    > = {}
+    if (mcpUrl) {
+      mcpServers.browseros = {
+        type: 'http',
+        url: mcpUrl,
+        headers: { 'X-BrowserOS-Source': 'sdk-internal' },
+      }
+      // Read-only inspection tools — let the grader verify claims against live browser state.
+      allowedTools.push(
+        'mcp__browseros__get_active_page',
+        'mcp__browseros__list_pages',
+        'mcp__browseros__get_page_content',
+        'mcp__browseros__get_page_links',
+        'mcp__browseros__take_screenshot',
+        'mcp__browseros__take_snapshot',
+        'mcp__browseros__get_dom',
+        'mcp__browseros__search_dom',
+        'mcp__browseros__get_console_logs',
+      )
+    }
+
    const agentPromise = (async (): Promise<AgentResult | null> => {
      let result: AgentResult | null = null
      let messageCount = 0
@@ -200,7 +229,8 @@ export class PerformanceGrader implements Grader {
          model: this.model,
          cwd: outputDir,
          systemPrompt,
-          allowedTools: ['Read', 'Glob', 'Grep'],
+          allowedTools,
+          mcpServers,
          permissionMode: 'bypassPermissions',
          allowDangerouslySkipPermissions: true,
          maxTurns: this.maxTurns,
--- a/packages/browseros-agent/apps/eval/src/runs/task-run-pipeline.ts
+++ b/packages/browseros-agent/apps/eval/src/runs/task-run-pipeline.ts
@@ -163,7 +163,10 @@ export class TaskRunPipeline {
      // Phase 2: Execute agent
      const agentResult = await this.executeAgent(task, pageId)

-      // Phase 3: Run graders
+      // Phase 3: Run graders.
+      // The browser is intentionally still on the task page here — graders
+      // (e.g. PerformanceGrader) may inspect live browser state via MCP for
+      // claim verification. Do not move the about:blank cleanup above this.
      const graderResults = await this.runGraders(
        task,
        agentResult,
--- a/packages/browseros-agent/apps/server/src/lib/agents/acpx-agent-adapter.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/acpx-agent-adapter.ts
@@ -1,74 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-import type { createRuntimeStore } from 'acpx/runtime'
-import type { OpenClawGatewayChatClient } from '../../api/services/openclaw/openclaw-gateway-chat-client'
-import type { AgentDefinition } from './agent-types'
-import { prepareClaudeCodeContext } from './claude-code/prepare'
-import { prepareCodexContext } from './codex/prepare'
-import {
-  maybeHandleOpenClawTurn,
-  prepareOpenClawContext,
-} from './openclaw/prepare'
-import type { AgentPromptInput, AgentStreamEvent } from './types'
-
-export interface PreparedAcpxAgentContext {
-  cwd: string
-  runtimeSessionKey: string
-  runPrompt: string
-  commandEnv: Record<string, string>
-  commandIdentity: string
-  useBrowserosMcp: boolean
-  openclawSessionKey: string | null
-}
-
-export interface PrepareAcpxAgentContextInput {
-  browserosDir: string
-  agent: AgentDefinition
-  sessionId: 'main'
-  sessionKey: string
-  cwdOverride: string | null
-  isSelectedCwd: boolean
-  message: string
-}
-
-export interface AcpxAdapterTurnInput {
-  prompt: AgentPromptInput
-  prepared: PreparedAcpxAgentContext
-  sessionStore: ReturnType<typeof createRuntimeStore>
-  openclawGatewayChat: OpenClawGatewayChatClient | null
-}
-
-export interface AcpxAgentAdapter {
-  prepare(
-    input: PrepareAcpxAgentContextInput,
-  ): Promise<PreparedAcpxAgentContext>
-  maybeHandleTurn?(
-    input: AcpxAdapterTurnInput,
-  ): Promise<ReadableStream<AgentStreamEvent> | null>
-}
-
-const ADAPTERS: Record<AgentDefinition['adapter'], AcpxAgentAdapter> = {
-  claude: { prepare: prepareClaudeCodeContext },
-  codex: { prepare: prepareCodexContext },
-  openclaw: {
-    prepare: prepareOpenClawContext,
-    maybeHandleTurn: maybeHandleOpenClawTurn,
-  },
-}
-
-export function getAcpxAgentAdapter(
-  adapter: AgentDefinition['adapter'],
-): AcpxAgentAdapter {
-  return ADAPTERS[adapter]
-}
-
-/** Prepares adapter-specific filesystem, prompt, env, and session identity for one ACPX turn. */
-export async function prepareAcpxAgentContext(
-  input: PrepareAcpxAgentContextInput,
-): Promise<PreparedAcpxAgentContext> {
-  return getAcpxAgentAdapter(input.agent.adapter).prepare(input)
-}
--- a/packages/browseros-agent/apps/server/src/lib/agents/acpx-agent-common.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/acpx-agent-common.ts
@@ -1,95 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-import type {
-  PrepareAcpxAgentContextInput,
-  PreparedAcpxAgentContext,
-} from './acpx-agent-adapter'
-import type { AgentRuntimePaths } from './acpx-runtime-context'
-import {
-  BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
-  buildAcpxRuntimePromptPrefix,
-  buildBrowserosAcpPrompt,
-  ensureAgentHome,
-  ensureRuntimeSkills,
-  ensureUsableCwd,
-  resolveAgentRuntimePaths,
-} from './acpx-runtime-context'
-import {
-  deriveRuntimeSessionKey,
-  saveLatestRuntimeState,
-} from './acpx-runtime-state'
-
-export interface BrowserosManagedContext {
-  input: PrepareAcpxAgentContextInput
-  paths: AgentRuntimePaths
-  skillNames: string[]
-  promptPrefix: string
-}
-
-/** Builds the common BrowserOS-managed home, skills, cwd, and prompt prefix for Claude/Codex. */
-export async function prepareBrowserosManagedContext(
-  input: PrepareAcpxAgentContextInput,
-): Promise<BrowserosManagedContext> {
-  const paths = resolveAgentRuntimePaths({
-    browserosDir: input.browserosDir,
-    agentId: input.agent.id,
-    cwd: input.cwdOverride,
-  })
-  await ensureUsableCwd(paths.effectiveCwd, !input.isSelectedCwd)
-  await ensureAgentHome(paths)
-  const skillNames = await ensureRuntimeSkills(paths.runtimeSkillsDir)
-  const promptPrefix = buildAcpxRuntimePromptPrefix({
-    agent: input.agent,
-    paths,
-    skillNames,
-  })
-  return { input, paths, skillNames, promptPrefix }
-}
-
-/** Finalizes BrowserOS-managed prep into the uniform adapter context consumed by AcpxRuntime. */
-export async function finishBrowserosManagedContext(input: {
-  input: PrepareAcpxAgentContextInput
-  paths: AgentRuntimePaths
-  skillNames: string[]
-  promptPrefix: string
-  commandEnv: Record<string, string>
-}): Promise<PreparedAcpxAgentContext> {
-  const commandIdentity = stableCommandIdentity(input.commandEnv)
-  const runtimeSessionKey = deriveRuntimeSessionKey({
-    agentId: input.input.agent.id,
-    sessionId: input.input.sessionId,
-    adapter: input.input.agent.adapter,
-    cwd: input.paths.effectiveCwd,
-    agentHome: input.paths.agentHome,
-    promptVersion: BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
-    skillIdentity: input.skillNames.join(','),
-    commandIdentity,
-  })
-  await saveLatestRuntimeState(input.paths.runtimeStatePath, {
-    sessionId: input.input.sessionId,
-    runtimeSessionKey,
-    cwd: input.paths.effectiveCwd,
-    agentHome: input.paths.agentHome,
-    updatedAt: Date.now(),
-  })
-  return {
-    cwd: input.paths.effectiveCwd,
-    runtimeSessionKey,
-    runPrompt: buildBrowserosAcpPrompt(input.promptPrefix, input.input.message),
-    commandEnv: input.commandEnv,
-    commandIdentity,
-    useBrowserosMcp: true,
-    openclawSessionKey: null,
-  }
-}
-
-export function stableCommandIdentity(env: Record<string, string>): string {
-  return Object.entries(env)
-    .sort(([left], [right]) => left.localeCompare(right))
-    .map(([key, value]) => `${key}=${value}`)
-    .join('\n')
-}
--- a/packages/browseros-agent/apps/server/src/lib/agents/acpx-runtime-context.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/acpx-runtime-context.ts
@@ -35,7 +35,6 @@ export interface AgentRuntimePaths {
  effectiveCwd: string
  runtimeStatePath: string
  runtimeSkillsDir: string
-  runtimeRoot: string
  codexHome: string
 }

@@ -46,7 +45,6 @@ export function resolveAgentRuntimePaths(input: {
 }): AgentRuntimePaths {
  const harnessDir = join(input.browserosDir, 'agents', 'harness')
  const defaultWorkspaceCwd = join(harnessDir, 'workspace')
-  const runtimeRoot = join(harnessDir, input.agentId, 'runtime')
  return {
    browserosDir: input.browserosDir,
    harnessDir,
@@ -59,8 +57,7 @@ export function resolveAgentRuntimePaths(input: {
      `${input.agentId}.json`,
    ),
    runtimeSkillsDir: join(harnessDir, 'runtime-skills'),
-    runtimeRoot,
-    codexHome: join(runtimeRoot, 'codex-home'),
+    codexHome: join(harnessDir, input.agentId, 'runtime', 'codex-home'),
  }
 }

@@ -113,7 +110,7 @@ export async function materializeCodexHome(input: {
  }
 }

-/** Builds stable BrowserOS-managed instructions for Claude/Codex ACP turns. */
+/** Builds the stable BrowserOS operating instructions prepended to ACP turns. */
 export function buildAcpxRuntimePromptPrefix(input: {
  agent: AgentDefinition
  paths: AgentRuntimePaths
@@ -137,12 +134,6 @@ BrowserOS has made runtime skills available for this ACPX session.
 Skill root: ${input.paths.runtimeSkillsDir}
 Available skills: ${input.skillNames.join(', ')}
 When a task calls for one of these skills, read its SKILL.md from that root and follow it.
-
-When the user asks you to remember, save feedback, store a preference, or update memory in this BrowserOS ACPX context, use the BrowserOS memory skill.
-Write BrowserOS memory only under AGENT_HOME:
- AGENT_HOME/MEMORY.md for durable promoted preferences and operating patterns.
- AGENT_HOME/memory/YYYY-MM-DD.md for daily notes and candidate memories.
-Do not use native Claude project memory, native CLI memory, or workspace files for BrowserOS memory.
 </browseros_acpx_runtime>`
 }

@@ -157,40 +148,6 @@ export function wrapCommandWithEnv(
  return prefix ? `env ${prefix} ${command}` : command
 }

-/** Ensures the runtime cwd exists, creating only the managed default workspace. */
-export async function ensureUsableCwd(
-  cwd: string,
-  isDefaultWorkspace: boolean,
-): Promise<void> {
-  if (isDefaultWorkspace) {
-    await mkdir(cwd, { recursive: true })
-    return
-  }
-  let info: Stats
-  try {
-    info = await stat(cwd)
-  } catch (err) {
-    if (isNotFoundError(err)) {
-      throw new Error(`Selected workspace does not exist: ${cwd}`)
-    }
-    throw err
-  }
-  if (!info.isDirectory()) {
-    throw new Error(`Selected workspace is not a directory: ${cwd}`)
-  }
-}
-
-export function buildBrowserosAcpPrompt(
-  prefix: string,
-  message: string,
-): string {
-  return `${prefix}
-
-<user_request>
-${escapePromptTagText(message)}
-</user_request>`
-}
-
 async function writeFileIfMissing(
  path: string,
  content: string,
@@ -250,7 +207,7 @@ async function sourceFileExists(path: string): Promise<boolean> {
    throw err
  }
  if (!info.isFile()) {
-    throw new Error(`Expected source file to be a file: ${path}`)
+    throw new Error(`Expected Codex source file to be a file: ${path}`)
  }
  return true
 }
@@ -259,13 +216,6 @@ function shellQuote(value: string): string {
  return `'${value.replace(/'/g, "'\\''")}'`
 }

-function escapePromptTagText(value: string): string {
-  return value
-    .replace(/&/g, '&amp;')
-    .replace(/</g, '&lt;')
-    .replace(/>/g, '&gt;')
-}
-
 function isNotFoundError(err: unknown): boolean {
  return (
    typeof err === 'object' &&
--- a/packages/browseros-agent/apps/server/src/lib/agents/acpx-runtime-templates.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/acpx-runtime-templates.ts
@@ -114,11 +114,6 @@ Do not store memory files in the project workspace.

 ## Write

- When the user explicitly asks you to remember, save feedback, store a preference, or update memory, use this skill.
- Write BrowserOS memory only under $AGENT_HOME.
- Use $AGENT_HOME/MEMORY.md for durable promoted preferences and operating patterns.
- Use $AGENT_HOME/memory/YYYY-MM-DD.md for daily notes and candidate memories.
- Do not use native Claude project memory, native CLI memory, or workspace files for BrowserOS memory.
 - Put observations and task breadcrumbs in today's daily note first.
 - Promote only stable patterns into MEMORY.md.
 - Do not promote one-off facts, raw transcripts, temporary state, secrets, or credentials.
--- a/packages/browseros-agent/apps/server/src/lib/agents/acpx-runtime.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/acpx-runtime.ts
@@ -4,6 +4,9 @@
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */

+import { randomUUID } from 'node:crypto'
+import type { Stats } from 'node:fs'
+import { mkdir, stat } from 'node:fs/promises'
 import { join } from 'node:path'
 import { OPENCLAW_GATEWAY_CONTAINER_PORT } from '@browseros/shared/constants/openclaw'
 import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
@@ -19,18 +22,28 @@ import {
  createAgentRegistry,
  createRuntimeStore,
 } from 'acpx/runtime'
-import type { OpenClawGatewayChatClient } from '../../api/services/openclaw/openclaw-gateway-chat-client'
+import type {
+  OpenAIChatMessage,
+  OpenAIContentPart,
+  OpenClawGatewayChatClient,
+} from '../../api/services/openclaw/openclaw-gateway-chat-client'
 import { getBrowserosDir } from '../browseros-dir'
 import { logger } from '../logger'
+import type { AgentRuntimePaths } from './acpx-runtime-context'
 import {
-  getAcpxAgentAdapter,
-  prepareAcpxAgentContext,
-} from './acpx-agent-adapter'
-import {
+  BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
+  buildAcpxRuntimePromptPrefix,
+  ensureAgentHome,
+  ensureRuntimeSkills,
+  materializeCodexHome,
  resolveAgentRuntimePaths,
  wrapCommandWithEnv,
 } from './acpx-runtime-context'
-import { loadLatestRuntimeState } from './acpx-runtime-state'
+import {
+  deriveRuntimeSessionKey,
+  loadLatestRuntimeState,
+  saveLatestRuntimeState,
+} from './acpx-runtime-state'
 import type {
  AgentDefinition,
  AgentHistoryEntry,
@@ -94,8 +107,6 @@ interface PreparedRuntimeContext {
  runPrompt: string
  agentCommandEnv: Record<string, string>
  commandIdentity: string
-  useBrowserosMcp: boolean
-  openclawSessionKey: string | null
 }

 const BROWSEROS_ACP_AGENT_INSTRUCTIONS = `<role>
@@ -183,11 +194,16 @@ export class AcpxRuntime implements AgentRuntime {
  async send(
    input: AgentPromptInput,
  ): Promise<ReadableStream<AgentStreamEvent>> {
-    const prepared = await this.prepareRuntimeContext(
-      input,
-      input.cwd ?? this.defaultCwd,
-    )
-    const cwd = prepared.cwd
+    const prepared =
+      input.agent.adapter === 'openclaw'
+        ? null
+        : await this.prepareRuntimeContext(input, input.cwd ?? this.defaultCwd)
+    const cwd =
+      prepared?.cwd ??
+      (await this.resolveNonManagedCwd(
+        input.cwd ?? this.defaultCwd,
+        !!input.cwd,
+      ))
    const imageAttachments = (input.attachments ?? []).filter((a) =>
      a.mediaType.startsWith('image/'),
    )
@@ -205,38 +221,42 @@ export class AcpxRuntime implements AgentRuntime {
      imageAttachmentCount: imageAttachments.length,
    })

-    const adapter = getAcpxAgentAdapter(input.agent.adapter)
-    const adapterStream =
-      (await adapter.maybeHandleTurn?.({
-        prompt: input,
-        prepared: {
-          cwd: prepared.cwd,
-          runtimeSessionKey: prepared.runtimeSessionKey,
-          runPrompt: prepared.runPrompt,
-          commandEnv: prepared.agentCommandEnv,
-          commandIdentity: prepared.commandIdentity,
-          useBrowserosMcp: prepared.useBrowserosMcp,
-          openclawSessionKey: prepared.openclawSessionKey,
-        },
-        sessionStore: this.sessionStore,
-        openclawGatewayChat: this.openclawGatewayChat,
-      })) ?? null
-    if (adapterStream) return adapterStream
+    // Image carve-out for OpenClaw: the openclaw `acp` bridge silently
+    // drops ACP `image` content blocks, so the model never sees the
+    // attachment. Divert image-bearing turns to the gateway's HTTP
+    // /v1/chat/completions endpoint (which accepts OpenAI-style
+    // `image_url` parts) and pipe its SSE back through the same
+    // AgentStreamEvent shape callers already consume.
+    if (
+      input.agent.adapter === 'openclaw' &&
+      imageAttachments.length > 0 &&
+      this.openclawGatewayChat
+    ) {
+      return this.sendOpenclawViaGateway(input, imageAttachments, cwd)
+    }

    const runtime = this.getRuntime({
      cwd,
      permissionMode: input.permissionMode,
      nonInteractivePermissions: 'fail',
-      commandEnv: prepared.agentCommandEnv,
-      commandIdentity: prepared.commandIdentity,
-      useBrowserosMcp: prepared.useBrowserosMcp,
-      openclawSessionKey: prepared.openclawSessionKey,
+      commandEnv: prepared?.agentCommandEnv ?? {},
+      commandIdentity: prepared?.commandIdentity ?? 'openclaw',
+      // OpenClaw agents need their gateway sessionKey baked into the
+      // spawn command (acpx does not forward sessionKey to newSession);
+      // claude/codex don't, and including it would split their cache.
+      openclawSessionKey:
+        input.agent.adapter === 'openclaw' ? input.sessionKey : null,
    })

    return createAcpxEventStream(runtime, input, {
      cwd,
-      runtimeSessionKey: prepared.runtimeSessionKey,
-      runPrompt: prepared.runPrompt,
+      runtimeSessionKey: prepared?.runtimeSessionKey ?? input.sessionKey,
+      runPrompt:
+        prepared?.runPrompt ??
+        buildBrowserosAcpPrompt(
+          BROWSEROS_ACP_AGENT_INSTRUCTIONS,
+          input.message,
+        ),
    })
  }

@@ -257,27 +277,64 @@ export class AcpxRuntime implements AgentRuntime {
    return (await this.sessionStore.load(agent.sessionKey)) ?? null
  }

+  private async resolveNonManagedCwd(
+    cwdOverride: string | null,
+    isSelectedCwd: boolean,
+  ): Promise<string> {
+    const paths = resolveAgentRuntimePaths({
+      browserosDir: this.browserosDir,
+      agentId: 'openclaw',
+      cwd: cwdOverride,
+    })
+    await ensureUsableCwd(paths.effectiveCwd, !isSelectedCwd)
+    return paths.effectiveCwd
+  }
+
  private async prepareRuntimeContext(
    input: AgentPromptInput,
    cwdOverride: string | null,
  ): Promise<PreparedRuntimeContext> {
-    const prepared = await prepareAcpxAgentContext({
+    const paths = resolveAgentRuntimePaths({
      browserosDir: this.browserosDir,
+      agentId: input.agent.id,
+      cwd: cwdOverride,
+    })
+    await ensureUsableCwd(paths.effectiveCwd, !input.cwd)
+    await ensureAgentHome(paths)
+    const skillNames = await ensureRuntimeSkills(paths.runtimeSkillsDir)
+    if (input.agent.adapter === 'codex') {
+      await materializeCodexHome({ paths, skillNames })
+    }
+    const promptPrefix = buildAcpxRuntimePromptPrefix({
      agent: input.agent,
+      paths,
+      skillNames,
+    })
+    const agentCommandEnv = buildAgentCommandEnv(input.agent, paths)
+    const commandIdentity = stableCommandIdentity(agentCommandEnv)
+    const runtimeSessionKey = deriveRuntimeSessionKey({
+      agentId: input.agent.id,
      sessionId: input.sessionId,
-      sessionKey: input.sessionKey,
-      cwdOverride,
-      isSelectedCwd: !!input.cwd,
-      message: input.message,
+      adapter: input.agent.adapter,
+      cwd: paths.effectiveCwd,
+      agentHome: paths.agentHome,
+      promptVersion: BROWSEROS_ACPX_OPERATING_PROMPT_VERSION,
+      skillIdentity: skillNames.join(','),
+      commandIdentity,
+    })
+    await saveLatestRuntimeState(paths.runtimeStatePath, {
+      sessionId: input.sessionId,
+      runtimeSessionKey,
+      cwd: paths.effectiveCwd,
+      agentHome: paths.agentHome,
+      updatedAt: Date.now(),
    })
    return {
-      cwd: prepared.cwd,
-      runtimeSessionKey: prepared.runtimeSessionKey,
-      runPrompt: prepared.runPrompt,
-      agentCommandEnv: prepared.commandEnv,
-      commandIdentity: prepared.commandIdentity,
-      useBrowserosMcp: prepared.useBrowserosMcp,
-      openclawSessionKey: prepared.openclawSessionKey,
+      cwd: paths.effectiveCwd,
+      runtimeSessionKey,
+      runPrompt: buildBrowserosAcpPrompt(promptPrefix, input.message),
+      agentCommandEnv,
+      commandIdentity,
    }
  }

@@ -287,7 +344,6 @@ export class AcpxRuntime implements AgentRuntime {
    nonInteractivePermissions: AcpRuntimeOptions['nonInteractivePermissions']
    commandEnv: Record<string, string>
    commandIdentity: string
-    useBrowserosMcp: boolean
    openclawSessionKey: string | null
  }): AcpxCoreRuntime {
    const key = JSON.stringify({
@@ -295,12 +351,16 @@ export class AcpxRuntime implements AgentRuntime {
      permissionMode: input.permissionMode,
      nonInteractivePermissions: input.nonInteractivePermissions,
      commandIdentity: input.commandIdentity,
-      useBrowserosMcp: input.useBrowserosMcp,
      openclawSessionKey: input.openclawSessionKey,
    })
    const existing = this.runtimes.get(key)
    if (existing) return existing

+    // OpenClaw exposes its provider tools through the gateway, not through
+    // ACP-side MCP servers. Forwarding the BrowserOS HTTP MCP to its bridge
+    // makes newSession fail because openclaw rejects unsupported transports.
+    // Claude/codex still need the BrowserOS MCP for browser tooling.
+    const isOpenclaw = input.openclawSessionKey !== null
    const runtime = this.runtimeFactory({
      cwd: input.cwd,
      sessionStore: this.sessionStore,
@@ -309,9 +369,9 @@ export class AcpxRuntime implements AgentRuntime {
        openclawSessionKey: input.openclawSessionKey,
        commandEnv: input.commandEnv,
      }),
-      mcpServers: input.useBrowserosMcp
-        ? createBrowserosMcpServers(this.browserosServerPort)
-        : [],
+      mcpServers: isOpenclaw
+        ? []
+        : createBrowserosMcpServers(this.browserosServerPort),
      permissionMode: input.permissionMode,
      nonInteractivePermissions: input.nonInteractivePermissions,
    })
@@ -323,11 +383,195 @@ export class AcpxRuntime implements AgentRuntime {
      nonInteractivePermissions: input.nonInteractivePermissions,
      browserosServerPort: this.browserosServerPort,
      commandIdentity: input.commandIdentity,
-      useBrowserosMcp: input.useBrowserosMcp,
      openclawSessionKey: input.openclawSessionKey,
    })
    return runtime
  }
+
+  /**
+   * Drives an OpenClaw turn that includes image attachments through the
+   * gateway HTTP endpoint, which translates OpenAI-style `image_url`
+   * content parts into provider-native multimodal calls. Streams back
+   * `AgentStreamEvent` so the chat panel renders identically to ACP
+   * turns. On natural completion, appends a synthetic user+assistant
+   * pair to the acpx session record so the turn shows up in
+   * `getHistory()` after a reload; a cancelled stream persists nothing.
+   *
+   * Persistence is best-effort: when no session record exists yet (e.g.
+   * the very first turn for a fresh agent is image-only), the live
+   * stream still works but the turn is absent from history on reload.
+   * Subsequent text turns through ACP create/update the record normally.
+   */
+  private async sendOpenclawViaGateway(
+    input: AgentPromptInput,
+    imageAttachments: ReadonlyArray<{ mediaType: string; data: string }>,
+    cwd: string,
+  ): Promise<ReadableStream<AgentStreamEvent>> {
+    if (!this.openclawGatewayChat) {
+      throw new Error(
+        'OpenClaw gateway chat client is not wired into AcpxRuntime',
+      )
+    }
+
+    const existingRecord = await this.sessionStore.load(input.sessionKey)
+    const priorMessages = existingRecord
+      ? recordToOpenAIMessages(existingRecord)
+      : []
+    const userContent: OpenAIContentPart[] = [
+      {
+        type: 'text',
+        text: buildBrowserosAcpPrompt(
+          BROWSEROS_ACP_AGENT_INSTRUCTIONS,
+          input.message,
+        ),
+      },
+      ...imageAttachments.map(
+        (a): OpenAIContentPart => ({
+          type: 'image_url',
+          image_url: { url: `data:${a.mediaType};base64,${a.data}` },
+        }),
+      ),
+    ]
+    const messages: OpenAIChatMessage[] = [
+      ...priorMessages,
+      { role: 'user', content: userContent },
+    ]
+
+    logger.info('Agent harness gateway image turn dispatched', {
+      agentId: input.agent.id,
+      sessionKey: input.sessionKey,
+      cwd,
+      priorMessageCount: priorMessages.length,
+      imageAttachmentCount: imageAttachments.length,
+    })
+
+    const upstream = await this.openclawGatewayChat.streamTurn({
+      agentId: input.agent.id,
+      sessionKey: input.sessionKey,
+      messages,
+      signal: input.signal,
+    })
+
+    const sessionStore = this.sessionStore
+    const sessionKey = input.sessionKey
+    const userMessageText = input.message
+    let accumulated = ''
+    const reader = upstream.getReader()
+    let cancelled = false
+    return new ReadableStream<AgentStreamEvent>({
+      start: (controller) => {
+        const persist = async () => {
+          if (cancelled || !existingRecord || !accumulated) return
+          try {
+            await persistGatewayTurn(
+              sessionStore,
+              sessionKey,
+              userMessageText,
+              imageAttachments,
+              accumulated,
+            )
+          } catch (err) {
+            logger.warn(
+              'Failed to persist gateway image turn to acpx session record',
+              {
+                sessionKey,
+                error: err instanceof Error ? err.message : String(err),
+              },
+            )
+          }
+        }
+        ;(async () => {
+          try {
+            while (true) {
+              const { done, value } = await reader.read()
+              if (done || cancelled) break
+              if (value.type === 'text_delta') accumulated += value.text
+              controller.enqueue(value)
+            }
+            await persist()
+            if (!cancelled) controller.close()
+          } catch (err) {
+            controller.enqueue({
+              type: 'error',
+              message: err instanceof Error ? err.message : String(err),
+            })
+            controller.close()
+          }
+        })().catch(() => {})
+      },
+      cancel: (reason) => {
+        // Stop the pump and drop our reader; gateway-side cancel is upstream.
+        cancelled = true
+        reader.cancel(reason).catch(() => {})
+      },
+    })
+  }
+}
+
+async function persistGatewayTurn(
+  sessionStore: ReturnType<typeof createRuntimeStore>,
+  sessionKey: string,
+  userMessageText: string,
+  imageAttachments: ReadonlyArray<{ mediaType: string; data: string }>,
+  assistantText: string,
+): Promise<void> {
+  const record = await sessionStore.load(sessionKey)
+  if (!record) return
+  const userContent: AcpxUserContent[] = [
+    {
+      Text: buildBrowserosAcpPrompt(
+        BROWSEROS_ACP_AGENT_INSTRUCTIONS,
+        userMessageText,
+      ),
+    } as AcpxUserContent,
+  ]
+  for (const _image of imageAttachments) {
+    // The history mapper's `userContentToText` reads `Image.source` and
+    // emits `[image]` for any non-empty value — we just need a truthy
+    // marker so the placeholder renders. We don't store the base64 in
+    // the record (it's already in the gateway's transcript and would
+    // bloat the JSON file).
+    userContent.push({ Image: { source: 'base64' } } as AcpxUserContent)
+  }
+  // The acpx persistence layer requires User messages to carry an `id`
+  // and Agent messages to carry a `tool_results` object — without them
+  // the record fails to round-trip through `parseSessionRecord` on next
+  // load. See acpx/dist/prompt-turn-... `isUserMessage`/`isAgentMessage`.
+  const turnId = randomUUID()
+  const updated = {
+    ...record,
+    messages: [
+      ...record.messages,
+      { User: { id: `user-${turnId}`, content: userContent } },
+      { Agent: { content: [{ Text: assistantText }], tool_results: {} } },
+    ],
+    lastUsedAt: new Date().toISOString(),
+  } as AcpSessionRecord
+  await sessionStore.save(updated)
+}
+
+function recordToOpenAIMessages(record: AcpSessionRecord): OpenAIChatMessage[] {
+  const messages: OpenAIChatMessage[] = []
+  for (const message of record.messages) {
+    if (message === 'Resume') continue
+    if ('User' in message) {
+      const text = message.User.content
+        .map(userContentToText)
+        .filter(Boolean)
+        .join('\n\n')
+        .trim()
+      if (text) messages.push({ role: 'user', content: text })
+      continue
+    }
+    if ('Agent' in message) {
+      const text = message.Agent.content
+        .map((part) => ('Text' in part ? part.Text : ''))
+        .join('')
+        .trim()
+      if (text) messages.push({ role: 'assistant', content: text })
+    }
+  }
+  return messages
 }

 type AcpxSessionMessage = AcpSessionRecord['messages'][number]
@@ -825,6 +1069,77 @@ function resolveOpenclawAcpCommand(
  return argv.join(' ')
 }

+async function ensureUsableCwd(
+  cwd: string,
+  isDefaultWorkspace: boolean,
+): Promise<void> {
+  if (isDefaultWorkspace) {
+    await mkdir(cwd, { recursive: true })
+    return
+  }
+  let info: Stats
+  try {
+    info = await stat(cwd)
+  } catch (err) {
+    if (isNotFoundError(err)) {
+      throw new Error(`Selected workspace does not exist: ${cwd}`)
+    }
+    throw err
+  }
+  if (!info.isDirectory()) {
+    throw new Error(`Selected workspace is not a directory: ${cwd}`)
+  }
+}
+
+function isNotFoundError(err: unknown): boolean {
+  return (
+    typeof err === 'object' &&
+    err !== null &&
+    'code' in err &&
+    err.code === 'ENOENT'
+  )
+}
+
+function buildAgentCommandEnv(
+  agent: AgentDefinition,
+  paths: AgentRuntimePaths,
+): Record<string, string> {
+  if (agent.adapter === 'codex') {
+    return {
+      AGENT_HOME: paths.agentHome,
+      CODEX_HOME: paths.codexHome,
+    }
+  }
+  if (agent.adapter === 'claude') {
+    return {
+      AGENT_HOME: paths.agentHome,
+    }
+  }
+  return {}
+}
+
+function stableCommandIdentity(env: Record<string, string>): string {
+  return Object.entries(env)
+    .sort(([left], [right]) => left.localeCompare(right))
+    .map(([key, value]) => `${key}=${value}`)
+    .join('\n')
+}
+
+function buildBrowserosAcpPrompt(prefix: string, message: string): string {
+  return `${prefix}
+
+<user_request>
+${escapePromptTagText(message)}
+</user_request>`
+}
+
+function escapePromptTagText(value: string): string {
+  return value
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+}
+
 async function applyRuntimeControls(
  runtime: AcpxCoreRuntime,
  handle: AcpRuntimeHandle,
--- a/packages/browseros-agent/apps/server/src/lib/agents/claude-code/prepare.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/claude-code/prepare.ts
@@ -1,27 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-import type {
-  PrepareAcpxAgentContextInput,
-  PreparedAcpxAgentContext,
-} from '../acpx-agent-adapter'
-import {
-  finishBrowserosManagedContext,
-  prepareBrowserosManagedContext,
-} from '../acpx-agent-common'
-
-/** Prepares Claude Code with BrowserOS agent home while preserving host Claude auth. */
-export async function prepareClaudeCodeContext(
-  input: PrepareAcpxAgentContextInput,
-): Promise<PreparedAcpxAgentContext> {
-  const common = await prepareBrowserosManagedContext(input)
-  return finishBrowserosManagedContext({
-    ...common,
-    commandEnv: {
-      AGENT_HOME: common.paths.agentHome,
-    },
-  })
-}
--- a/packages/browseros-agent/apps/server/src/lib/agents/codex/prepare.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/codex/prepare.ts
@@ -1,33 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-import type {
-  PrepareAcpxAgentContextInput,
-  PreparedAcpxAgentContext,
-} from '../acpx-agent-adapter'
-import {
-  finishBrowserosManagedContext,
-  prepareBrowserosManagedContext,
-} from '../acpx-agent-common'
-import { materializeCodexHome } from '../acpx-runtime-context'
-
-/** Prepares Codex with a contained CODEX_HOME and BrowserOS agent home. */
-export async function prepareCodexContext(
-  input: PrepareAcpxAgentContextInput,
-): Promise<PreparedAcpxAgentContext> {
-  const common = await prepareBrowserosManagedContext(input)
-  await materializeCodexHome({
-    paths: common.paths,
-    skillNames: common.skillNames,
-  })
-  return finishBrowserosManagedContext({
-    ...common,
-    commandEnv: {
-      AGENT_HOME: common.paths.agentHome,
-      CODEX_HOME: common.paths.codexHome,
-    },
-  })
-}
--- a/packages/browseros-agent/apps/server/src/lib/agents/openclaw/image-turn.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/openclaw/image-turn.ts
@@ -1,219 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-import { randomUUID } from 'node:crypto'
-import type { AcpSessionRecord, createRuntimeStore } from 'acpx/runtime'
-import type {
-  OpenAIChatMessage,
-  OpenAIContentPart,
-} from '../../../api/services/openclaw/openclaw-gateway-chat-client'
-import { logger } from '../../logger'
-import type { AcpxAdapterTurnInput } from '../acpx-agent-adapter'
-import type { AgentStreamEvent } from '../types'
-
-type ImageAttachment = Readonly<{ mediaType: string; data: string }>
-
-export async function maybeHandleOpenClawTurn(
-  input: AcpxAdapterTurnInput,
-): Promise<ReadableStream<AgentStreamEvent> | null> {
-  const imageAttachments = (input.prompt.attachments ?? []).filter((a) =>
-    a.mediaType.startsWith('image/'),
-  )
-  if (imageAttachments.length === 0 || !input.openclawGatewayChat) {
-    return null
-  }
-  return sendOpenclawViaGateway({
-    prompt: input.prompt,
-    sessionStore: input.sessionStore,
-    openclawGatewayChat: input.openclawGatewayChat,
-    imageAttachments,
-    cwd: input.prepared.cwd,
-    runPrompt: input.prepared.runPrompt,
-  })
-}
-
-/** Handles OpenClaw image turns through the gateway HTTP chat endpoint. */
-async function sendOpenclawViaGateway(input: {
-  prompt: AcpxAdapterTurnInput['prompt']
-  sessionStore: AcpxAdapterTurnInput['sessionStore']
-  openclawGatewayChat: NonNullable<AcpxAdapterTurnInput['openclawGatewayChat']>
-  imageAttachments: ReadonlyArray<ImageAttachment>
-  cwd: string
-  runPrompt: string
-}): Promise<ReadableStream<AgentStreamEvent>> {
-  const existingRecord = await input.sessionStore.load(input.prompt.sessionKey)
-  const priorMessages = existingRecord
-    ? recordToOpenAIMessages(existingRecord)
-    : []
-  const userContent: OpenAIContentPart[] = [
-    {
-      type: 'text',
-      text: input.runPrompt,
-    },
-    ...input.imageAttachments.map(
-      (a): OpenAIContentPart => ({
-        type: 'image_url',
-        image_url: { url: `data:${a.mediaType};base64,${a.data}` },
-      }),
-    ),
-  ]
-  const messages: OpenAIChatMessage[] = [
-    ...priorMessages,
-    { role: 'user', content: userContent },
-  ]
-
-  logger.info('Agent harness gateway image turn dispatched', {
-    agentId: input.prompt.agent.id,
-    sessionKey: input.prompt.sessionKey,
-    cwd: input.cwd,
-    priorMessageCount: priorMessages.length,
-    imageAttachmentCount: input.imageAttachments.length,
-  })
-
-  const upstream = await input.openclawGatewayChat.streamTurn({
-    agentId: input.prompt.agent.id,
-    sessionKey: input.prompt.sessionKey,
-    messages,
-    signal: input.prompt.signal,
-  })
-
-  const sessionStore = input.sessionStore
-  const sessionKey = input.prompt.sessionKey
-  const userMessageText = input.prompt.message
-  const imageAttachments = input.imageAttachments
-  let accumulated = ''
-
-  return new ReadableStream<AgentStreamEvent>({
-    start: (controller) => {
-      const reader = upstream.getReader()
-      const persist = async () => {
-        if (!existingRecord || !accumulated) return
-        try {
-          await persistGatewayTurn(
-            sessionStore,
-            sessionKey,
-            userMessageText,
-            imageAttachments,
-            accumulated,
-          )
-        } catch (err) {
-          logger.warn(
-            'Failed to persist gateway image turn to acpx session record',
-            {
-              sessionKey,
-              error: err instanceof Error ? err.message : String(err),
-            },
-          )
-        }
-      }
-      ;(async () => {
-        try {
-          while (true) {
-            const { done, value } = await reader.read()
-            if (done) break
-            if (value.type === 'text_delta') accumulated += value.text
-            controller.enqueue(value)
-          }
-          await persist()
-          controller.close()
-        } catch (err) {
-          controller.enqueue({
-            type: 'error',
-            message: err instanceof Error ? err.message : String(err),
-          })
-          controller.close()
-        }
-      })().catch(() => {})
-    },
-    cancel: () => {
-      // Best-effort: cancel propagation to the gateway is tracked separately.
-    },
-  })
-}
-
-async function persistGatewayTurn(
-  sessionStore: ReturnType<typeof createRuntimeStore>,
-  sessionKey: string,
-  userMessageText: string,
-  imageAttachments: ReadonlyArray<ImageAttachment>,
-  assistantText: string,
-): Promise<void> {
-  const record = await sessionStore.load(sessionKey)
-  if (!record) return
-  const userContent: AcpxUserContent[] = [
-    { Text: userMessageText } as AcpxUserContent,
-  ]
-  for (const _image of imageAttachments) {
-    userContent.push({ Image: { source: 'base64' } } as AcpxUserContent)
-  }
-  const turnId = randomUUID()
-  const updated = {
-    ...record,
-    messages: [
-      ...record.messages,
-      { User: { id: `user-${turnId}`, content: userContent } },
-      { Agent: { content: [{ Text: assistantText }], tool_results: {} } },
-    ],
-    lastUsedAt: new Date().toISOString(),
-  } as AcpSessionRecord
-  await sessionStore.save(updated)
-}
-
-function recordToOpenAIMessages(record: AcpSessionRecord): OpenAIChatMessage[] {
-  const messages: OpenAIChatMessage[] = []
-  for (const message of record.messages) {
-    if (message === 'Resume') continue
-    if ('User' in message) {
-      const text = message.User.content
-        .map(userContentToText)
-        .filter(Boolean)
-        .join('\n\n')
-        .trim()
-      if (text) messages.push({ role: 'user', content: text })
-      continue
-    }
-    if ('Agent' in message) {
-      const text = message.Agent.content
-        .map((part) => ('Text' in part ? part.Text : ''))
-        .join('')
-        .trim()
-      if (text) messages.push({ role: 'assistant', content: text })
-    }
-  }
-  return messages
-}
-
-type AcpxSessionMessage = AcpSessionRecord['messages'][number]
-type AcpxUserContent = Extract<
-  Exclude<AcpxSessionMessage, 'Resume'>,
-  { User: unknown }
->['User']['content'][number]
-
-function userContentToText(content: AcpxUserContent): string {
-  if ('Text' in content) return unwrapPromptText(content.Text)
-  if ('Mention' in content) return content.Mention.content
-  if ('Image' in content) return content.Image.source ? '[image]' : ''
-  return ''
-}
-
-function unwrapPromptText(raw: string): string {
-  const runtimeMatch = raw.match(
-    /^<browseros_acpx_runtime\b[\s\S]*?<\/browseros_acpx_runtime>\n\n<user_request>\n([\s\S]*?)\n<\/user_request>$/,
-  )
-  if (runtimeMatch) return decodeBasicEntities(runtimeMatch[1]).trim()
-  const roleMatch = raw.match(
-    /^<role>[\s\S]*?<\/role>\n\n<user_request>\n([\s\S]*?)\n<\/user_request>$/,
-  )
-  if (roleMatch) return decodeBasicEntities(roleMatch[1]).trim()
-  return raw.trim()
-}
-
-function decodeBasicEntities(value: string): string {
-  return value
-    .replace(/&lt;/g, '<')
-    .replace(/&gt;/g, '>')
-    .replace(/&amp;/g, '&')
-}
--- a/packages/browseros-agent/apps/server/src/lib/agents/openclaw/prepare.ts
+++ b/packages/browseros-agent/apps/server/src/lib/agents/openclaw/prepare.ts
@@ -1,46 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- */
-
-import type {
-  PrepareAcpxAgentContextInput,
-  PreparedAcpxAgentContext,
-} from '../acpx-agent-adapter'
-import {
-  buildBrowserosAcpPrompt,
-  ensureUsableCwd,
-  resolveAgentRuntimePaths,
-} from '../acpx-runtime-context'
-
-export { maybeHandleOpenClawTurn } from './image-turn'
-
-const OPENCLAW_BROWSEROS_ACP_INSTRUCTIONS =
-  '<role>You are running inside BrowserOS through the OpenClaw ACP adapter. Use your OpenClaw identity, memory, and browser tools.</role>'
-
-/**
- * Prepares OpenClaw without BrowserOS SOUL/MEMORY or BrowserOS MCP.
- * OpenClaw runs inside the gateway VM/container, so a selected host cwd is not visible there.
- */
-export async function prepareOpenClawContext(
-  input: PrepareAcpxAgentContextInput,
-): Promise<PreparedAcpxAgentContext> {
-  const paths = resolveAgentRuntimePaths({
-    browserosDir: input.browserosDir,
-    agentId: input.agent.id,
-  })
-  await ensureUsableCwd(paths.effectiveCwd, true)
-  return {
-    cwd: paths.effectiveCwd,
-    runtimeSessionKey: input.sessionKey,
-    runPrompt: buildBrowserosAcpPrompt(
-      OPENCLAW_BROWSEROS_ACP_INSTRUCTIONS,
-      input.message,
-    ),
-    commandEnv: {},
-    commandIdentity: 'openclaw',
-    useBrowserosMcp: false,
-    openclawSessionKey: input.sessionKey,
-  }
-}
--- a/packages/browseros-agent/apps/server/tests/lib/agents/acpx-agent-adapter.test.ts
+++ b/packages/browseros-agent/apps/server/tests/lib/agents/acpx-agent-adapter.test.ts
@@ -1,113 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- */
-
-import { afterEach, describe, expect, it } from 'bun:test'
-import { mkdtemp, readFile, rm } from 'node:fs/promises'
-import { tmpdir } from 'node:os'
-import { join } from 'node:path'
-import { prepareAcpxAgentContext } from '../../../src/lib/agents/acpx-agent-adapter'
-import type { AgentDefinition } from '../../../src/lib/agents/agent-types'
-
-describe('prepareAcpxAgentContext', () => {
-  const tempDirs: string[] = []
-
-  afterEach(async () => {
-    await Promise.all(
-      tempDirs.map((dir) => rm(dir, { recursive: true, force: true })),
-    )
-    tempDirs.length = 0
-  })
-
-  function makeAgent(adapter: AgentDefinition['adapter']): AgentDefinition {
-    return {
-      id: `${adapter}-agent`,
-      name: `${adapter} agent`,
-      adapter,
-      permissionMode: 'approve-all',
-      sessionKey: `agent:${adapter}-agent:main`,
-      createdAt: 1000,
-      updatedAt: 1000,
-    }
-  }
-
-  it('prepares Claude with BrowserOS memory, host auth, BrowserOS MCP, and fingerprinted session', async () => {
-    const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
-    tempDirs.push(browserosDir)
-    const prepared = await prepareAcpxAgentContext({
-      browserosDir,
-      agent: makeAgent('claude'),
-      sessionId: 'main',
-      sessionKey: 'agent:claude-agent:main',
-      cwdOverride: null,
-      isSelectedCwd: false,
-      message: 'remember this',
-    })
-
-    expect(prepared.commandEnv.AGENT_HOME).toContain('/claude-agent/home')
-    expect(prepared.commandEnv).not.toHaveProperty('CLAUDE_CONFIG_DIR')
-    expect(prepared.commandEnv).not.toHaveProperty('CODEX_HOME')
-    expect(prepared.useBrowserosMcp).toBe(true)
-    expect(prepared.openclawSessionKey).toBeNull()
-    expect(prepared.runtimeSessionKey).toMatch(
-      /^agent:claude-agent:main:[a-f0-9]{16}$/,
-    )
-    expect(prepared.runPrompt).toContain(
-      'Available skills: browseros, memory, soul',
-    )
-    expect(
-      await readFile(`${prepared.commandEnv.AGENT_HOME}/MEMORY.md`, 'utf8'),
-    ).toContain('# MEMORY.md')
-  })
-
-  it('prepares Codex with CODEX_HOME and BrowserOS MCP', async () => {
-    const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
-    tempDirs.push(browserosDir)
-    const prepared = await prepareAcpxAgentContext({
-      browserosDir,
-      agent: makeAgent('codex'),
-      sessionId: 'main',
-      sessionKey: 'agent:codex-agent:main',
-      cwdOverride: null,
-      isSelectedCwd: false,
-      message: 'hi',
-    })
-
-    expect(prepared.commandEnv.AGENT_HOME).toContain('/codex-agent/home')
-    expect(prepared.commandEnv.CODEX_HOME).toContain(
-      '/codex-agent/runtime/codex-home',
-    )
-    expect(prepared.commandEnv).not.toHaveProperty('CLAUDE_CONFIG_DIR')
-    expect(prepared.useBrowserosMcp).toBe(true)
-    expect(prepared.openclawSessionKey).toBeNull()
-    expect(prepared.runPrompt).toContain('AGENT_HOME=')
-  })
-
-  it('prepares OpenClaw without BrowserOS memory, host cwd, skills, or MCP', async () => {
-    const browserosDir = await mkdtemp(join(tmpdir(), 'browseros-adapters-'))
-    tempDirs.push(browserosDir)
-    const ignoredSelectedCwd = join(browserosDir, 'missing-selected-workspace')
-    const prepared = await prepareAcpxAgentContext({
-      browserosDir,
-      agent: makeAgent('openclaw'),
-      sessionId: 'main',
-      sessionKey: 'agent:openclaw-agent:main',
-      cwdOverride: ignoredSelectedCwd,
-      isSelectedCwd: true,
-      message: 'browse',
-    })
-
-    expect(prepared.cwd).toBe(
-      join(browserosDir, 'agents', 'harness', 'workspace'),
-    )
-    expect(prepared.commandEnv).toEqual({})
-    expect(prepared.useBrowserosMcp).toBe(false)
-    expect(prepared.openclawSessionKey).toBe('agent:openclaw-agent:main')
-    expect(prepared.runtimeSessionKey).toBe('agent:openclaw-agent:main')
-    expect(prepared.runPrompt).not.toContain('SOUL.md stores')
-    expect(prepared.runPrompt).not.toContain('BrowserOS memory skill')
-    expect(prepared.runPrompt).not.toContain('AGENT_HOME/MEMORY.md')
-    expect(prepared.runPrompt).not.toContain('Available skills:')
-  })
-})
--- a/packages/browseros-agent/apps/server/tests/lib/agents/acpx-runtime-context.test.ts
+++ b/packages/browseros-agent/apps/server/tests/lib/agents/acpx-runtime-context.test.ts
@@ -55,9 +55,6 @@ describe('acpx runtime context helpers', () => {
    expect(paths.runtimeSkillsDir).toBe(
      join(browserosDir, 'agents', 'harness', 'runtime-skills'),
    )
-    expect(paths.runtimeRoot).toBe(
-      join(browserosDir, 'agents', 'harness', 'agent-1', 'runtime'),
-    )
    expect(paths.codexHome).toBe(
      join(
        browserosDir,
@@ -260,33 +257,4 @@ describe('acpx runtime context helpers', () => {
    )
    expect(prompt).toContain('Available skills: browseros, memory, soul')
  })
-
-  it('routes explicit memory requests to BrowserOS AGENT_HOME files', () => {
-    const agent: AgentDefinition = {
-      id: 'agent-1',
-      name: 'Researcher',
-      adapter: 'claude',
-      permissionMode: 'approve-all',
-      sessionKey: 'agent:agent-1:main',
-      createdAt: 1000,
-      updatedAt: 1000,
-    }
-    const paths = resolveAgentRuntimePaths({
-      browserosDir: '/tmp/browseros',
-      agentId: agent.id,
-      cwd: '/tmp/workspace',
-    })
-
-    const prompt = buildAcpxRuntimePromptPrefix({
-      agent,
-      paths,
-      skillNames: ['browseros', 'memory', 'soul'],
-    })
-
-    expect(prompt).toContain('When the user asks you to remember')
-    expect(prompt).toContain('use the BrowserOS memory skill')
-    expect(prompt).toContain('AGENT_HOME/MEMORY.md')
-    expect(prompt).toContain('AGENT_HOME/memory/YYYY-MM-DD.md')
-    expect(prompt).toContain('Do not use native Claude project memory')
-  })
 })
--- a/packages/browseros-agent/apps/server/tests/lib/agents/acpx-runtime.test.ts
+++ b/packages/browseros-agent/apps/server/tests/lib/agents/acpx-runtime.test.ts
@@ -868,7 +868,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
    )
  })

-  it('injects AGENT_HOME without CLAUDE_CONFIG_DIR into Claude ACP command resolution', async () => {
+  it('injects AGENT_HOME into Claude ACP command resolution', async () => {
    const browserosDir = await mkdtemp(
      join(tmpdir(), 'browseros-acpx-browseros-'),
    )
@@ -898,7 +898,6 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
    const command =
      getCreateRuntimeOptions(calls).agentRegistry.resolve('claude')
    expect(command).toContain('env AGENT_HOME=')
-    expect(command).not.toContain('CLAUDE_CONFIG_DIR=')
    expect(command).not.toContain('CODEX_HOME=')
  })

@@ -1262,15 +1261,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
        max_segments: 0,
      },
      closed: false,
-      messages: [
-        {
-          User: {
-            id: 'prior-user',
-            content: [{ Text: 'literal &amp; &lt;tag&gt;' } as never],
-          },
-        },
-        { Agent: { content: [{ Text: 'Prior answer.' }], tool_results: {} } },
-      ],
+      messages: [],
      updated_at: seedTimestamp,
      cumulative_token_usage: {},
      request_token_usage: {},
@@ -1295,15 +1286,13 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
        })
      },
    } as never
-    const calls: Array<{ method: string; input: unknown }> = []
    const runtime = new AcpxRuntime({
      cwd,
      stateDir,
      openclawGatewayChat,
      // Provide a runtime factory that would fail loudly if reached —
      // image turns must NOT fall through to the ACP path.
-      runtimeFactory: (options) => {
-        calls.push({ method: 'createRuntime', input: options })
+      runtimeFactory: () => {
        throw new Error('ACP path should not be reached for image turns')
      },
    })
@@ -1334,9 +1323,6 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
      { type: 'done', stopReason: 'end_turn' },
    ])
    expect(gatewayCalls).toHaveLength(1)
-    expect(
-      calls.filter((call) => call.method === 'createRuntime'),
-    ).toHaveLength(0)
    const gatewayInput = gatewayCalls[0]?.input as {
      agentId: string
      sessionKey: string
@@ -1346,10 +1332,6 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
      }>
    }
    expect(gatewayInput.agentId).toBe('img-bot')
-    expect(gatewayInput.messages[0]).toEqual({
-      role: 'user',
-      content: 'literal &amp; &lt;tag&gt;',
-    })
    expect(gatewayInput.messages.at(-1)?.role).toBe('user')
    const userContent = gatewayInput.messages.at(-1)?.content
    expect(Array.isArray(userContent)).toBe(true)
@@ -1364,7 +1346,7 @@ Use the BrowserOS MCP server for all browser tasks, including browsing the web,
      agent,
      sessionId: 'main',
    })
-    expect(history.items.slice(-2).map((item) => item.role)).toEqual([
+    expect(history.items.map((item) => item.role)).toEqual([
      'user',
      'assistant',
    ])
--- a/packages/browseros/tools/patch/Makefile
+++ b/packages/browseros/tools/patch/Makefile
@@ -1,12 +1,8 @@
 BINARY := browseros-patch
-GOBIN := $(shell go env GOBIN)
-ifeq ($(GOBIN),)
-GOBIN := $(shell go env GOPATH)/bin
-endif
-PREFIX ?= $(GOBIN)
+PREFIX ?= /usr/local/bin
 VERSION ?= dev

-.PHONY: build install uninstall clean test fmt
+.PHONY: build install clean test fmt

 build:
 	go build -ldflags "-X github.com/browseros-ai/BrowserOS/packages/browseros/tools/patch/cmd.Version=$(VERSION)" -o $(BINARY) .
@@ -21,10 +17,6 @@ else
 endif
 	@echo "Installed $(BINARY) to $(PREFIX)/$(BINARY)"

-uninstall:
-	rm -f $(PREFIX)/$(BINARY)
-	@echo "Removed $(PREFIX)/$(BINARY)"
-
 test:
 	go test ./...

--- a/packages/browseros/tools/patch/internal/workspace/detect.go
+++ b/packages/browseros/tools/patch/internal/workspace/detect.go
@@ -3,7 +3,6 @@ package workspace
 import (
 	"fmt"
 	"path/filepath"
-	"slices"
 	"strings"
 )

@@ -16,21 +15,21 @@ func Detect(reg *Registry, cwd string) (Entry, error) {
 		return Entry{}, err
 	}
 	clean := filepath.Clean(abs)
-	realClean := canonicalPath(clean)
 	var best Entry
 	bestLen := -1
 	for _, ws := range reg.Workspaces {
 		base := filepath.Clean(ws.Path)
-		realBase := canonicalPath(base)
-		if containsPath(clean, base) || containsPath(realClean, realBase) {
-			if len(realBase) > bestLen {
+		if clean == base || strings.HasPrefix(clean, base+string(filepath.Separator)) {
+			if len(base) > bestLen {
 				best = ws
-				bestLen = len(realBase)
+				bestLen = len(base)
 			}
 		}
 	}
 	if bestLen == -1 {
-		return Entry{}, detectError(clean, realClean, reg.Workspaces)
+		return Entry{}, fmt.Errorf(
+			`not inside a registered workspace; run "browseros-patch list" to inspect workspaces or pass one by name`,
+		)
 	}
 	return best, nil
 }
@@ -48,43 +47,3 @@ func Resolve(reg *Registry, name string, cwd string, src string) (Entry, error)
 	}
 	return Detect(reg, cwd)
 }
-
-func canonicalPath(path string) string {
-	realPath, err := filepath.EvalSymlinks(path)
-	if err != nil {
-		return filepath.Clean(path)
-	}
-	return filepath.Clean(realPath)
-}
-
-func containsPath(path string, base string) bool {
-	return path == base || strings.HasPrefix(path, base+string(filepath.Separator))
-}
-
-func detectError(cwd string, resolvedCWD string, workspaces []Entry) error {
-	var builder strings.Builder
-	builder.WriteString(`not inside a registered workspace; run "browseros-patch list" to inspect workspaces or pass one by name`)
-	builder.WriteString("\n")
-	builder.WriteString("cwd: ")
-	builder.WriteString(cwd)
-	if resolvedCWD != cwd {
-		builder.WriteString("\nresolved cwd: ")
-		builder.WriteString(resolvedCWD)
-	}
-	if len(workspaces) > 0 {
-		builder.WriteString("\nregistered workspaces:")
-		sorted := append([]Entry(nil), workspaces...)
-		slices.SortFunc(sorted, func(a, b Entry) int {
-			return strings.Compare(a.Name, b.Name)
-		})
-		for _, ws := range sorted {
-			builder.WriteString("\n  ")
-			builder.WriteString(ws.Name)
-			builder.WriteString("  ")
-			builder.WriteString(ws.Path)
-		}
-		builder.WriteString("\nexample: browseros-patch diff ")
-		builder.WriteString(sorted[0].Name)
-	}
-	return fmt.Errorf("%s", builder.String())
-}
--- a/packages/browseros/tools/patch/internal/workspace/registry.go
+++ b/packages/browseros/tools/patch/internal/workspace/registry.go
@@ -70,7 +70,7 @@ func NormalizeWorkspacePath(raw string) (string, error) {
 	if _, err := os.Stat(filepath.Join(clean, ".git")); err != nil {
 		return "", fmt.Errorf("workspace is not a git checkout: %s", clean)
 	}
-	return canonicalPath(clean), nil
+	return clean, nil
 }

 func (r *Registry) Get(name string) (Entry, error) {
--- a/packages/browseros/tools/patch/internal/workspace/workspace_test.go
+++ b/packages/browseros/tools/patch/internal/workspace/workspace_test.go
@@ -3,7 +3,6 @@ package workspace
 import (
 	"os"
 	"path/filepath"
-	"strings"
 	"testing"
 )

@@ -37,10 +36,6 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
 			t.Fatalf("mkdir: %v", err)
 		}
 	}
-	detectedPath := filepath.Join(child, "chrome", "browser")
-	if err := os.MkdirAll(detectedPath, 0o755); err != nil {
-		t.Fatalf("mkdir detected path: %v", err)
-	}

 	reg := &Registry{Version: 1}
 	if _, err := reg.Add("parent", parent); err != nil {
@@ -50,7 +45,7 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
 		t.Fatalf("add child: %v", err)
 	}

-	ws, err := Detect(reg, detectedPath)
+	ws, err := Detect(reg, filepath.Join(child, "chrome", "browser"))
 	if err != nil {
 		t.Fatalf("Detect: %v", err)
 	}
@@ -58,86 +53,3 @@ func TestRegistryDetectsLongestMatchingWorkspace(t *testing.T) {
 		t.Fatalf("expected child workspace, got %q", ws.Name)
 	}
 }
-
-func TestDetectMatchesSymlinkedWorkingDirectory(t *testing.T) {
-	root := t.TempDir()
-	workspacePath := filepath.Join(root, "chromium-1", "src")
-	if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
-		t.Fatalf("mkdir workspace: %v", err)
-	}
-	if err := os.MkdirAll(filepath.Join(workspacePath, "chrome", "browser"), 0o755); err != nil {
-		t.Fatalf("mkdir workspace child: %v", err)
-	}
-	linkPath := filepath.Join(root, "ch-1")
-	if err := os.Symlink(workspacePath, linkPath); err != nil {
-		t.Fatalf("symlink workspace: %v", err)
-	}
-
-	reg := &Registry{Version: 1}
-	if _, err := reg.Add("ch1", workspacePath); err != nil {
-		t.Fatalf("add workspace: %v", err)
-	}
-
-	ws, err := Detect(reg, filepath.Join(linkPath, "chrome", "browser"))
-	if err != nil {
-		t.Fatalf("Detect: %v", err)
-	}
-	if ws.Name != "ch1" {
-		t.Fatalf("expected ch1 workspace, got %q", ws.Name)
-	}
-}
-
-func TestRegistryAddStoresCanonicalWorkspacePath(t *testing.T) {
-	root := t.TempDir()
-	workspacePath := filepath.Join(root, "chromium-1", "src")
-	if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
-		t.Fatalf("mkdir workspace: %v", err)
-	}
-	linkPath := filepath.Join(root, "ch-1")
-	if err := os.Symlink(workspacePath, linkPath); err != nil {
-		t.Fatalf("symlink workspace: %v", err)
-	}
-
-	reg := &Registry{Version: 1}
-	entry, err := reg.Add("ch1", linkPath)
-	if err != nil {
-		t.Fatalf("add workspace: %v", err)
-	}
-	expectedPath := canonicalPath(workspacePath)
-	if entry.Path != expectedPath {
-		t.Fatalf("expected canonical path %q, got %q", expectedPath, entry.Path)
-	}
-}
-
-func TestDetectErrorIncludesPathContextAndWorkspaceHint(t *testing.T) {
-	root := t.TempDir()
-	workspacePath := filepath.Join(root, "chromium-1", "src")
-	if err := os.MkdirAll(filepath.Join(workspacePath, ".git"), 0o755); err != nil {
-		t.Fatalf("mkdir workspace: %v", err)
-	}
-	outsidePath := filepath.Join(root, "outside")
-	if err := os.MkdirAll(outsidePath, 0o755); err != nil {
-		t.Fatalf("mkdir outside: %v", err)
-	}
-
-	reg := &Registry{Version: 1}
-	if _, err := reg.Add("ch1", workspacePath); err != nil {
-		t.Fatalf("add workspace: %v", err)
-	}
-
-	_, err := Detect(reg, outsidePath)
-	if err == nil {
-		t.Fatalf("expected Detect to fail")
-	}
-	message := err.Error()
-	for _, want := range []string{
-		"cwd: " + outsidePath,
-		"registered workspaces:",
-		"ch1  " + canonicalPath(workspacePath),
-		"example: browseros-patch diff ch1",
-	} {
-		if !strings.Contains(message, want) {
-			t.Fatalf("expected error to contain %q, got:\n%s", want, message)
-		}
-	}
-}