fix: address balpha CLI dogfooding feedback

fix: update readme (#829)
feat: add balpha dogfooding CLI (#828)
2026-05-14 08:03:58 +00:00 · 2026-04-27 15:39:39 -07:00 · 2026-04-27 15:27:16 -07:00 · 2026-04-27 15:03:37 -07:00 · 2026-04-27 21:52:26 +05:30 · 2026-04-27 21:35:43 +05:30
272 changed files with 29568 additions and 5284 deletions
--- a/.github/workflows/build-agent.yml
+++ b/.github/workflows/build-agent.yml
@@ -0,0 +1,157 @@
+name: build-agent
+
+on:
+  workflow_dispatch:
+    inputs:
+      agent:
+        description: "Agent name from bundle.json"
+        required: true
+        type: string
+        default: openclaw
+      publish:
+        description: "Upload to R2 and merge manifest slice"
+        required: false
+        default: false
+        type: boolean
+  pull_request:
+    paths:
+      - "packages/browseros-agent/packages/build-tools/**"
+      - ".github/workflows/build-agent.yml"
+
+env:
+  BUN_VERSION: "1.3.6"
+  PKG_DIR: packages/browseros-agent/packages/build-tools
+
+permissions:
+  contents: read
+
+jobs:
+  check:
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: ${{ env.BUN_VERSION }}
+      - working-directory: packages/browseros-agent
+        run: bun install --frozen-lockfile
+      - working-directory: packages/browseros-agent
+        run: bun run --filter @browseros/build-tools typecheck
+      - working-directory: packages/browseros-agent
+        run: bun run --filter @browseros/build-tools test
+
+  build:
+    needs: check
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - arch: arm64
+            runner: ubuntu-24.04-arm
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: ${{ env.BUN_VERSION }}
+      - name: Install podman
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y podman
+      - working-directory: packages/browseros-agent
+        run: bun install --frozen-lockfile
+      - name: Build tarball
+        working-directory: ${{ env.PKG_DIR }}
+        env:
+          AGENT: ${{ inputs.agent || 'openclaw' }}
+          OUT: ${{ github.workspace }}/dist/images
+        run: bun run build:tarball -- --agent "$AGENT" --arch "${{ matrix.arch }}" --output-dir "$OUT"
+      - uses: actions/upload-artifact@v4
+        with:
+          name: tarball-${{ inputs.agent || 'openclaw' }}-${{ matrix.arch }}
+          path: dist/images/
+          retention-days: 7
+
+  smoke:
+    needs: build
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - uses: actions/checkout@v4
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: ${{ env.BUN_VERSION }}
+      - uses: actions/download-artifact@v4
+        with:
+          name: tarball-${{ inputs.agent || 'openclaw' }}-arm64
+          path: dist/images
+      - name: Install podman
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y podman
+      - working-directory: packages/browseros-agent
+        run: bun install --frozen-lockfile
+      - name: Smoke test tarball
+        working-directory: ${{ env.PKG_DIR }}
+        env:
+          AGENT: ${{ inputs.agent || 'openclaw' }}
+        run: |
+          set -euo pipefail
+          tarball="$(find "$GITHUB_WORKSPACE/dist/images" -name "${AGENT}-*-arm64.tar.gz" -print -quit)"
+          if [ -z "$tarball" ]; then
+            echo "missing arm64 tarball artifact for ${AGENT}" >&2
+            exit 1
+          fi
+          bun run smoke:tarball -- --agent "$AGENT" --arch arm64 --tarball "$tarball"
+
+  publish:
+    needs: [build, smoke]
+    if: ${{ github.event_name == 'workflow_dispatch' && inputs.publish == true }}
+    runs-on: ubuntu-24.04
+    environment: release
+    concurrency:
+      group: r2-manifest-publish
+      cancel-in-progress: false
+    steps:
+      - uses: actions/checkout@v4
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: ${{ env.BUN_VERSION }}
+      - uses: actions/download-artifact@v4
+        with:
+          pattern: tarball-*
+          path: dist/images
+          merge-multiple: true
+      - working-directory: packages/browseros-agent
+        run: bun install --frozen-lockfile
+      - name: Upload tarballs to R2
+        working-directory: ${{ env.PKG_DIR }}
+        env:
+          R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}
+          R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
+          R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
+          R2_BUCKET: ${{ secrets.R2_BUCKET }}
+        run: |
+          set -euo pipefail
+          for file in "$GITHUB_WORKSPACE"/dist/images/*.tar.gz; do
+            base="$(basename "$file")"
+            bun run upload -- --file "$file" --key "vm/images/$base" --content-type "application/gzip" --sidecar-sha
+          done
+      - name: Merge agent slice into manifest
+        working-directory: ${{ env.PKG_DIR }}
+        env:
+          AGENT: ${{ inputs.agent || 'openclaw' }}
+          R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}
+          R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
+          R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
+          R2_BUCKET: ${{ secrets.R2_BUCKET }}
+        run: |
+          set -euo pipefail
+          mkdir -p dist/images
+          cp -R "$GITHUB_WORKSPACE"/dist/images/* dist/images/
+          bun run download -- --key vm/manifest.json --out dist/baseline-manifest.json
+          bun run emit-manifest -- \
+            --slice "agents:${AGENT}" \
+            --dist-dir dist \
+            --merge-from dist/baseline-manifest.json \
+            --out dist/manifest.json
+          bun run upload -- --file dist/manifest.json --key vm/manifest.json --content-type "application/json"
--- a/.github/workflows/eval-weekly.yml
+++ b/.github/workflows/eval-weekly.yml
@@ -43,6 +43,12 @@ jobs:
        working-directory: packages/browseros-agent
        run: bun install --ignore-scripts && bun run build:agent-sdk

+      - name: Install Python eval dependencies
+        run: pip install agisdk requests
+
+      - name: Clone WebArena-Infinity
+        run: git clone --depth 1 https://github.com/web-arena-x/webarena-infinity.git /tmp/webarena-infinity
+
      - name: Install xvfb
        run: sudo apt-get update && sudo apt-get install -y xvfb

@@ -57,9 +63,11 @@ jobs:
        working-directory: packages/browseros-agent/apps/eval
        env:
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          NOPECHA_API_KEY: ${{ secrets.NOPECHA_API_KEY }}
          BROWSEROS_BINARY: /usr/bin/browseros
+          WEBARENA_INFINITY_DIR: /tmp/webarena-infinity
          EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}
        run: |
          echo "Running eval with config: $EVAL_CONFIG"
@@ -81,6 +89,8 @@ jobs:

      - name: Generate trend report
        if: success()
+        timeout-minutes: 5
+        continue-on-error: true
        working-directory: packages/browseros-agent
        env:
          EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,12 +30,54 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - suite: tools
-            test_path: tests/tools
-            junit_path: test-results/tools.xml
-          - suite: integration
-            test_path: tests/server.integration.test.ts
-            junit_path: test-results/integration.xml
+          - suite: server-agent
+            command: (cd apps/server && bun run test:agent)
+            junit_path: test-results/server-agent.xml
+            needs_browser: false
+          - suite: server-api
+            command: (cd apps/server && bun run test:api)
+            junit_path: test-results/server-api.xml
+            needs_browser: false
+          - suite: server-skills
+            command: (cd apps/server && bun run test:skills)
+            junit_path: test-results/server-skills.xml
+            needs_browser: false
+          - suite: server-tools
+            command: (cd apps/server && bun run test:tools)
+            junit_path: test-results/server-tools.xml
+            needs_browser: true
+          - suite: server-browser
+            command: (cd apps/server && bun run test:browser)
+            junit_path: test-results/server-browser.xml
+            needs_browser: false
+          - suite: server-integration
+            command: (cd apps/server && bun run test:integration)
+            junit_path: test-results/server-integration.xml
+            needs_browser: true
+          - suite: server-sdk
+            command: (cd apps/server && bun run test:sdk)
+            junit_path: test-results/server-sdk.xml
+            needs_browser: true
+          - suite: server-root
+            command: (cd apps/server && bun run test:root)
+            junit_path: test-results/server-root.xml
+            needs_browser: false
+          - suite: agent
+            command: bun run test:agent
+            junit_path: test-results/agent.xml
+            needs_browser: false
+          - suite: eval
+            command: bun run test:eval
+            junit_path: test-results/eval.xml
+            needs_browser: false
+          - suite: agent-sdk
+            command: bun run test:agent-sdk
+            junit_path: test-results/agent-sdk.xml
+            needs_browser: false
+          - suite: build
+            command: bun run test:build
+            junit_path: test-results/build.xml
+            needs_browser: false

    steps:
      - name: Checkout code
@@ -48,6 +90,7 @@ jobs:
        run: bun ci

      - name: Resolve BrowserOS cache key
+        if: matrix.needs_browser == true
        id: browseros-cache-key
        run: |
          set -euo pipefail
@@ -62,6 +105,7 @@ jobs:
          echo "key=browseros-appimage-${{ runner.os }}-$cache_key" >> "$GITHUB_OUTPUT"

      - name: Restore BrowserOS cache
+        if: matrix.needs_browser == true
        id: browseros-cache
        uses: actions/cache@v4
        with:
@@ -69,13 +113,14 @@ jobs:
          key: ${{ steps.browseros-cache-key.outputs.key }}

      - name: Download BrowserOS
-        if: steps.browseros-cache.outputs.cache-hit != 'true'
+        if: matrix.needs_browser == true && steps.browseros-cache.outputs.cache-hit != 'true'
        run: |
          mkdir -p .ci/bin
          curl -fsSL "$BROWSEROS_APPIMAGE_URL" -o .ci/bin/BrowserOS.AppImage
          chmod +x .ci/bin/BrowserOS.AppImage

      - name: Prepare BrowserOS wrapper
+        if: matrix.needs_browser == true
        run: |
          mkdir -p .ci/bin
          cat > .ci/bin/browseros <<'EOF'
@@ -96,16 +141,23 @@ jobs:
          BROWSEROS_BINARY: ${{ github.workspace }}/packages/browseros-agent/.ci/bin/browseros
          BROWSEROS_TEST_HEADLESS: "true"
          BROWSEROS_TEST_EXTRA_ARGS: --no-sandbox --disable-dev-shm-usage
+          BROWSEROS_JUNIT_PATH: ${{ github.workspace }}/packages/browseros-agent/${{ matrix.junit_path }}
        run: |
          set +e
          mkdir -p test-results
-          cd apps/server
-          bun run test:cleanup
-          bun --env-file=.env.development test "${{ matrix.test_path }}" --reporter=junit --reporter-outfile="../../${{ matrix.junit_path }}"
+          ${{ matrix.command }}
          exit_code=$?
-          cd ../..
          if [ ! -f "${{ matrix.junit_path }}" ]; then
-            cat > "${{ matrix.junit_path }}" <<EOF
+            if [ "$exit_code" = "0" ]; then
+              cat > "${{ matrix.junit_path }}" <<EOF
+          <?xml version="1.0" encoding="UTF-8"?>
+          <testsuites tests="0" failures="0">
+            <testsuite name="${{ matrix.suite }}" tests="0" failures="0">
+            </testsuite>
+          </testsuites>
+          EOF
+            else
+              cat > "${{ matrix.junit_path }}" <<EOF
          <?xml version="1.0" encoding="UTF-8"?>
          <testsuites tests="1" failures="1">
            <testsuite name="${{ matrix.suite }}" tests="1" failures="1">
@@ -115,6 +167,7 @@ jobs:
            </testsuite>
          </testsuites>
          EOF
+            fi
          fi
          echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT"

@@ -136,3 +189,124 @@ jobs:
            echo "See the uploaded \`junit-${{ matrix.suite }}\` artifact for details." >> "$GITHUB_STEP_SUMMARY"
            exit 1
          fi
+
+  comment:
+    name: PR test summary
+    needs: test
+    if: >-
+      always()
+      && github.event_name == 'pull_request'
+      && github.event.pull_request.head.repo.full_name == github.repository
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      actions: read
+    steps:
+      - name: Download JUnit artifacts
+        uses: actions/download-artifact@v4
+        continue-on-error: true
+        with:
+          path: junit
+          pattern: junit-*
+
+      - name: Build comment body
+        run: |
+          python3 <<'PY'
+          import glob, os, xml.etree.ElementTree as ET
+
+          run_url = f"{os.environ['GITHUB_SERVER_URL']}/{os.environ['GITHUB_REPOSITORY']}/actions/runs/{os.environ['GITHUB_RUN_ID']}"
+          marker = "<!-- browseros-agent-tests-summary -->"
+
+          suites = []
+          failed_cases = []
+          total_tests = total_failed = total_skipped = 0
+
+          for xml_path in sorted(glob.glob("junit/junit-*/*.xml")):
+              suite_name = os.path.basename(os.path.dirname(xml_path)).removeprefix("junit-")
+              try:
+                  root = ET.parse(xml_path).getroot()
+              except ET.ParseError:
+                  suites.append({"name": suite_name, "passed": 0, "failed": 1, "skipped": 0, "total": 1})
+                  total_tests += 1
+                  total_failed += 1
+                  failed_cases.append((suite_name, "(could not parse junit XML)"))
+                  continue
+
+              testsuites = root.findall("testsuite") if root.tag == "testsuites" else [root]
+              s_tests = s_fail = s_err = s_skip = 0
+              for ts in testsuites:
+                  s_tests += int(ts.get("tests") or 0)
+                  s_fail += int(ts.get("failures") or 0)
+                  s_err += int(ts.get("errors") or 0)
+                  s_skip += int(ts.get("skipped") or 0)
+                  for tc in ts.iter("testcase"):
+                      if tc.find("failure") is not None or tc.find("error") is not None:
+                          cls = tc.get("classname") or ""
+                          name = tc.get("name") or "(unnamed)"
+                          label = f"{cls} > {name}" if cls else name
+                          failed_cases.append((suite_name, label))
+
+              s_failed = s_fail + s_err
+              s_passed = max(s_tests - s_failed - s_skip, 0)
+              suites.append({"name": suite_name, "passed": s_passed, "failed": s_failed, "skipped": s_skip, "total": s_tests})
+              total_tests += s_tests
+              total_failed += s_failed
+              total_skipped += s_skip
+
+          total_passed = max(total_tests - total_failed - total_skipped, 0)
+
+          if total_tests == 0:
+              header = "## :warning: No test results were produced"
+          elif total_failed == 0:
+              header = f"## :white_check_mark: Tests passed — {total_passed}/{total_tests}"
+          else:
+              header = f"## :x: Tests failed — {total_failed}/{total_tests} failed"
+
+          lines = [marker, header, ""]
+          if suites:
+              lines.append("| Suite | Passed | Failed | Skipped |")
+              lines.append("|-------|--------|--------|---------|")
+              for s in suites:
+                  icon = ":white_check_mark:" if s["failed"] == 0 and s["total"] > 0 else ":warning:" if s["total"] == 0 else ":x:"
+                  lines.append(f"| {icon} `{s['name']}` | {s['passed']}/{s['total']} | {s['failed']} | {s['skipped']} |")
+
+          if failed_cases:
+              lines += ["", "<details open>", "<summary><b>Failed tests</b></summary>", ""]
+              for suite_name, label in failed_cases[:50]:
+                  lines.append(f"- **{suite_name}** — `{label}`")
+              if len(failed_cases) > 50:
+                  lines.append(f"- …and {len(failed_cases) - 50} more")
+              lines += ["", "</details>"]
+
+          lines += ["", f"[View workflow run]({run_url})"]
+
+          with open("comment.md", "w") as f:
+              f.write("\n".join(lines) + "\n")
+          PY
+
+      - name: Upsert sticky PR comment
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const body = fs.readFileSync('comment.md', 'utf8');
+            const marker = '<!-- browseros-agent-tests-summary -->';
+            const { owner, repo } = context.repo;
+            const issue_number = context.payload.pull_request.number;
+
+            const triggerSha = context.payload.pull_request.head.sha;
+            const { data: pr } = await github.rest.pulls.get({ owner, repo, pull_number: issue_number });
+            if (pr.head.sha !== triggerSha) {
+              core.info(`PR head has moved (${pr.head.sha} vs ${triggerSha}) — skipping stale comment.`);
+              return;
+            }
+
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner, repo, issue_number, per_page: 100,
+            });
+            const existing = comments.find(c => c.body && c.body.includes(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
+            } else {
+              await github.rest.issues.createComment({ owner, repo, issue_number, body });
+            }
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,7 @@
 **/.DS_Store
+# Match .auctor directories at any depth ("**" only spans directories when delimited by "/").
+**/.auctor/**
+.auctor.json
 .gcs_entries
 **/dmg
 **/env
--- a/packages/browseros-agent/.gitignore
+++ b/packages/browseros-agent/.gitignore
@@ -14,6 +14,7 @@ lerna-debug.log*
 # Ignore all .env files except .env.example
 **/.env.*
 !**/.env.example
+!**/.env.sample
 !**/.env.production.example


@@ -179,6 +180,8 @@ packages/*/dist
 browseros-server
 browseros-server.exe
 browseros-server-*
+tools/alpha/balpha
+tools/alpha/browseros-alpha
 tools/dev/browseros-dev

 log.txt
--- a/packages/browseros-agent/CLAUDE.md
+++ b/packages/browseros-agent/CLAUDE.md
@@ -218,3 +218,9 @@ This uses the same element resolution as the server's MCP tools — no coordinat
 The `<target>` argument can be:
 - An **index** from the `targets` output (e.g., `3`)
 - A **URL substring** (e.g., `sidepanel`, `newtab`, `chrome-extension://`)
+
+## Release gating — bundled-VM runtime migration (2026-Q2)
+
+Between the Lima server-prod-resources cutover (WS3) and the ContainerRuntime migration (WS6) landing, `resources/bin/third_party/` ships `limactl` instead of `podman`. The current OpenClaw runtime (`apps/server/src/api/services/openclaw/podman-runtime.ts`, `container-runtime.ts`) still invokes `podman`; it will fail to find the binary on builds cut from `dev`.
+
+Do **not** cut a release branch off `dev` during this window. Track WS6 progress before any release cut. See `specs/bundled-vm-runtime-spec.md` + `specs/workstreams.md` for context.
--- a/packages/browseros-agent/README.md
+++ b/packages/browseros-agent/README.md
@@ -75,26 +75,20 @@ packages/

 ### Setup

-Requires [process-compose](https://github.com/F1bonacc1/process-compose):
-
-```bash
-brew install process-compose
-```
-
 ```bash
 # Copy environment files for each package
 cp apps/server/.env.example apps/server/.env.development
 cp apps/agent/.env.example apps/agent/.env.development
 cp apps/server/.env.production.example apps/server/.env.production

+# Install deps, generate agent code, and sync the VM cache
+bun run dev:setup
+
 # Start the full dev environment
-process-compose up
+bun run dev:watch
 ```

-The `process-compose up` command runs the following in order:
-1. `bun install` — installs dependencies
-2. `bun --cwd apps/agent codegen` — generates agent code
-3. `bun --cwd apps/server start` and `bun --cwd apps/agent dev` — starts server and agent in parallel
+`dev:watch` exits if the VM cache manifest is missing; it does not perform setup itself, so run `bun run dev:setup` first.

 ### Environment Variables

--- a/packages/browseros-agent/apps/agent/components/sidebar/SidebarNavigation.tsx
+++ b/packages/browseros-agent/apps/agent/components/sidebar/SidebarNavigation.tsx
@@ -74,6 +74,18 @@ const primaryNavItems: NavItem[] = [
  { name: 'Settings', to: '/settings/ai', icon: Settings },
 ]

+function isNavItemActive(item: NavItem, pathname: string): boolean {
+  if (item.to === '/settings/ai') {
+    return pathname.startsWith('/settings')
+  }
+
+  if (item.to === '/agents') {
+    return pathname === '/agents' || pathname.startsWith('/agents/')
+  }
+
+  return pathname === item.to
+}
+
 export const SidebarNavigation: FC<SidebarNavigationProps> = ({
  expanded = true,
 }) => {
@@ -90,10 +102,7 @@ export const SidebarNavigation: FC<SidebarNavigationProps> = ({
        <nav className="space-y-1">
          {filteredItems.map((item) => {
            const Icon = item.icon
-            const isActive =
-              item.to === '/settings/ai'
-                ? location.pathname.startsWith('/settings')
-                : location.pathname === item.to
+            const isActive = isNavItemActive(item, location.pathname)

            const navItem = (
              <NavLink
--- a/packages/browseros-agent/apps/agent/entrypoints/app/App.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/App.tsx
@@ -113,7 +113,22 @@ export const App: FC = () => {
          <Route path="connect-apps" element={<ConnectMCP />} />
          <Route path="scheduled" element={<ScheduledTasksPage />} />
          {alphaEnabled ? (
-            <Route path="agents" element={<AgentsPage />} />
+            <>
+              <Route path="agents" element={<AgentsPage />} />
+              <Route element={<AgentCommandLayout />}>
+                <Route
+                  path="agents/:agentId"
+                  element={
+                    <AgentCommandConversation
+                      variant="page"
+                      backPath="/agents"
+                      agentPathPrefix="/agents"
+                      createAgentPath="/agents"
+                    />
+                  }
+                />
+              </Route>
+            </>
          ) : null}
          {alphaEnabled ? (
            <Route path="admin" element={<AdminDashboardPage />} />
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/AgentCard.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/AgentCard.tsx
@@ -1,4 +1,4 @@
-import { Bot } from 'lucide-react'
+import { Bot, Loader2, Wrench } from 'lucide-react'
 import type { FC } from 'react'
 import type { AgentCardData } from '@/lib/agent-conversations/types'
 import { cn } from '@/lib/utils'
@@ -32,6 +32,11 @@ function getStatusTone(status: AgentCardData['status']): string {
  return 'bg-emerald-500'
 }

+function formatCost(usd: number): string {
+  if (usd < 0.005) return `$${usd.toFixed(4)}`
+  return `$${usd.toFixed(2)}`
+}
+
 export const AgentCardExpanded: FC<AgentCardProps> = ({
  agent,
  onClick,
@@ -81,9 +86,26 @@ export const AgentCardExpanded: FC<AgentCardProps> = ({
      </p>
    </div>

-    <div className="mt-4 flex items-center justify-between gap-3 text-muted-foreground text-xs">
-      <span>{formatTimestamp(agent.lastMessageTimestamp)}</span>
-      <span>Open conversation</span>
+    <div className="mt-4 space-y-1.5 text-muted-foreground text-xs">
+      <div className="flex items-center justify-between gap-3">
+        <span>{formatTimestamp(agent.lastMessageTimestamp)}</span>
+        {agent.costUsd ? (
+          <span className="tabular-nums opacity-70">
+            {formatCost(agent.costUsd)}
+          </span>
+        ) : null}
+      </div>
+      {agent.status === 'working' && agent.currentTool ? (
+        <div className="flex items-center gap-1.5 text-[var(--accent-orange)]/70">
+          <Loader2 className="size-3 shrink-0 animate-spin" />
+          <span className="truncate">{agent.currentTool}</span>
+        </div>
+      ) : agent.activitySummary ? (
+        <div className="flex items-center gap-1.5 text-muted-foreground/60">
+          <Wrench className="size-3 shrink-0" />
+          <span className="truncate">{agent.activitySummary}</span>
+        </div>
+      ) : null}
    </div>
  </button>
 )
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/AgentCommandConversation.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/AgentCommandConversation.tsx
@@ -1,194 +1,429 @@
-import { Bot, Home, RotateCcw } from 'lucide-react'
-import { type FC, useEffect, useRef } from 'react'
+import { ArrowLeft, Bot, Home } from 'lucide-react'
+import { type FC, useEffect, useMemo, useRef, useState } from 'react'
 import { Navigate, useNavigate, useParams, useSearchParams } from 'react-router'
 import { Button } from '@/components/ui/button'
-import type { AgentEntry } from '@/entrypoints/app/agents/useOpenClaw'
+import {
+  type AgentEntry,
+  getModelDisplayName,
+} from '@/entrypoints/app/agents/useOpenClaw'
 import { cn } from '@/lib/utils'
 import { useAgentCommandData } from './agent-command-layout'
+import { ClawChat } from './ClawChat'
 import { ConversationInput } from './ConversationInput'
-import { ConversationMessage } from './ConversationMessage'
+import {
+  buildChatHistoryFromClawMessages,
+  flattenHistoryPages,
+} from './claw-chat-types'
 import { useAgentConversation } from './useAgentConversation'
+import { useClawChatHistory } from './useClawChatHistory'
+import { useOutboundQueue } from './useOutboundQueue'
+
+function StatusBadge({ status }: { status: string }) {
+  return (
+    <div className="inline-flex items-center gap-2 rounded-full border border-border/60 bg-card px-3 py-1 text-[11px] text-muted-foreground uppercase tracking-[0.18em]">
+      <span
+        className={cn(
+          'size-1.5 rounded-full',
+          status === 'Working on your request'
+            ? 'bg-amber-500'
+            : status === 'Ready'
+              ? 'bg-emerald-500'
+              : status === 'Offline'
+                ? 'bg-muted-foreground/50'
+                : 'bg-[var(--accent-orange)]',
+        )}
+      />
+      <span>{status}</span>
+    </div>
+  )
+}
+
+function AgentIdentity({
+  name,
+  meta,
+  className,
+}: {
+  name: string
+  meta: string
+  className?: string
+}) {
+  return (
+    <div className={cn('min-w-0', className)}>
+      <div className="truncate font-semibold text-[15px] leading-5">{name}</div>
+      <div className="truncate text-muted-foreground text-xs leading-5">
+        {meta}
+      </div>
+    </div>
+  )
+}

 function ConversationHeader({
  agentName,
+  agentMeta,
  status,
+  backLabel,
+  backTarget,
  onGoHome,
-  onReset,
 }: {
  agentName: string
+  agentMeta: string
  status: string
+  backLabel: string
+  backTarget: 'home' | 'page'
  onGoHome: () => void
-  onReset: () => void
 }) {
+  const BackIcon = backTarget === 'home' ? Home : ArrowLeft
+
  return (
-    <div className="overflow-hidden rounded-[1.5rem] border border-border/60 bg-card/95 shadow-sm backdrop-blur">
-      <div className="flex items-center justify-between gap-3 px-5 py-4">
-        <div className="flex min-w-0 items-center gap-3">
-          <Button
-            variant="ghost"
-            size="icon"
-            onClick={onGoHome}
-            className="rounded-xl"
-            title="Back to home"
-          >
-            <Home className="size-4" />
-          </Button>
-          <div className="flex size-11 shrink-0 items-center justify-center rounded-2xl bg-muted text-muted-foreground">
-            <Bot className="size-5" />
-          </div>
-          <div className="min-w-0">
-            <div className="truncate font-semibold text-sm">{agentName}</div>
-            <div className="truncate text-muted-foreground text-sm">
-              {status}
-            </div>
-          </div>
-        </div>
+    <div className="flex h-14 items-center justify-between gap-4 border-border/50 border-b px-5">
+      <div className="flex min-w-0 items-center gap-3">
        <Button
          variant="ghost"
-          size="sm"
-          onClick={onReset}
-          className="rounded-xl text-muted-foreground"
+          size="icon"
+          onClick={onGoHome}
+          className="size-8 rounded-xl lg:hidden"
+          title={backLabel}
        >
-          <RotateCcw className="mr-2 size-4" />
-          New conversation
+          <BackIcon className="size-4" />
        </Button>
-      </div>
-    </div>
-  )
-}
-
-function EmptyConversationState({ agentName }: { agentName: string }) {
-  return (
-    <div className="flex min-h-full items-center justify-center py-10">
-      <div className="max-w-md rounded-[1.5rem] border border-border/60 bg-card/90 px-8 py-10 text-center shadow-sm backdrop-blur">
-        <div className="mx-auto flex size-14 items-center justify-center rounded-2xl bg-muted text-muted-foreground">
-          <Bot className="size-6" />
+        <div className="flex size-8 shrink-0 items-center justify-center rounded-xl bg-muted text-muted-foreground">
+          <Bot className="size-4" />
+        </div>
+        <AgentIdentity name={agentName} meta={agentMeta} />
+      </div>
+
+      <StatusBadge status={status} />
+    </div>
+  )
+}
+
+function AgentRailHeader({ onGoHome }: { onGoHome: () => void }) {
+  return (
+    <div className="hidden h-14 items-center border-border/50 border-r border-b bg-background/70 px-4 lg:flex">
+      <div className="flex min-w-0 items-center gap-3">
+        <Button
+          variant="ghost"
+          size="icon"
+          onClick={onGoHome}
+          className="size-8 rounded-xl"
+          title="Back to home"
+        >
+          <ArrowLeft className="size-4" />
+        </Button>
+        <div className="truncate font-semibold text-[15px] leading-5">
+          Agents
        </div>
-        <h2 className="mt-4 font-semibold text-lg">{agentName}</h2>
-        <p className="mt-2 text-muted-foreground text-sm">
-          Send a message to start a focused conversation with this agent.
-        </p>
      </div>
    </div>
  )
 }

-function getConversationStatusCopy(
-  status: string | undefined,
-  streaming: boolean,
-): string {
-  if (streaming) return 'Working on your request'
-  if (status === 'running') return 'Ready for the next task'
-  if (status === 'starting') return 'Connecting to OpenClaw'
-  if (status === 'error') return 'OpenClaw needs attention'
-  if (status === 'stopped') return 'OpenClaw is offline'
-  return 'Open agent setup to continue'
+/**
+ * Scrollable list of agents shown in the rail (large layouts only).
+ *
+ * Each agent renders as a full-width button; the one whose id equals
+ * `activeAgentId` gets the accent styling. Clicking a row hands the whole
+ * entry to `onSelectAgent`.
+ */
+function AgentRailList({
+  activeAgentId,
+  agents,
+  onSelectAgent,
+}: {
+  activeAgentId: string
+  agents: AgentEntry[]
+  onSelectAgent: (entry: AgentEntry) => void
+}) {
+  const renderAgentRow = (agent: AgentEntry) => {
+    const isActive = agent.agentId === activeAgentId
+    // Fall back to a generic label when the model has no display name.
+    const metaLabel = getModelDisplayName(agent.model) ?? 'OpenClaw agent'
+
+    const rowClassName = cn(
+      'w-full rounded-2xl border px-3 py-3 text-left transition-all',
+      isActive
+        ? 'border-[var(--accent-orange)]/30 bg-[var(--accent-orange)]/8 shadow-sm'
+        : 'border-transparent bg-transparent hover:border-border/60 hover:bg-card',
+    )
+    const iconClassName = cn(
+      'flex size-9 items-center justify-center rounded-xl',
+      isActive
+        ? 'bg-[var(--accent-orange)]/12 text-[var(--accent-orange)]'
+        : 'bg-muted text-muted-foreground',
+    )
+
+    return (
+      <button
+        key={agent.agentId}
+        type="button"
+        onClick={() => onSelectAgent(agent)}
+        className={rowClassName}
+      >
+        <div className="flex items-center gap-3">
+          <div className={iconClassName}>
+            <Bot className="size-4" />
+          </div>
+          <AgentIdentity name={agent.name} meta={metaLabel} />
+        </div>
+      </button>
+    )
+  }
+
+  return (
+    <aside className="hidden min-h-0 flex-col border-border/50 border-r bg-background/70 lg:flex">
+      <div className="styled-scrollbar min-h-0 flex-1 space-y-2 overflow-y-auto px-3 py-3">
+        {agents.map((agent) => renderAgentRow(agent))}
+      </div>
+    </aside>
+  )
 }

-export const AgentCommandConversation: FC = () => {
-  const { agentId } = useParams<{ agentId: string }>()
-  const [searchParams, setSearchParams] = useSearchParams()
+/**
+ * Maps a gateway status string to the short label shown in the conversation
+ * header. Any status not listed below (including `undefined`) reads "Setup".
+ */
+function getConversationStatusCopy(status: string | undefined): string {
+  switch (status) {
+    case 'running':
+      return 'Ready'
+    case 'starting':
+      return 'Connecting'
+    case 'error':
+      return 'Attention'
+    case 'stopped':
+      return 'Offline'
+    default:
+      return 'Setup'
+  }
+}
+
+/**
+ * Stateful body of the conversation view for a single agent.
+ *
+ * Responsibilities visible in this component:
+ * - loads paged chat history through `useClawChatHistory` and flattens it
+ *   into `historyMessages` / `chatHistory`;
+ * - tracks the session key (`streamSessionKey` from the live conversation
+ *   wins over the key reported by the first history page);
+ * - routes every outgoing message through `useOutboundQueue`, and refetches
+ *   history when an item that was 'sending' leaves that state;
+ * - enqueues `initialMessage` at most once per `agentId:message` pair, after
+ *   the gateway is running and the history query has fetched or errored,
+ *   calling `onInitialMessageConsumed` just before enqueueing;
+ * - renders the transcript (`ClawChat`) above the composer
+ *   (`ConversationInput`).
+ *
+ * The composer is disabled whenever `status?.status` is not 'running'.
+ */
+function AgentConversationController({
+  agentId,
+  initialMessage,
+  onInitialMessageConsumed,
+  status,
+  agents,
+  agentPathPrefix,
+  createAgentPath,
+}: {
+  agentId: string
+  initialMessage: string | null
+  onInitialMessageConsumed: () => void
+  status: ReturnType<typeof useAgentCommandData>['status']
+  agents: AgentEntry[]
+  agentPathPrefix: string
+  createAgentPath: string
+}) {
  const navigate = useNavigate()
-  const scrollRef = useRef<HTMLDivElement>(null)
-  const initialQuerySent = useRef(false)
-  const { status, agents } = useAgentCommandData()
-  const shouldRedirectHome = !agentId
-  const resolvedAgentId = agentId ?? ''
-  const agent = agents.find((entry) => entry.agentId === resolvedAgentId)
-  const agentName = agent?.name || resolvedAgentId || 'Agent'
-  const { turns, streaming, loading, send, resetConversation } =
-    useAgentConversation(resolvedAgentId, agentName)
-  const lastTurn = turns[turns.length - 1]
-  const lastTurnPartCount = lastTurn?.parts.length ?? 0
+  const initialMessageSentRef = useRef<string | null>(null)
+  const onInitialMessageConsumedRef = useRef(onInitialMessageConsumed)
+  const [streamSessionKey, setStreamSessionKey] = useState<string | null>(null)
+  const agent = agents.find((entry) => entry.agentId === agentId)
+  const agentName = agent?.name || agentId || 'Agent'
+  // Single source of truth: the history endpoint resolves the session itself
+  // when sessionKey is null. Once a chat creates a new session, streamSessionKey
+  // overrides it and the history queryKey rotates to refetch for that session.
+  const historyQuery = useClawChatHistory({
+    agentId,
+    sessionKey: streamSessionKey,
+  })

+  const historyMessages = useMemo(
+    () => flattenHistoryPages(historyQuery.data?.pages ?? []),
+    [historyQuery.data?.pages],
+  )
+  const chatHistory = useMemo(
+    () => buildChatHistoryFromClawMessages(historyMessages),
+    [historyMessages],
+  )
+  const resolvedSessionKey =
+    streamSessionKey ?? historyQuery.data?.pages?.[0]?.sessionKey ?? null
+
+  const { turns, streaming } = useAgentConversation(agentId, {
+    sessionKey: resolvedSessionKey,
+    history: chatHistory,
+    onSessionKeyChange: (sessionKey) => {
+      setStreamSessionKey(sessionKey)
+    },
+  })
+  const outboundQueue = useOutboundQueue({
+    agentId,
+    sessionKey: resolvedSessionKey,
+  })
+  // Keep the latest callback in a ref so the initial-message effect below can
+  // call it without listing it as a dependency.
+  onInitialMessageConsumedRef.current = onInitialMessageConsumed
+
+  // Refetch history whenever a server-dispatched queue item completes.
+  // The server worker streams the queued turn into OpenClaw directly, so
+  // the client never observes the live tokens — we only see the new
+  // assistant turn once the JSONL is updated. Watching the queue for
+  // any 'sending' item dropping out is the cleanest "turn finalized"
+  // signal we have without exposing per-turn SSE.
+  const previousSendingIdsRef = useRef<Set<string>>(new Set())
  useEffect(() => {
-    if (shouldRedirectHome) return
-
-    const query = searchParams.get('q')
-    if (query && !initialQuerySent.current && !loading) {
-      initialQuerySent.current = true
-      setSearchParams({}, { replace: true })
-      void send(query)
+    const currentSending = new Set(
+      outboundQueue.queue
+        .filter((item) => item.status === 'sending')
+        .map((item) => item.id),
+    )
+    const dropped = [...previousSendingIdsRef.current].filter(
+      (id) => !currentSending.has(id),
+    )
+    previousSendingIdsRef.current = currentSending
+    if (dropped.length > 0) {
+      void historyQuery.refetch()
    }
-  }, [loading, searchParams, send, setSearchParams, shouldRedirectHome])
+  }, [outboundQueue.queue, historyQuery])
+
+  const disabled = status?.status !== 'running'
+  // Two-part gate: cover both "still fetching" AND "just got enabled but
+  // hasn't started fetching yet". When `enabled` flips true (baseUrl
+  // resolves), there's a render frame where React Query reports
+  // isLoading=false but hasn't run the queryFn yet — `isFetched` is still
+  // false. Without this we render EmptyState during that one frame.
+  const isInitialLoading =
+    historyQuery.isLoading || (!historyQuery.isFetched && !historyQuery.isError)
+
+  const historyReady = historyQuery.isFetched || historyQuery.isError
+  const initialMessageKey = initialMessage
+    ? `${agentId}:${initialMessage}`
+    : null
+  const error = historyQuery.error ?? null
+
+  const enqueueRef = useRef(outboundQueue.enqueue)
+  enqueueRef.current = outboundQueue.enqueue

  useEffect(() => {
+    const query = initialMessage?.trim()
+    if (!initialMessageKey) {
+      initialMessageSentRef.current = null
+      return
+    }
+
+    // The initial-message handoff (home composer → conversation page via
+    // ?q=) goes through the outbound queue too, so it inherits the same
+    // single-flight serialization. We no longer need to gate on
+    // `streaming` — the queue worker drains as soon as the agent is
+    // free.
    if (
-      shouldRedirectHome ||
-      (turns.length === 0 && lastTurnPartCount === 0 && !streaming)
+      !query ||
+      initialMessageSentRef.current === initialMessageKey ||
+      disabled ||
+      !historyReady
    ) {
      return
    }

-    scrollRef.current?.scrollTo({
-      top: scrollRef.current.scrollHeight,
-      behavior: 'smooth',
-    })
-  }, [lastTurnPartCount, shouldRedirectHome, streaming, turns.length])
-
-  if (shouldRedirectHome) {
-    return <Navigate to="/home" replace />
-  }
+    initialMessageSentRef.current = initialMessageKey
+    onInitialMessageConsumedRef.current()
+    // NOTE(review): unlike the composer's onSend below, this enqueue passes
+    // no `history` — confirm the queue does not need it for the first turn.
+    enqueueRef.current({ text: query })
+  }, [disabled, historyReady, initialMessage, initialMessageKey])

  const handleSelectAgent = (entry: AgentEntry) => {
-    navigate(`/home/agents/${entry.agentId}`)
+    navigate(`${agentPathPrefix}/${entry.agentId}`)
  }

-  const statusCopy = getConversationStatusCopy(status?.status, streaming)
-
  return (
-    <div className="absolute inset-0 overflow-hidden">
-      <div className="fade-in slide-in-from-bottom-5 mx-auto flex h-full w-full max-w-3xl animate-in flex-col gap-3 px-4 pt-4 pb-2 duration-300">
-        <ConversationHeader
-          agentName={agentName}
-          status={statusCopy}
-          onGoHome={() => navigate('/home')}
-          onReset={resetConversation}
-        />
+    <div className="flex min-h-0 flex-col overflow-hidden">
+      <ClawChat
+        agentName={agentName}
+        historyMessages={historyMessages}
+        turns={turns}
+        streaming={streaming}
+        isInitialLoading={isInitialLoading}
+        error={error}
+        hasNextPage={Boolean(historyQuery.hasNextPage)}
+        isFetchingNextPage={historyQuery.isFetchingNextPage}
+        onFetchNextPage={() => {
+          void historyQuery.fetchNextPage()
+        }}
+        onRetry={() => {
+          void historyQuery.refetch()
+        }}
+      />

-        <main
-          ref={scrollRef}
-          className={cn(
-            'styled-scrollbar min-h-0 flex-1 overflow-y-auto overflow-x-hidden rounded-[1.5rem] border border-border/50 bg-card/85 px-5 py-5 shadow-sm',
-            '[&_[data-streamdown="code-block"]]:!max-w-full [&_[data-streamdown="table-wrapper"]]:!max-w-full [&_[data-streamdown="code-block"]]:overflow-x-auto [&_[data-streamdown="table-wrapper"]]:overflow-x-auto',
-          )}
-        >
-          {loading ? (
-            <div className="flex h-full items-center justify-center text-muted-foreground text-sm">
-              Loading conversation...
-            </div>
-          ) : turns.length === 0 ? (
-            <EmptyConversationState agentName={agentName} />
-          ) : (
-            <div className="w-full space-y-4">
-              {turns.map((turn, index) => (
-                <ConversationMessage
-                  key={turn.id}
-                  turn={turn}
-                  streaming={streaming && index === turns.length - 1}
-                />
-              ))}
-            </div>
-          )}
-        </main>
-
-        <div className="w-full flex-shrink-0">
+      <div className="border-border/50 border-t bg-background/88 px-4 py-3 backdrop-blur-md">
+        <div className="mx-auto max-w-3xl">
          <ConversationInput
            variant="conversation"
            agents={agents}
-            selectedAgentId={resolvedAgentId}
+            selectedAgentId={agentId}
            onSelectAgent={handleSelectAgent}
-            onSend={(text) => {
-              void send(text)
+            onSend={(input) => {
+              outboundQueue.enqueue({
+                text: input.text,
+                attachments: input.attachments.map((a) => a.payload),
+                attachmentPreviews: input.attachments.map((a) => ({
+                  id: a.id,
+                  kind: a.kind,
+                  mediaType: a.mediaType,
+                  name: a.name,
+                  dataUrl: a.dataUrl,
+                })),
+                history: chatHistory,
+              })
            }}
-            onCreateAgent={() => navigate('/agents')}
+            onCreateAgent={() => navigate(createAgentPath)}
            streaming={streaming}
-            disabled={status?.status !== 'running'}
+            disabled={disabled}
            status={status?.status}
            placeholder={`Message ${agentName}...`}
+            outboundQueue={outboundQueue.queue}
+            onCancelQueued={outboundQueue.cancel}
+            onRetryQueued={outboundQueue.retry}
          />
        </div>
      </div>
    </div>
  )
 }
+
+/**
+ * Routing and copy options for `AgentCommandConversation`. Every field is
+ * optional; the component supplies the defaults.
+ */
+interface AgentCommandConversationProps {
+  /** Picks the back-button copy: 'page' reads "Back to agents", otherwise "Back to home". */
+  variant?: 'command' | 'page'
+  /** Route the back buttons navigate to. */
+  backPath?: string
+  /** Route prefix for agent selection; the target is `${agentPathPrefix}/${agentId}`. */
+  agentPathPrefix?: string
+  /** Route used by the composer's create-agent action. */
+  createAgentPath?: string
+}
+
+/**
+ * Route-level shell for one agent conversation.
+ *
+ * Reads the `agentId` route param and the optional `q` search param, then
+ * lays out the agent rail, the conversation header and the
+ * `AgentConversationController`. The controller is keyed by agent id, so it
+ * remounts (dropping its local state) whenever the user switches agents.
+ *
+ * When the route carries no agent id the component redirects to `backPath`
+ * (default "/home") instead of rendering.
+ */
+export const AgentCommandConversation: FC<AgentCommandConversationProps> = ({
+  variant = 'command',
+  backPath = '/home',
+  agentPathPrefix = '/home/agents',
+  createAgentPath = '/agents',
+}) => {
+  const { agentId } = useParams<{ agentId: string }>()
+  const [searchParams, setSearchParams] = useSearchParams()
+  const navigate = useNavigate()
+  const { status, agents } = useAgentCommandData()
+
+  // Every hook above runs unconditionally, so bailing out here is safe.
+  // Redirect through `backPath` rather than a hard-coded "/home" so a caller
+  // that configures a different back route is not thrown to the home screen;
+  // with the default props the target is still "/home".
+  if (!agentId) {
+    return <Navigate to={backPath} replace />
+  }
+
+  const agent = agents.find((entry) => entry.agentId === agentId)
+  // `agentId` is non-empty past the guard, so it is a sufficient fallback.
+  const agentName = agent?.name || agentId
+  const agentMeta = getModelDisplayName(agent?.model) ?? 'OpenClaw agent'
+  const initialMessage = searchParams.get('q')
+  const isPageVariant = variant === 'page'
+  const backLabel = isPageVariant ? 'Back to agents' : 'Back to home'
+  const statusCopy = getConversationStatusCopy(status?.status)
+
+  const handleGoBack = () => navigate(backPath)
+  const handleSelectAgent = (entry: AgentEntry) => {
+    navigate(`${agentPathPrefix}/${entry.agentId}`)
+  }
+
+  return (
+    <div className="absolute inset-0 overflow-hidden bg-background md:pl-[theme(spacing.14)]">
+      <div className="mx-auto grid h-full w-full max-w-[1480px] lg:grid-cols-[288px_minmax(0,1fr)] lg:grid-rows-[3.5rem_minmax(0,1fr)]">
+        <AgentRailHeader onGoHome={handleGoBack} />
+
+        <ConversationHeader
+          agentName={agentName}
+          agentMeta={agentMeta}
+          status={statusCopy}
+          backLabel={backLabel}
+          backTarget={isPageVariant ? 'page' : 'home'}
+          onGoHome={handleGoBack}
+        />
+
+        <AgentRailList
+          activeAgentId={agentId}
+          agents={agents}
+          onSelectAgent={handleSelectAgent}
+        />
+
+        <AgentConversationController
+          key={agentId}
+          agentId={agentId}
+          agents={agents}
+          status={status}
+          initialMessage={initialMessage}
+          // Clearing the search params removes `q` once it has been queued,
+          // so a reload does not send the same prompt again.
+          onInitialMessageConsumed={() =>
+            setSearchParams({}, { replace: true })
+          }
+          agentPathPrefix={agentPathPrefix}
+          createAgentPath={createAgentPath}
+        />
+      </div>
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/AgentCommandHome.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/AgentCommandHome.tsx
@@ -1,20 +1,19 @@
-import { ArrowRight } from 'lucide-react'
+import { ArrowRight, Bot, Plus, Settings2 } from 'lucide-react'
 import { type FC, useEffect, useState } from 'react'
 import { useNavigate } from 'react-router'
 import { Button } from '@/components/ui/button'
 import { Card, CardContent } from '@/components/ui/card'
+import { Separator } from '@/components/ui/separator'
 import type { AgentEntry } from '@/entrypoints/app/agents/useOpenClaw'
 import { ImportDataHint } from '@/entrypoints/newtab/index/ImportDataHint'
-import { NewTabBranding } from '@/entrypoints/newtab/index/NewTabBranding'
-import { NewTabTip } from '@/entrypoints/newtab/index/NewTabTip'
-import { ScheduleResults } from '@/entrypoints/newtab/index/ScheduleResults'
 import { SignInHint } from '@/entrypoints/newtab/index/SignInHint'
-import { TopSites } from '@/entrypoints/newtab/index/TopSites'
 import { useActiveHint } from '@/entrypoints/newtab/index/useActiveHint'
+import type { AgentCardData } from '@/lib/agent-conversations/types'
 import { AgentCardDock } from './AgentCardDock'
 import { useAgentCommandData } from './agent-command-layout'
 import { ConversationInput } from './ConversationInput'
-import { useAgentCardData } from './useAgentCardData'
+import { buildAgentCardData } from './useAgentCardData'
+import { useAgentDashboard } from './useAgentDashboard'

 function AgentCommandSetupState({
  onOpenAgents,
@@ -22,13 +21,19 @@ function AgentCommandSetupState({
  onOpenAgents: () => void
 }) {
  return (
-    <Card className="border-border/60 bg-card/85 shadow-sm">
-      <CardContent className="flex flex-col items-center gap-4 p-6 text-center">
-        <p className="max-w-xl text-muted-foreground text-sm">
-          Set up OpenClaw agents to turn your new tab into an agent command
-          center.
-        </p>
-        <Button onClick={onOpenAgents} className="gap-2">
+    <Card className="border-border/60 bg-card/90 shadow-sm">
+      <CardContent className="flex flex-col items-center gap-4 p-8 text-center">
+        <div className="flex size-12 items-center justify-center rounded-2xl bg-muted text-muted-foreground">
+          <Bot className="size-5" />
+        </div>
+        <div className="space-y-2">
+          <h2 className="font-semibold text-lg">Set up your first agent</h2>
+          <p className="max-w-md text-muted-foreground text-sm leading-6">
+            Connect OpenClaw and create an agent before using the new tab as
+            your workspace.
+          </p>
+        </div>
+        <Button onClick={onOpenAgents} className="gap-2 rounded-xl">
          Open Agent Setup
          <ArrowRight className="size-4" />
        </Button>
@@ -39,13 +44,19 @@ function AgentCommandSetupState({

 function EmptyAgentsState({ onOpenAgents }: { onOpenAgents: () => void }) {
  return (
-    <Card className="border-border/60 bg-card/85 shadow-sm">
-      <CardContent className="flex flex-col items-center gap-4 p-6 text-center">
-        <p className="max-w-xl text-muted-foreground text-sm">
-          OpenClaw is running, but you do not have any agents yet.
-        </p>
-        <Button variant="outline" onClick={onOpenAgents}>
-          Create your first agent
+    <Card className="border-border/60 bg-card/90 shadow-sm">
+      <CardContent className="flex flex-col items-center gap-4 p-8 text-center">
+        <div className="flex size-12 items-center justify-center rounded-2xl bg-muted text-muted-foreground">
+          <Plus className="size-5" />
+        </div>
+        <div className="space-y-2">
+          <h2 className="font-semibold text-lg">No agents yet</h2>
+          <p className="max-w-md text-muted-foreground text-sm leading-6">
+            Create an agent to start using BrowserOS as an agent-first new tab.
+          </p>
+        </div>
+        <Button variant="outline" onClick={onOpenAgents} className="rounded-xl">
+          Create agent
        </Button>
      </CardContent>
    </Card>
@@ -58,13 +69,19 @@ function OpenClawUnavailableState({
  onOpenAgents: () => void
 }) {
  return (
-    <Card className="border-border/60 bg-card/85 shadow-sm">
-      <CardContent className="flex flex-col items-center gap-4 p-6 text-center">
-        <p className="max-w-xl text-muted-foreground text-sm">
-          OpenClaw is unavailable right now. Open the Agents page to restart the
-          gateway or review setup.
-        </p>
-        <Button onClick={onOpenAgents} className="gap-2">
+    <Card className="border-border/60 bg-card/90 shadow-sm">
+      <CardContent className="flex flex-col items-center gap-4 p-8 text-center">
+        <div className="flex size-12 items-center justify-center rounded-2xl bg-muted text-muted-foreground">
+          <Settings2 className="size-5" />
+        </div>
+        <div className="space-y-2">
+          <h2 className="font-semibold text-lg">OpenClaw is unavailable</h2>
+          <p className="max-w-md text-muted-foreground text-sm leading-6">
+            Review your agent setup to restart the gateway or reconnect the
+            local service.
+          </p>
+        </div>
+        <Button onClick={onOpenAgents} className="gap-2 rounded-xl">
          Open Agent Setup
          <ArrowRight className="size-4" />
        </Button>
@@ -73,17 +90,54 @@ function OpenClawUnavailableState({
  )
 }

+/**
+ * "Recent agents" section of the home view: a heading with a "Manage agents"
+ * button, followed by the agent card dock.
+ *
+ * Renders nothing when `agents` is empty. `onOpenAgents` backs both the
+ * manage button and the dock's create-agent action.
+ */
+function RecentThreads({
+  activeAgentId,
+  agents,
+  onOpenAgents,
+  onSelectAgent,
+}: {
+  activeAgentId?: string | null
+  agents: AgentCardData[]
+  onOpenAgents: () => void
+  onSelectAgent: (agentId: string) => void
+}) {
+  const hasAgents = agents.length > 0
+  if (!hasAgents) {
+    return null
+  }
+
+  // Normalise a null/absent id to `undefined` before handing it to the dock.
+  const dockActiveAgentId = activeAgentId ?? undefined
+
+  const sectionHeader = (
+    <div className="flex items-center justify-between gap-4">
+      <div>
+        <h2 className="font-semibold text-base">Recent agents</h2>
+        <p className="text-muted-foreground text-sm">
+          Continue from where you left off.
+        </p>
+      </div>
+      <Button
+        variant="outline"
+        onClick={onOpenAgents}
+        className="rounded-xl"
+        size="sm"
+      >
+        Manage agents
+      </Button>
+    </div>
+  )
+
+  return (
+    <section className="space-y-4">
+      {sectionHeader}
+      <AgentCardDock
+        agents={agents}
+        activeAgentId={dockActiveAgentId}
+        onSelectAgent={onSelectAgent}
+        onCreateAgent={onOpenAgents}
+      />
+    </section>
+  )
+}
+
 export const AgentCommandHome: FC = () => {
  const navigate = useNavigate()
  const activeHint = useActiveHint()
  const { status, agents } = useAgentCommandData()
-  const [mounted, setMounted] = useState(false)
  const [selectedAgentId, setSelectedAgentId] = useState<string | null>(null)
-  const cardData = useAgentCardData(agents, status?.status)
-
-  useEffect(() => {
-    setMounted(true)
-  }, [])
+  const { data: dashboard } = useAgentDashboard(status?.status === 'running')
+  const cardData = buildAgentCardData(agents, status?.status, dashboard?.agents)

  useEffect(() => {
    if (agents.length === 0) {
@@ -101,9 +155,16 @@ export const AgentCommandHome: FC = () => {
    }
  }, [agents, selectedAgentId])

-  const handleSend = (text: string) => {
+  const handleSend = (input: { text: string }) => {
    if (!selectedAgentId) return
-    navigate(`/home/agents/${selectedAgentId}?q=${encodeURIComponent(text)}`)
+    // Home composer navigates to the conversation page with the prompt in
+    // the query string. Attachments are dropped at this boundary in v1 —
+    // the conversation page (where staging UX is most useful anyway) is
+    // where users can attach. A future iteration can stash staged files
+    // in chrome.storage.session and replay them on first mount there.
+    navigate(
+      `/home/agents/${selectedAgentId}?q=${encodeURIComponent(input.text)}`,
+    )
  }

  const handleSelectAgent = (agent: AgentEntry) => {
@@ -117,62 +178,65 @@ export const AgentCommandHome: FC = () => {
    openClawStatus !== 'running' &&
    openClawStatus !== 'uninitialized' &&
    cardData.length === 0
+  const selectedCard =
+    cardData.find((agent) => agent.agentId === selectedAgentId) ?? cardData[0]

  return (
-    <div className="pt-[max(25vh,16px)]">
-      <div className="relative w-full space-y-8 md:w-3xl">
-        <NewTabBranding />
-
-        <ConversationInput
-          variant="home"
-          agents={agents}
-          selectedAgentId={selectedAgentId}
-          onSelectAgent={handleSelectAgent}
-          onSend={handleSend}
-          onCreateAgent={() => navigate('/agents')}
-          streaming={false}
-          disabled={status?.status !== 'running'}
-          status={status?.status}
-          placeholder={
-            status?.status === 'running'
-              ? undefined
-              : 'OpenClaw is not running...'
-          }
-        />
-
-        {mounted ? <NewTabTip /> : null}
-
+    <div className="min-h-full px-4 py-6">
+      <div className="mx-auto flex w-full max-w-5xl flex-col gap-8">
        {isSetup ? (
          shouldShowUnavailableState ? (
            <OpenClawUnavailableState
              onOpenAgents={() => navigate('/agents')}
            />
          ) : cardData.length > 0 ? (
-            <section className="space-y-3">
-              <div className="flex items-center justify-between">
-                <div>
-                  <h2 className="font-semibold text-base">Agents</h2>
-                  <p className="text-muted-foreground text-sm">
-                    Pick up where your agents left off.
+            <>
+              <div className="flex flex-col items-center gap-5 pt-[max(10vh,24px)] text-center">
+                <div className="space-y-3">
+                  <h1 className="font-semibold text-[clamp(2rem,4vw,3.25rem)] leading-tight tracking-tight">
+                    What should your agent work on next?
+                  </h1>
+                  <p className="mx-auto max-w-2xl text-muted-foreground text-sm leading-6">
+                    Start with a task, continue a thread, or switch to another
+                    agent without leaving the new tab.
                  </p>
                </div>
+
+                <div className="w-full max-w-3xl">
+                  <ConversationInput
+                    variant="home"
+                    agents={agents}
+                    selectedAgentId={selectedAgentId}
+                    onSelectAgent={handleSelectAgent}
+                    onSend={handleSend}
+                    onCreateAgent={() => navigate('/agents')}
+                    streaming={false}
+                    disabled={status?.status !== 'running'}
+                    status={status?.status}
+                    placeholder={
+                      status?.status === 'running'
+                        ? `Ask ${selectedCard?.name ?? 'your agent'} to handle a task...`
+                        : 'OpenClaw is not running...'
+                    }
+                  />
+                </div>
              </div>
-              <AgentCardDock
+
+              <Separator />
+
+              <RecentThreads
+                activeAgentId={selectedAgentId}
                agents={cardData}
-                activeAgentId={selectedAgentId ?? undefined}
+                onOpenAgents={() => navigate('/agents')}
                onSelectAgent={(agentId) => navigate(`/home/agents/${agentId}`)}
-                onCreateAgent={() => navigate('/agents')}
              />
-            </section>
+            </>
          ) : (
            <EmptyAgentsState onOpenAgents={() => navigate('/agents')} />
          )
        ) : (
          <AgentCommandSetupState onOpenAgents={() => navigate('/agents')} />
        )}
-
-        {mounted ? <TopSites /> : null}
-        {mounted ? <ScheduleResults /> : null}
      </div>

      {activeHint === 'signin' ? <SignInHint /> : null}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ClawChat.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ClawChat.tsx
@@ -0,0 +1,172 @@
+import { Bot, Loader2, RefreshCw } from 'lucide-react'
+import { type FC, useEffect, useRef } from 'react'
+import {
+  Conversation,
+  ConversationContent,
+  ConversationScrollButton,
+} from '@/components/ai-elements/conversation'
+import type { AgentConversationTurn } from '@/lib/agent-conversations/types'
+import { cn } from '@/lib/utils'
+import { ClawChatMessage } from './ClawChatMessage'
+import { ConversationMessage } from './ConversationMessage'
+import type { ClawChatMessage as ClawChatMessageModel } from './claw-chat-types'
+
+/** Props for the `ClawChat` transcript view. */
+interface ClawChatProps {
+  /** Agent name shown in the empty state. */
+  agentName: string
+  /** Persisted messages, rendered first via `ClawChatMessage`. */
+  historyMessages: ClawChatMessageModel[]
+  /** Live conversation turns, rendered after the history via `ConversationMessage`. */
+  turns: AgentConversationTurn[]
+  /** When true, the last entry of `turns` is rendered in its streaming state. */
+  streaming: boolean
+  /** When true, a loading indicator replaces all other content. */
+  isInitialLoading: boolean
+  /** Full-screen error state when there are no messages; inline notice below them otherwise. */
+  error: Error | null
+  /** Whether older history pages exist; gates the top-of-list pagination trigger. */
+  hasNextPage: boolean
+  /** True while an older page is loading; shows a spinner and pauses the trigger. */
+  isFetchingNextPage: boolean
+  /** Called when the top sentinel becomes visible and another page may be fetched. */
+  onFetchNextPage: () => void
+  /** Called by the Retry button of the full-screen error state. */
+  onRetry: () => void
+  /** Extra classes merged onto the root wrapper. */
+  className?: string
+}
+
+/**
+ * Centered placeholder for a conversation without messages: a bot badge,
+ * the agent's name and a one-line prompt to start a task.
+ */
+function EmptyConversationState({ agentName }: { agentName: string }) {
+  const botBadge = (
+    <div className="mx-auto flex size-14 items-center justify-center rounded-3xl bg-muted text-muted-foreground">
+      <Bot className="size-6" />
+    </div>
+  )
+
+  return (
+    <div className="flex h-full items-center justify-center px-6 py-12">
+      <div className="max-w-md text-center">
+        {botBadge}
+        <h2 className="mt-5 font-semibold text-xl">{agentName}</h2>
+        <p className="mt-2 text-muted-foreground text-sm leading-6">
+          Ask {agentName} to start a task.
+        </p>
+      </div>
+    </div>
+  )
+}
+
+/** Centered spinner with a "Loading conversation..." caption. */
+function LoadingConversationState() {
+  const spinner = <Loader2 className="size-4 animate-spin" />
+
+  return (
+    <div className="flex h-full items-center justify-center gap-2 text-muted-foreground text-sm">
+      {spinner}
+      Loading conversation...
+    </div>
+  )
+}
+
+/**
+ * Centered error card: shows `message` and a Retry button that calls
+ * `onRetry`.
+ */
+function ConversationErrorState({
+  message,
+  onRetry,
+}: {
+  message: string
+  onRetry: () => void
+}) {
+  const retryButton = (
+    <button
+      type="button"
+      onClick={onRetry}
+      className="mt-3 inline-flex items-center gap-2 rounded-lg border border-border/60 px-3 py-1.5 font-medium text-muted-foreground text-xs transition-colors hover:bg-accent hover:text-foreground"
+    >
+      <RefreshCw className="size-3.5" />
+      Retry
+    </button>
+  )
+
+  return (
+    <div className="flex h-full items-center justify-center px-6 py-12">
+      <div className="max-w-md rounded-2xl border border-border/60 bg-card px-5 py-4 text-center shadow-sm">
+        <p className="text-sm">{message}</p>
+        {retryButton}
+      </div>
+    </div>
+  )
+}
+
+/**
+ * Scrollable transcript for one agent conversation.
+ *
+ * Render order of the content area:
+ * 1. loading indicator while `isInitialLoading`;
+ * 2. full error state when `error` is set and there are no messages;
+ * 3. empty state when there are no messages;
+ * 4. otherwise the message list: history messages, then live turns, then an
+ *    inline error notice if `error` is set.
+ *
+ * Older history is paged in by an IntersectionObserver watching a 1px
+ * sentinel at the top of the message list.
+ */
+export const ClawChat: FC<ClawChatProps> = ({
+  agentName,
+  historyMessages,
+  turns,
+  streaming,
+  isInitialLoading,
+  error,
+  hasNextPage,
+  isFetchingNextPage,
+  onFetchNextPage,
+  onRetry,
+  className,
+}) => {
+  const topSentinelRef = useRef<HTMLDivElement>(null)
+  // Latest callback in a ref so the observer effect does not re-subscribe
+  // every time the parent passes a new function identity.
+  const onFetchNextPageRef = useRef(onFetchNextPage)
+  onFetchNextPageRef.current = onFetchNextPage
+  const hasMessages = historyMessages.length > 0 || turns.length > 0
+  // The sentinel element only exists while the message-list branch renders.
+  const isMessageListVisible = !isInitialLoading && hasMessages
+
+  useEffect(() => {
+    // Nothing to observe: the sentinel is not mounted, there is no older
+    // page, or a page is already in flight. Tracking the mount condition in
+    // the deps makes the effect re-run when the sentinel appears, even if
+    // `hasNextPage` / `isFetchingNextPage` did not change in that render.
+    if (!isMessageListVisible || !hasNextPage || isFetchingNextPage) return
+
+    const sentinel = topSentinelRef.current
+    if (!sentinel) return
+
+    const observer = new IntersectionObserver(
+      (entries) => {
+        const [entry] = entries
+        if (!entry?.isIntersecting) return
+
+        onFetchNextPageRef.current()
+      },
+      {
+        root: null,
+        // Start fetching shortly before the sentinel scrolls into view.
+        rootMargin: '160px 0px 0px 0px',
+        threshold: 0,
+      },
+    )
+
+    observer.observe(sentinel)
+    return () => observer.disconnect()
+  }, [hasNextPage, isFetchingNextPage, isMessageListVisible])
+
+  return (
+    <div
+      className={cn('flex min-h-0 flex-1 flex-col overflow-hidden', className)}
+    >
+      <Conversation
+        className={cn(
+          'bg-background',
+          '[&_[data-streamdown="code-block"]]:!w-full [&_[data-streamdown="code-block"]]:!max-w-full [&_[data-streamdown="table-wrapper"]]:!w-full [&_[data-streamdown="table-wrapper"]]:!max-w-full [&_[data-streamdown="code-block"]]:overflow-x-auto [&_[data-streamdown="table-wrapper"]]:overflow-x-auto',
+        )}
+      >
+        <ConversationContent className="min-h-full px-5 py-5">
+          {isInitialLoading ? (
+            <LoadingConversationState />
+          ) : error && !hasMessages ? (
+            <ConversationErrorState message={error.message} onRetry={onRetry} />
+          ) : !hasMessages ? (
+            <EmptyConversationState agentName={agentName} />
+          ) : (
+            <div className="mx-auto flex w-full max-w-3xl flex-col gap-3">
+              <div ref={topSentinelRef} aria-hidden="true" className="h-px" />
+              {isFetchingNextPage ? (
+                <div className="flex justify-center py-2 text-muted-foreground text-xs">
+                  <Loader2 className="mr-2 size-3.5 animate-spin" />
+                  Loading older messages...
+                </div>
+              ) : null}
+              {!hasNextPage && historyMessages.length > 0 ? (
+                <div className="py-1 text-center text-muted-foreground text-xs">
+                  Start of conversation
+                </div>
+              ) : null}
+              {historyMessages.map((message) => (
+                <ClawChatMessage key={message.id} message={message} />
+              ))}
+              {turns.map((turn, index) => (
+                <ConversationMessage
+                  key={turn.id}
+                  turn={turn}
+                  streaming={streaming && index === turns.length - 1}
+                />
+              ))}
+              {error ? (
+                <div className="rounded-xl border border-border/60 bg-card px-4 py-3 text-muted-foreground text-sm">
+                  {error.message}
+                </div>
+              ) : null}
+            </div>
+          )}
+        </ConversationContent>
+        <ConversationScrollButton />
+      </Conversation>
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ClawChatMessage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ClawChatMessage.tsx
@@ -0,0 +1,248 @@
+import { CheckCircle2, Copy, Loader2, Wrench, XCircle } from 'lucide-react'
+import { type FC, useCallback, useMemo } from 'react'
+import {
+  Message,
+  MessageAction,
+  MessageActions,
+  MessageAttachment,
+  MessageAttachments,
+  MessageContent,
+  MessageResponse,
+  MessageToolbar,
+} from '@/components/ai-elements/message'
+import {
+  Reasoning,
+  ReasoningContent,
+  ReasoningTrigger,
+} from '@/components/ai-elements/reasoning'
+import {
+  Task,
+  TaskContent,
+  TaskItem,
+  TaskTrigger,
+} from '@/components/ai-elements/task'
+import { cn } from '@/lib/utils'
+import type {
+  ClawChatMessagePart,
+  ClawChatMessage as ClawChatMessageType,
+} from './claw-chat-types'
+
+/**
+ * Format a USD amount as a `$`-prefixed string.
+ *
+ * Amounts below 0.005 are printed with four decimals; every other amount
+ * is printed with two.
+ */
+function formatCost(usd: number): string {
+  const fractionDigits = usd < 0.005 ? 4 : 2
+  return `$${usd.toFixed(fractionDigits)}`
+}
+
+// Narrowed views of the message-part union for the two part types that get
+// grouped when rendering.
+type ToolCallPart = Extract<ClawChatMessagePart, { type: 'tool-call' }>
+type AttachmentPart = Extract<ClawChatMessagePart, { type: 'attachment' }>
+
+/**
+ * One renderable unit produced by `buildRenderEntries`.
+ *
+ * `kind` determines which optional payload is populated: `part` for
+ * text / reasoning / meta entries, `tools` for the task entry, and
+ * `attachments` for the attachments entry.
+ */
+interface RenderEntry {
+  kind: 'text' | 'reasoning' | 'meta' | 'task' | 'attachments'
+  // Index of the source part in the message (for grouped kinds, the index of
+  // the first part of that type). Used to build the React key.
+  partIndex: number
+  part?: ClawChatMessagePart
+  tools?: ToolCallPart[]
+  attachments?: AttachmentPart[]
+}
+
+/**
+ * Turn a message's parts into an ordered render plan.
+ *
+ * Text, reasoning and meta parts each become their own entry, in place.
+ * Every tool-call part is collected into one shared `task` entry and every
+ * attachment part into one shared `attachments` entry; each grouped entry
+ * sits at the position where the first part of its type appeared. Parts of
+ * any other type are ignored.
+ */
+function buildRenderEntries(parts: ClawChatMessagePart[]): RenderEntry[] {
+  const plan: RenderEntry[] = []
+  // The grouped entries hold these arrays by reference, so parts pushed
+  // after the entry was added still show up in it.
+  const groupedTools: ToolCallPart[] = []
+  const groupedAttachments: AttachmentPart[] = []
+
+  parts.forEach((part, partIndex) => {
+    switch (part.type) {
+      case 'tool-call':
+        // An empty group means this is the first tool call: claim its slot.
+        if (groupedTools.length === 0) {
+          plan.push({ kind: 'task', partIndex, tools: groupedTools })
+        }
+        groupedTools.push(part)
+        break
+      case 'attachment':
+        if (groupedAttachments.length === 0) {
+          plan.push({
+            kind: 'attachments',
+            partIndex,
+            attachments: groupedAttachments,
+          })
+        }
+        groupedAttachments.push(part)
+        break
+      case 'text':
+      case 'reasoning':
+      case 'meta':
+        plan.push({ kind: part.type, partIndex, part })
+        break
+      default:
+        break
+    }
+  })
+
+  return plan
+}
+
+/**
+ * Status glyph for a tool call: a spinner while the call is running or
+ * pending, a green check once completed, and a destructive-colored X for
+ * any other status.
+ */
+function ToolStatusIcon({ status }: { status: ToolCallPart['status'] }) {
+  switch (status) {
+    case 'running':
+    case 'pending':
+      return (
+        <Loader2 className="size-3.5 shrink-0 animate-spin text-muted-foreground" />
+      )
+    case 'completed':
+      return <CheckCircle2 className="size-3.5 shrink-0 text-green-500" />
+    default:
+      return <XCircle className="size-3.5 shrink-0 text-destructive" />
+  }
+}
+
+/** Props for `ClawChatMessage`. */
+interface ClawChatMessageProps {
+  // The chat message whose parts are rendered.
+  message: ClawChatMessageType
+}
+
+/**
+ * Renders a single chat message from its parts.
+ *
+ * Parts are first arranged by `buildRenderEntries`: text, reasoning and meta
+ * parts render in place, all tool calls render as one collapsed Task list,
+ * and all attachments render as one attachment strip. Assistant messages
+ * that contain text additionally get a toolbar with a Copy action and, when
+ * `message.costUsd` is set to a non-zero value, the formatted cost.
+ */
+export const ClawChatMessage: FC<ClawChatMessageProps> = ({ message }) => {
+  // Text parts only, joined with newlines — this is what Copy puts on the
+  // clipboard and what gates the toolbar.
+  const messageText = message.parts
+    .filter((p) => p.type === 'text')
+    .map((p) => p.text)
+    .join('\n')
+
+  const handleCopy = useCallback(() => {
+    if (!messageText) return
+    // writeText() returns a promise that rejects when the clipboard is not
+    // available (e.g. permission denied or document not focused). Copy is
+    // best-effort, so mark the call fire-and-forget and absorb the
+    // rejection instead of leaking an unhandled promise rejection.
+    void navigator.clipboard.writeText(messageText).catch(() => undefined)
+  }, [messageText])
+
+  const entries = useMemo(
+    () => buildRenderEntries(message.parts),
+    [message.parts],
+  )
+
+  return (
+    <Message
+      from={message.role}
+      className="max-w-full group-[.is-user]:max-w-[80%]"
+    >
+      <MessageContent className="max-w-full overflow-hidden group-[.is-assistant]:w-full group-[.is-user]:max-w-full">
+        {entries.map((entry) => {
+          // partIndex is unique per entry, so it is enough to key on.
+          const key = `${message.id}-entry-${entry.partIndex}`
+
+          if (entry.kind === 'attachments' && entry.attachments) {
+            return (
+              <MessageAttachments key={key}>
+                {entry.attachments.map((attachment, idx) => (
+                  <MessageAttachment
+                    // biome-ignore lint/suspicious/noArrayIndexKey: attachment order is stable within a finalized message
+                    key={`${attachment.kind}-${idx}`}
+                    data={{
+                      type: 'file',
+                      url: attachment.dataUrl ?? '',
+                      mediaType: attachment.mediaType,
+                      filename: attachment.name,
+                    }}
+                  />
+                ))}
+              </MessageAttachments>
+            )
+          }
+
+          if (entry.kind === 'text' && entry.part?.type === 'text') {
+            return (
+              <MessageResponse
+                key={key}
+                // Historical messages are finalized — render immediately.
+                // Streamdown's default "streaming" mode uses an idle-callback
+                // debounce (300ms / 500ms idle) that paints empty content
+                // first, which made history flash blank tool collapsibles
+                // before text on every load.
+                mode="static"
+                parseIncompleteMarkdown={false}
+                className={cn(
+                  'max-w-full overflow-hidden break-words',
+                  '[&_[data-streamdown="code-block"]]:!w-full [&_[data-streamdown="code-block"]]:!max-w-full [&_[data-streamdown="code-block"]]:overflow-x-auto',
+                  '[&_[data-streamdown="table-wrapper"]]:!w-full [&_[data-streamdown="table-wrapper"]]:!max-w-full [&_[data-streamdown="table-wrapper"]]:overflow-x-auto',
+                  '[&_table]:w-max [&_table]:min-w-full',
+                )}
+              >
+                {entry.part.text}
+              </MessageResponse>
+            )
+          }
+
+          if (entry.kind === 'reasoning' && entry.part?.type === 'reasoning') {
+            return (
+              <Reasoning
+                key={key}
+                className="w-full"
+                defaultOpen={false}
+                duration={entry.part.duration}
+              >
+                <ReasoningTrigger />
+                <ReasoningContent>{entry.part.text}</ReasoningContent>
+              </Reasoning>
+            )
+          }
+
+          if (entry.kind === 'meta' && entry.part?.type === 'meta') {
+            return (
+              <div key={key} className="text-muted-foreground text-xs">
+                {entry.part.label}: {entry.part.value}
+              </div>
+            )
+          }
+
+          if (entry.kind === 'task' && entry.tools) {
+            const tools = entry.tools
+            const errorCount = tools.filter((t) => t.status === 'failed').length
+            // e.g. "Agent activity (3 actions, 1 failed)"
+            const taskTitle = `Agent activity (${tools.length} ${tools.length === 1 ? 'action' : 'actions'}${errorCount > 0 ? `, ${errorCount} failed` : ''})`
+
+            return (
+              <Task key={key} defaultOpen={false}>
+                <TaskTrigger title={taskTitle} TriggerIcon={Wrench} />
+                <TaskContent>
+                  {tools.map((tool, idx) => (
+                    <TaskItem
+                      // biome-ignore lint/suspicious/noArrayIndexKey: tool order is stable within a finalized historical message
+                      key={`${tool.name}-${tool.status}-${idx}`}
+                      className="flex items-center gap-2"
+                    >
+                      <ToolStatusIcon status={tool.status} />
+                      <span className="text-foreground text-xs">
+                        {tool.label}
+                      </span>
+                      {tool.subject ? (
+                        <span className="ml-1.5 truncate text-muted-foreground/70 text-xs">
+                          · {tool.subject}
+                        </span>
+                      ) : null}
+                      {tool.error ? (
+                        <span className="ml-2 truncate text-destructive text-xs">
+                          {tool.error}
+                        </span>
+                      ) : null}
+                      {tool.durationMs != null ? (
+                        <span className="ml-auto text-muted-foreground/60 text-xs tabular-nums">
+                          {(tool.durationMs / 1000).toFixed(1)}s
+                        </span>
+                      ) : null}
+                    </TaskItem>
+                  ))}
+                </TaskContent>
+              </Task>
+            )
+          }
+
+          return null
+        })}
+
+        {message.role === 'assistant' && messageText ? (
+          <MessageToolbar>
+            <MessageActions>
+              <MessageAction tooltip="Copy" onClick={handleCopy}>
+                <Copy className="size-3.5" />
+              </MessageAction>
+            </MessageActions>
+            {message.costUsd ? (
+              <span className="text-[11px] text-muted-foreground/50 tabular-nums">
+                {formatCost(message.costUsd)}
+              </span>
+            ) : null}
+          </MessageToolbar>
+        ) : null}
+      </MessageContent>
+    </Message>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ConversationInput.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ConversationInput.tsx
@@ -1,21 +1,36 @@
 import {
+  AlertTriangle,
  ArrowRight,
  Bot,
  ChevronDown,
+  FileText,
  Folder,
  Layers,
  Loader2,
  Mic,
+  Paperclip,
+  RefreshCw,
  Square,
+  X,
 } from 'lucide-react'
-import { type FC, type ReactNode, useEffect, useState } from 'react'
+import {
+  type DragEvent,
+  type FC,
+  type ReactNode,
+  useEffect,
+  useLayoutEffect,
+  useRef,
+  useState,
+} from 'react'
 import { AppSelector } from '@/components/elements/AppSelector'
 import { TabPickerPopover } from '@/components/elements/tab-picker-popover'
 import { WorkspaceSelector } from '@/components/elements/workspace-selector'
 import { Button } from '@/components/ui/button'
+import { Textarea } from '@/components/ui/textarea'
 import type { AgentEntry } from '@/entrypoints/app/agents/useOpenClaw'
 import { McpServerIcon } from '@/entrypoints/app/connect-mcp/McpServerIcon'
 import { useGetUserMCPIntegrations } from '@/entrypoints/app/connect-mcp/useGetUserMCPIntegrations'
+import { type StagedAttachment, stageAttachments } from '@/lib/attachments'
 import { Feature } from '@/lib/browseros/capabilities'
 import { useCapabilities } from '@/lib/browseros/useCapabilities'
 import { useMcpServers } from '@/lib/mcp/mcpServerStorage'
@@ -23,18 +38,33 @@ import { cn } from '@/lib/utils'
 import { useVoiceInput } from '@/lib/voice/useVoiceInput'
 import { useWorkspace } from '@/lib/workspace/use-workspace'
 import { AgentSelector } from './AgentSelector'
+import type { OutboundMessage } from './useOutboundQueue'
+
+/**
+ * Payload passed to `onSend`: the composer text plus the attachments that
+ * were staged for this message.
+ */
+export interface ConversationInputSendInput {
+  text: string
+  attachments: StagedAttachment[]
+}

 interface ConversationInputProps {
  agents: AgentEntry[]
  selectedAgentId: string | null
  onSelectAgent: (agent: AgentEntry) => void
-  onSend: (text: string) => void
+  onSend: (input: ConversationInputSendInput) => void
  onCreateAgent?: () => void
  streaming: boolean
  disabled?: boolean
  status?: string
  placeholder?: string
  variant?: 'home' | 'conversation'
+  // Outbound queue: when present, the composer renders the queue strip
+  // above the textarea and lets the user keep sending while a previous
+  // turn is in flight. Optional so non-conversation variants (the home
+  // page) can opt out — the queue only makes sense in the conversation
+  // page where each enqueued message will eventually be delivered to the
+  // active agent.
+  outboundQueue?: OutboundMessage[]
+  onCancelQueued?: (id: string) => void
+  onRetryQueued?: (id: string) => void
 }

 function InputActionButton({
@@ -123,6 +153,8 @@ function ContextControls({
  onToggleTab,
  showAgentSelector,
  status,
+  onAttachClick,
+  attachDisabled,
 }: {
  agents: AgentEntry[]
  onCreateAgent?: () => void
@@ -132,6 +164,8 @@ function ContextControls({
  onToggleTab: (tab: chrome.tabs.Tab) => void
  showAgentSelector: boolean
  status?: string
+  onAttachClick: () => void
+  attachDisabled: boolean
 }) {
  const { supports } = useCapabilities()
  const { selectedFolder } = useWorkspace()
@@ -146,7 +180,7 @@ function ContextControls({
  })

  return (
-    <div className="flex items-center justify-between border-border/50 border-t px-5 py-3">
+    <div className="flex items-center justify-between border-border/40 border-t px-4 py-2.5">
      <div className="flex items-center gap-1">
        {showAgentSelector ? (
          <AgentSelector
@@ -191,6 +225,20 @@ function ContextControls({
            <span>Tabs</span>
          </Button>
        </TabPickerPopover>
+        <Button
+          type="button"
+          variant="ghost"
+          onClick={onAttachClick}
+          disabled={attachDisabled}
+          title="Attach files"
+          className={cn(
+            'flex items-center gap-2 rounded-lg px-3 py-1.5 font-medium text-sm transition-all',
+            'bg-transparent text-muted-foreground hover:bg-accent hover:text-accent-foreground',
+          )}
+        >
+          <Paperclip className="h-4 w-4" />
+          <span>Attach</span>
+        </Button>
      </div>

      {supports(Feature.MANAGED_MCP_SUPPORT) ? (
@@ -234,7 +282,7 @@ function ContextControls({

 function HomeShell({ children }: { children: ReactNode }) {
  return (
-    <div className="overflow-hidden rounded-[1.5rem] border border-border/60 bg-card/95 shadow-sm backdrop-blur">
+    <div className="overflow-hidden rounded-[1.55rem] border border-border/60 bg-card/95 shadow-sm">
      {children}
    </div>
  )
@@ -242,7 +290,7 @@ function HomeShell({ children }: { children: ReactNode }) {

 function ConversationShell({ children }: { children: ReactNode }) {
  return (
-    <div className="overflow-hidden rounded-[1.5rem] border border-border/60 bg-card/95 shadow-sm backdrop-blur">
+    <div className="overflow-hidden rounded-[1.35rem] border border-border/50 bg-background/95 shadow-[0_10px_30px_rgba(15,23,42,0.06)] backdrop-blur-md">
      {children}
    </div>
  )
@@ -259,13 +307,60 @@ export const ConversationInput: FC<ConversationInputProps> = ({
  status,
  placeholder,
  variant = 'conversation',
+  outboundQueue,
+  onCancelQueued,
+  onRetryQueued,
 }) => {
  const [input, setInput] = useState('')
  const [selectedTabs, setSelectedTabs] = useState<chrome.tabs.Tab[]>([])
+  const [isExpandedDraft, setIsExpandedDraft] = useState(false)
+  const [attachments, setAttachments] = useState<StagedAttachment[]>([])
+  const [attachmentError, setAttachmentError] = useState<string | null>(null)
+  const [isStaging, setIsStaging] = useState(false)
+  const [isDragOver, setIsDragOver] = useState(false)
+  const fileInputRef = useRef<HTMLInputElement>(null)
  const voice = useVoiceInput()
+  const textareaRef = useRef<HTMLTextAreaElement>(null)
  const selectedAgent = agents.find(
    (agent) => agent.agentId === selectedAgentId,
  )
+  const isConversation = variant === 'conversation'
+
+  // Stages the given files as attachments for the draft message.
+  // Successfully staged files are appended to `attachments`; per-file
+  // problems reported by `stageAttachments` are joined into a single
+  // `attachmentError` string. `isStaging` is true for the duration.
+  const stageFiles = async (files: File[]) => {
+    if (files.length === 0) return
+    setIsStaging(true)
+    setAttachmentError(null)
+    try {
+      // NOTE(review): second argument is the number of attachments already
+      // staged — presumably used to enforce a per-message cap; confirm
+      // against `stageAttachments`.
+      const result = await stageAttachments(files, attachments.length)
+      if (result.staged.length > 0) {
+        setAttachments((prev) => [...prev, ...result.staged])
+      }
+      if (result.errors.length > 0) {
+        setAttachmentError(result.errors.map((e) => e.message).join(' \u2022 '))
+      }
+    } catch (cause) {
+      // Every caller invokes this as `void stageFiles(...)`, so an exception
+      // here would otherwise be lost as an unhandled rejection and the user
+      // would see nothing. Surface it through the same error strip instead.
+      setAttachmentError(
+        cause instanceof Error ? cause.message : 'Failed to attach files',
+      )
+    } finally {
+      setIsStaging(false)
+    }
+  }
+
+  const removeAttachment = (id: string) => {
+    setAttachments((prev) => prev.filter((a) => a.id !== id))
+    setAttachmentError(null)
+  }
+
+  // Auto-size the textarea to fit its content. There is no dependency
+  // array, so this runs after every render.
+  useLayoutEffect(() => {
+    const element = textareaRef.current
+    if (!element) return
+
+    // Pixel limits differ per variant: the tallest the textarea may grow,
+    // and the height above which the draft counts as "expanded".
+    const maxHeight = isConversation ? 176 : 100
+    const collapsedHeight = isConversation ? 56 : 72
+    // Collapse first so scrollHeight reports the content's height rather
+    // than the height set on the previous pass.
+    element.style.height = '0px'
+    const nextHeight = Math.min(element.scrollHeight, maxHeight)
+    element.style.height = `${nextHeight}px`
+    // Only show a scrollbar once the content no longer fits under the cap.
+    element.style.overflowY =
+      element.scrollHeight > maxHeight ? 'auto' : 'hidden'
+    // Drives the composer row's vertical alignment (items-end vs items-center).
+    setIsExpandedDraft(nextHeight > collapsedHeight)
+  })

  useEffect(() => {
    if (voice.transcript && !voice.isTranscribing) {
@@ -284,11 +379,71 @@ export const ConversationInput: FC<ConversationInputProps> = ({
    })
  }

+  const hasContent = input.trim().length > 0 || attachments.length > 0
+  const queueEnabled = outboundQueue !== undefined
+
  const handleSend = () => {
    const text = input.trim()
-    if (!text || streaming || disabled) return
-    onSend(text)
+    // The outbound queue accepts new messages while streaming; legacy
+    // direct-send callers (e.g., the home composer) keep the original
+    // streaming-blocks-send semantic.
+    if (disabled || isStaging) return
+    if (!queueEnabled && streaming) return
+    if (!text && attachments.length === 0) return
+    onSend({ text, attachments })
    setInput('')
+    setAttachments([])
+    setAttachmentError(null)
+  }
+
+  // Intercepts pastes that carry files: the files are staged as attachments
+  // and the default paste is suppressed. Pastes without files fall through
+  // to the textarea's normal behavior.
+  const handlePaste = (event: React.ClipboardEvent<HTMLTextAreaElement>) => {
+    const clipboardItems = event.clipboardData?.items
+    if (!clipboardItems) return
+
+    const pastedFiles = Array.from(clipboardItems)
+      .filter((item) => item.kind === 'file')
+      .map((item) => item.getAsFile())
+      .filter((file): file is File => file !== null)
+    if (pastedFiles.length === 0) return
+
+    event.preventDefault()
+    void stageFiles(pastedFiles)
+  }
+
+  const handleDrop = (event: DragEvent<HTMLDivElement>) => {
+    event.preventDefault()
+    setIsDragOver(false)
+    const files = Array.from(event.dataTransfer?.files ?? [])
+    if (files.length > 0) {
+      void stageFiles(files)
+    }
+  }
+
+  const handleDragOver = (event: DragEvent<HTMLDivElement>) => {
+    if (!event.dataTransfer?.types.includes('Files')) return
+    event.preventDefault()
+    setIsDragOver(true)
+  }
+
+  // Clears the drag highlight only when the pointer actually leaves the
+  // composer. dragleave also fires when moving between child elements; in
+  // that case relatedTarget is still inside currentTarget, so the overlay
+  // is left in place.
+  const handleDragLeave = (event: DragEvent<HTMLDivElement>) => {
+    if (event.currentTarget.contains(event.relatedTarget as Node | null)) {
+      return
+    }
+    setIsDragOver(false)
+  }
+
+  const openFilePicker = () => {
+    fileInputRef.current?.click()
+  }
+
+  const handleFileInputChange = (
+    event: React.ChangeEvent<HTMLInputElement>,
+  ) => {
+    const files = Array.from(event.target.files ?? [])
+    event.target.value = ''
+    if (files.length > 0) void stageFiles(files)
  }

  const shell = variant === 'home' ? HomeShell : ConversationShell
@@ -296,73 +451,321 @@ export const ConversationInput: FC<ConversationInputProps> = ({

  return (
    <Shell>
-      <div className="flex items-center gap-3 px-5 py-4">
-        <BotInputIcon variant={variant} />
+      <section
+        // Drag/drop on a region isn't a click affordance — wrap the
+        // composer in a labeled <section> so the a11y rule is satisfied
+        // without misrepresenting the surface as interactive.
+        aria-label="Message composer"
+        className={cn('relative', isDragOver && 'ring-2 ring-primary/60')}
+        onDragOver={handleDragOver}
+        onDragLeave={handleDragLeave}
+        onDrop={handleDrop}
+      >
        <input
-          type="text"
-          value={input}
-          onChange={(event) => setInput(event.currentTarget.value)}
-          onKeyDown={(event) => {
-            if (event.key === 'Enter') {
-              event.preventDefault()
-              handleSend()
+          ref={fileInputRef}
+          type="file"
+          multiple
+          accept="image/png,image/jpeg,image/webp,image/gif,text/*,application/json"
+          className="hidden"
+          onChange={handleFileInputChange}
+        />
+        {attachments.length > 0 || attachmentError ? (
+          <AttachmentStrip
+            attachments={attachments}
+            onRemove={removeAttachment}
+            error={attachmentError}
+          />
+        ) : null}
+        {queueEnabled && outboundQueue && outboundQueue.length > 0 ? (
+          <OutboundQueueStrip
+            messages={outboundQueue}
+            onCancel={onCancelQueued}
+            onRetry={onRetryQueued}
+          />
+        ) : null}
+        <div
+          className={cn(
+            'flex gap-3',
+            variant === 'home' ? 'px-4 py-3' : 'px-4 py-3',
+            isExpandedDraft ? 'items-end' : 'items-center',
+          )}
+        >
+          <BotInputIcon variant={variant} />
+          <div className="flex-1">
+            <Textarea
+              ref={textareaRef}
+              value={input}
+              onChange={(event) => setInput(event.currentTarget.value)}
+              onKeyDown={(event) => {
+                if (event.key === 'Enter' && !event.shiftKey) {
+                  event.preventDefault()
+                  handleSend()
+                }
+              }}
+              onPaste={handlePaste}
+              rows={1}
+              placeholder={
+                voice.isTranscribing
+                  ? 'Transcribing...'
+                  : (placeholder ??
+                    `Message ${selectedAgent?.name ?? 'agent'}...`)
+              }
+              disabled={disabled || voice.isTranscribing}
+              className={cn(
+                'resize-none border-none bg-transparent px-0 text-[15px] shadow-none focus-visible:ring-0',
+                '[field-sizing:fixed]',
+                variant === 'home'
+                  ? 'min-h-[40px] py-2 leading-6'
+                  : 'min-h-[40px] py-2 leading-6',
+                'placeholder:text-muted-foreground/80',
+              )}
+            />
+          </div>
+          <VoiceButton
+            isRecording={voice.isRecording}
+            isTranscribing={voice.isTranscribing}
+            onStart={() => {
+              void voice.startRecording()
+            }}
+            onStop={() => {
+              void voice.stopRecording()
+            }}
+          />
+          <InputActionButton
+            disabled={
+              !hasContent ||
+              isStaging ||
+              !!disabled ||
+              voice.isRecording ||
+              voice.isTranscribing ||
+              // Only block on `streaming` for the legacy direct-send path
+              // (no queue). With the queue active the press always
+              // succeeds — it just enqueues instead of dispatching.
+              (!queueEnabled && streaming)
            }
-          }}
-          placeholder={
-            voice.isTranscribing
-              ? 'Transcribing...'
-              : (placeholder ?? `Message ${selectedAgent?.name ?? 'agent'}...`)
-          }
-          disabled={disabled || voice.isTranscribing}
-          className="flex-1 border-none bg-transparent text-base text-foreground outline-none placeholder:text-muted-foreground disabled:opacity-60"
+            onClick={handleSend}
+            // Spinner stays the user-facing "agent is busy" hint; with the
+            // queue active we still spin while a turn is in flight.
+            streaming={streaming}
+          />
+        </div>
+        {voice.error ? (
+          <div className="px-5 pb-2 text-destructive text-xs">
+            {voice.error}
+          </div>
+        ) : null}
+        <ContextControls
+          agents={agents}
+          onCreateAgent={onCreateAgent}
+          onSelectAgent={onSelectAgent}
+          selectedAgentId={selectedAgentId}
+          selectedTabs={selectedTabs}
+          onToggleTab={toggleTab}
+          showAgentSelector={variant === 'home'}
+          status={status}
+          onAttachClick={openFilePicker}
+          attachDisabled={attachments.length >= 10 || isStaging || !!disabled}
        />
-        <VoiceButton
-          isRecording={voice.isRecording}
-          isTranscribing={voice.isTranscribing}
-          onStart={() => {
-            void voice.startRecording()
-          }}
-          onStop={() => {
-            void voice.stopRecording()
-          }}
-        />
-        <InputActionButton
-          disabled={
-            !input.trim() ||
-            streaming ||
-            !!disabled ||
-            voice.isRecording ||
-            voice.isTranscribing
-          }
-          onClick={handleSend}
-          streaming={streaming}
-        />
-      </div>
-      {voice.error ? (
-        <div className="px-5 pb-2 text-destructive text-xs">{voice.error}</div>
-      ) : null}
-      <ContextControls
-        agents={agents}
-        onCreateAgent={onCreateAgent}
-        onSelectAgent={onSelectAgent}
-        selectedAgentId={selectedAgentId}
-        selectedTabs={selectedTabs}
-        onToggleTab={toggleTab}
-        showAgentSelector={variant === 'home'}
-        status={status}
-      />
+        {isDragOver ? (
+          <div className="pointer-events-none absolute inset-0 flex items-center justify-center rounded-[inherit] bg-background/80 font-medium text-foreground text-sm backdrop-blur-sm">
+            Drop files to attach
+          </div>
+        ) : null}
+      </section>
    </Shell>
  )
 }

+/**
+ * Bordered list of outbound queue messages, one `OutboundQueueItem` row per
+ * message. The cancel and retry callbacks are forwarded to every row.
+ */
+function OutboundQueueStrip(props: {
+  messages: OutboundMessage[]
+  onCancel?: (id: string) => void
+  onRetry?: (id: string) => void
+}) {
+  const { messages, onCancel, onRetry } = props
+  const rows = messages.map((queued) => (
+    <OutboundQueueItem
+      key={queued.id}
+      message={queued}
+      onCancel={onCancel}
+      onRetry={onRetry}
+    />
+  ))
+
+  return (
+    <div className="border-border/40 border-b px-4 pt-3 pb-2">
+      <ul className="flex flex-col gap-1">{rows}</ul>
+    </div>
+  )
+}
+
+/**
+ * One row of the outbound queue: status icon, a one-line text preview, an
+ * attachment count when the message has attachments, and the actions that
+ * apply to its status — Cancel while queued; the error text plus Retry and
+ * Discard once failed. Action buttons only render when the matching
+ * callback was provided.
+ */
+function OutboundQueueItem(props: {
+  message: OutboundMessage
+  onCancel?: (id: string) => void
+  onRetry?: (id: string) => void
+}) {
+  const { message, onCancel, onRetry } = props
+  const { id, status, error } = message
+  const attachmentCount = message.attachmentPreviews.length
+  // Messages with no text still need something to show in the row.
+  const preview = message.text.trim() || '(attachments only)'
+
+  return (
+    <li className="flex items-center gap-2 rounded-md px-2 py-1 text-xs">
+      <OutboundQueueStatusIcon status={status} />
+      <span className="min-w-0 flex-1 truncate text-muted-foreground">
+        {preview}
+      </span>
+      {attachmentCount > 0 ? (
+        <span className="inline-flex items-center gap-1 text-muted-foreground/70">
+          <Paperclip className="size-3" />
+          <span className="tabular-nums">{attachmentCount}</span>
+        </span>
+      ) : null}
+      {status === 'queued' && onCancel ? (
+        <button
+          type="button"
+          onClick={() => onCancel(id)}
+          className="ml-1 inline-flex size-5 items-center justify-center rounded-full text-muted-foreground hover:bg-accent hover:text-foreground"
+          aria-label="Cancel queued message"
+          title="Cancel"
+        >
+          <X className="size-3" />
+        </button>
+      ) : null}
+      {status === 'failed' ? (
+        <span className="ml-1 inline-flex items-center gap-2 text-destructive">
+          <span className="max-w-[160px] truncate" title={error}>
+            {error ?? 'Failed'}
+          </span>
+          {onRetry ? (
+            <button
+              type="button"
+              onClick={() => onRetry(id)}
+              className="inline-flex size-5 items-center justify-center rounded-full hover:bg-accent hover:text-foreground"
+              aria-label="Retry failed message"
+              title="Retry"
+            >
+              <RefreshCw className="size-3" />
+            </button>
+          ) : null}
+          {onCancel ? (
+            <button
+              type="button"
+              onClick={() => onCancel(id)}
+              className="inline-flex size-5 items-center justify-center rounded-full hover:bg-accent hover:text-foreground"
+              aria-label="Discard failed message"
+              title="Discard"
+            >
+              <X className="size-3" />
+            </button>
+          ) : null}
+        </span>
+      ) : null}
+    </li>
+  )
+}
+
+/**
+ * Leading glyph for a queue row: a spinner while sending, a warning
+ * triangle when failed, and a small muted dot for every other status.
+ */
+function OutboundQueueStatusIcon(props: { status: OutboundMessage['status'] }) {
+  switch (props.status) {
+    case 'sending':
+      return (
+        <Loader2 className="size-3.5 shrink-0 animate-spin text-muted-foreground" />
+      )
+    case 'failed':
+      return <AlertTriangle className="size-3.5 shrink-0 text-destructive" />
+    default:
+      return (
+        <span className="inline-block size-2 shrink-0 rounded-full bg-muted-foreground/40" />
+      )
+  }
+}
+
+/**
+ * Strip showing the staged attachments as chips, followed by the staging
+ * error message when there is one. Either section is omitted when it has
+ * nothing to show.
+ */
+function AttachmentStrip(props: {
+  attachments: StagedAttachment[]
+  onRemove: (id: string) => void
+  error: string | null
+}) {
+  const { attachments, onRemove, error } = props
+
+  const chips =
+    attachments.length > 0 ? (
+      <div className="flex flex-wrap gap-2">
+        {attachments.map((staged) => (
+          <AttachmentChip
+            key={staged.id}
+            attachment={staged}
+            onRemove={() => onRemove(staged.id)}
+          />
+        ))}
+      </div>
+    ) : null
+
+  const errorLine = error ? (
+    <div className="mt-2 text-destructive text-xs">{error}</div>
+  ) : null
+
+  return (
+    <div className="border-border/40 border-b px-4 pt-3 pb-2">
+      {chips}
+      {errorLine}
+    </div>
+  )
+}
+
+/**
+ * Chip for one staged attachment, with a remove button.
+ *
+ * Image attachments that carry a `dataUrl` render as a square thumbnail with
+ * the remove button overlaid in the corner; everything else renders as a
+ * file icon followed by the (truncated) file name.
+ */
+function AttachmentChip({
+  attachment,
+  onRemove,
+}: {
+  attachment: StagedAttachment
+  onRemove: () => void
+}) {
+  if (attachment.kind === 'image' && attachment.dataUrl) {
+    return (
+      <div className="group relative size-16 overflow-hidden rounded-md border border-border/60">
+        <img
+          src={attachment.dataUrl}
+          alt={attachment.name}
+          className="size-full object-cover"
+        />
+        <button
+          type="button"
+          onClick={onRemove}
+          // The button is hidden until the thumbnail is hovered. It is still
+          // in the tab order, so it must also become visible on keyboard
+          // focus — otherwise keyboard users land on an invisible control.
+          className="absolute top-1 right-1 inline-flex size-5 items-center justify-center rounded-full bg-background/80 text-muted-foreground opacity-0 transition-opacity hover:text-foreground focus-visible:opacity-100 group-hover:opacity-100"
+          aria-label={`Remove ${attachment.name}`}
+        >
+          <X className="size-3" />
+        </button>
+      </div>
+    )
+  }
+  return (
+    <div className="group flex max-w-[220px] items-center gap-2 rounded-md border border-border/60 bg-background/60 px-2 py-1.5">
+      <FileText className="size-4 shrink-0 text-muted-foreground" />
+      <span className="truncate text-xs">{attachment.name}</span>
+      <button
+        type="button"
+        onClick={onRemove}
+        className="ml-1 inline-flex size-4 items-center justify-center text-muted-foreground hover:text-foreground"
+        aria-label={`Remove ${attachment.name}`}
+      >
+        <X className="size-3" />
+      </button>
+    </div>
+  )
+}
+
 function BotInputIcon({ variant }: { variant: 'home' | 'conversation' }) {
  return (
    <div
      className={cn(
        'flex items-center justify-center text-[var(--accent-orange)]',
        variant === 'home'
-          ? 'h-10 w-10 rounded-xl bg-[var(--accent-orange)]/10'
-          : 'h-9 w-9 rounded-xl bg-[var(--accent-orange)]/12',
+          ? 'h-8 w-8 rounded-lg bg-[var(--accent-orange)]/10'
+          : 'h-8 w-8 rounded-lg bg-[var(--accent-orange)]/10',
      )}
    >
      <Bot className="h-4 w-4" />
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ConversationMessage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/ConversationMessage.tsx
@@ -1,7 +1,9 @@
-import { Bot, CheckCircle2, Loader2, XCircle } from 'lucide-react'
-import type { FC } from 'react'
+import { Bot, CheckCircle2, Loader2, Wrench, XCircle } from 'lucide-react'
+import { type FC, useMemo } from 'react'
 import {
  Message,
+  MessageAttachment,
+  MessageAttachments,
  MessageContent,
  MessageResponse,
 } from '@/components/ai-elements/message'
@@ -10,96 +12,191 @@ import {
  ReasoningContent,
  ReasoningTrigger,
 } from '@/components/ai-elements/reasoning'
-import type { AgentConversationTurn } from '@/lib/agent-conversations/types'
+import {
+  Task,
+  TaskContent,
+  TaskItem,
+  TaskTrigger,
+} from '@/components/ai-elements/task'
+import type {
+  AgentConversationTurn,
+  ToolEntry,
+} from '@/lib/agent-conversations/types'

 interface ConversationMessageProps {
  turn: AgentConversationTurn
  streaming: boolean
 }

+/**
+ * One item of the render plan produced by buildRenderEntries.
+ * Which optional fields are set depends on `kind`: `text` and `done` for
+ * 'thinking', `text` for 'text', `tools` for 'task'.
+ */
+interface RenderEntry {
+  kind: 'thinking' | 'text' | 'task'
+  // Index of the originating part in turn.parts; also used for the React key.
+  partIndex: number
+  text?: string
+  done?: boolean
+  tools?: ToolEntry[]
+}
+
+/**
+ * Turn an assistant turn's parts into an ordered list of render entries.
+ *
+ * Thinking and text parts map one-to-one onto entries. Tool-batch parts are
+ * merged: only the first batch produces a `task` entry (at that batch's
+ * position), and the tools of every later batch are appended to that same
+ * entry's tool list in arrival order.
+ */
+function buildRenderEntries(turn: AgentConversationTurn): RenderEntry[] {
+  const plan: RenderEntry[] = []
+  // Shared by reference with the single task entry, so tools pushed after the
+  // entry was created still show up in it.
+  const allTools: ToolEntry[] = []
+  let hasTaskEntry = false
+
+  for (const [partIndex, part] of turn.parts.entries()) {
+    switch (part.kind) {
+      case 'thinking':
+        plan.push({
+          kind: 'thinking',
+          partIndex,
+          text: part.text,
+          done: part.done,
+        })
+        break
+      case 'text':
+        plan.push({ kind: 'text', partIndex, text: part.text })
+        break
+      case 'tool-batch':
+        allTools.push(...part.tools)
+        if (hasTaskEntry) break
+        plan.push({ kind: 'task', partIndex, tools: allTools })
+        hasTaskEntry = true
+        break
+    }
+  }
+
+  return plan
+}
+
+/**
+ * Status glyph for a single tool entry: a spinner while running, a green
+ * check when completed, and a destructive-colored cross for any other status.
+ */
+function ToolStatusIcon({ status }: { status: ToolEntry['status'] }) {
+  switch (status) {
+    case 'running':
+      return (
+        <Loader2 className="size-3.5 shrink-0 animate-spin text-muted-foreground" />
+      )
+    case 'completed':
+      return <CheckCircle2 className="size-3.5 shrink-0 text-green-500" />
+    default:
+      return <XCircle className="size-3.5 shrink-0 text-destructive" />
+  }
+}
+
+/**
+ * Renders one conversation turn: the user bubble (attachments first, then
+ * text), the assistant bubble built from the turn's render entries, and a
+ * bouncing-dots placeholder while a streaming turn has no parts yet.
+ */
 export const ConversationMessage: FC<ConversationMessageProps> = ({
   turn,
   streaming,
-}) => (
-  <div className="space-y-3">
-    <Message from="user">
-      <MessageContent>
-        <pre className="whitespace-pre-wrap font-sans text-sm">
-          {turn.userText}
-        </pre>
-      </MessageContent>
-    </Message>
+}) => {
+  // Render plan is rebuilt only when the `turn` object identity changes.
+  const entries = useMemo(() => buildRenderEntries(turn), [turn])

-    {turn.parts.length > 0 && (
-      <Message from="assistant">
+  return (
+    <div className="space-y-3">
+      <Message from="user">
         <MessageContent>
-          {turn.parts.map((part, i) => {
-            const key = `${turn.id}-part-${i}`
+          {turn.userAttachments && turn.userAttachments.length > 0 && (
+            <MessageAttachments>
+              {turn.userAttachments.map((attachment) => (
+                <MessageAttachment
+                  key={attachment.id}
+                  data={{
+                    type: 'file',
+                    url: attachment.dataUrl ?? '',
+                    mediaType: attachment.mediaType,
+                    filename: attachment.name,
+                  }}
+                />
+              ))}
+            </MessageAttachments>
+          )}
+          {turn.userText && (
+            <pre className="whitespace-pre-wrap font-sans text-sm">
+              {turn.userText}
+            </pre>
+          )}
+        </MessageContent>
+      </Message>

-            switch (part.kind) {
-              case 'thinking':
+      {entries.length > 0 && (
+        <Message from="assistant">
+          <MessageContent>
+            {entries.map((entry) => {
+              const key = `${turn.id}-entry-${entry.partIndex}`
+
+              if (entry.kind === 'thinking') {
                 return (
                   <Reasoning
                     key={key}
                     className="w-full"
-                    isStreaming={!part.done}
-                    defaultOpen={!part.done}
+                    isStreaming={!entry.done}
+                    defaultOpen={!entry.done}
                   >
                     <ReasoningTrigger />
-                    <ReasoningContent>{part.text}</ReasoningContent>
+                    <ReasoningContent>{entry.text ?? ''}</ReasoningContent>
                   </Reasoning>
                 )
+              }

-              case 'tool-batch':
+              if (entry.kind === 'text') {
                 return (
-                  <div key={key} className="w-full space-y-1">
-                    {part.tools.map((tool) => (
-                      <div
+                  <MessageResponse key={key}>
+                    {entry.text ?? ''}
+                  </MessageResponse>
+                )
+              }
+
+              // Remaining kind is 'task': the aggregated tool list of the turn.
+              const tools = entry.tools ?? []
+              const allDone = tools.every((t) => t.status !== 'running')
+              const taskTitle = allDone
+                ? `Agent activity (${tools.length} ${tools.length === 1 ? 'action' : 'actions'})`
+                : `Working… (${tools.length} ${tools.length === 1 ? 'action' : 'actions'})`
+
+              return (
+                <Task key={key} defaultOpen={!turn.done}>
+                  <TaskTrigger title={taskTitle} TriggerIcon={Wrench} />
+                  <TaskContent>
+                    {tools.map((tool) => (
+                      <TaskItem
                         key={tool.id}
-                        className="flex items-center gap-2 rounded-md border px-3 py-2 text-sm"
+                        className="flex items-center gap-2"
                       >
-                        {tool.status === 'running' && (
-                          <Loader2 className="size-3.5 animate-spin text-muted-foreground" />
-                        )}
-                        {tool.status === 'completed' && (
-                          <CheckCircle2 className="size-3.5 text-green-500" />
-                        )}
-                        {tool.status === 'error' && (
-                          <XCircle className="size-3.5 text-destructive" />
-                        )}
-                        <span className="font-mono text-xs">{tool.name}</span>
+                        <ToolStatusIcon status={tool.status} />
+                        <span className="text-foreground text-xs">
+                          {tool.label}
+                        </span>
+                        {tool.subject ? (
+                          <span className="ml-1.5 truncate text-muted-foreground/70 text-xs">
+                            · {tool.subject}
+                          </span>
+                        ) : null}
                         {tool.durationMs != null && (
-                          <span className="ml-auto text-muted-foreground text-xs">
+                          <span className="ml-auto text-muted-foreground/60 text-xs tabular-nums">
                             {(tool.durationMs / 1000).toFixed(1)}s
                           </span>
                         )}
-                      </div>
+                      </TaskItem>
                     ))}
-                  </div>
-                )
+                  </TaskContent>
+                </Task>
+              )
+            })}
+          </MessageContent>
+        </Message>
+      )}

-              case 'text':
-                return <MessageResponse key={key}>{part.text}</MessageResponse>
-
-              default:
-                return null
-            }
-          })}
-        </MessageContent>
-      </Message>
-    )}
-
-    {!turn.done && turn.parts.length === 0 && streaming && (
-      <div className="flex gap-2">
-        <div className="flex size-7 shrink-0 items-center justify-center rounded-full bg-[var(--accent-orange)] text-white">
-          <Bot className="size-3.5" />
+      {!turn.done && turn.parts.length === 0 && streaming && (
+        <div className="flex gap-2">
+          <div className="flex size-7 shrink-0 items-center justify-center rounded-full bg-[var(--accent-orange)] text-white">
+            <Bot className="size-3.5" />
+          </div>
+          <div className="flex items-center gap-1 rounded-xl rounded-tl-none border border-border/50 bg-card px-3 py-2.5 shadow-sm">
+            <span className="size-1.5 animate-bounce rounded-full bg-[var(--accent-orange)] [animation-delay:-0.3s]" />
+            <span className="size-1.5 animate-bounce rounded-full bg-[var(--accent-orange)] [animation-delay:-0.15s]" />
+            <span className="size-1.5 animate-bounce rounded-full bg-[var(--accent-orange)]" />
+          </div>
         </div>
-        <div className="flex items-center gap-1 rounded-xl rounded-tl-none border border-border/50 bg-card px-3 py-2.5 shadow-sm">
-          <span className="size-1.5 animate-bounce rounded-full bg-[var(--accent-orange)] [animation-delay:-0.3s]" />
-          <span className="size-1.5 animate-bounce rounded-full bg-[var(--accent-orange)] [animation-delay:-0.15s]" />
-          <span className="size-1.5 animate-bounce rounded-full bg-[var(--accent-orange)]" />
-        </div>
-      </div>
-    )}
-  </div>
-)
+      )}
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/claw-chat-types.test.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/claw-chat-types.test.ts
@@ -0,0 +1,121 @@
+import { describe, expect, it } from 'bun:test'
+import {
+  type AgentHistoryPageResponse,
+  type BrowserOSChatHistoryItem,
+  buildChatHistoryFromClawMessages,
+  flattenHistoryPages,
+  mapHistoryItemToClawMessage,
+} from './claw-chat-types'
+
+/** Fixture: a default user history item with `overrides` applied on top. */
+function historyItem(
+  overrides: Partial<BrowserOSChatHistoryItem>,
+): BrowserOSChatHistoryItem {
+  const defaults: BrowserOSChatHistoryItem = {
+    id: 'session-1:0',
+    role: 'user',
+    text: 'Hello',
+    timestamp: 1000,
+    messageSeq: 0,
+    sessionKey: 'session-1',
+    source: 'user-chat',
+  }
+  return { ...defaults, ...overrides }
+}
+
+/** Fixture: wrap `items` in a single, final history page (no further pages). */
+function page(items: BrowserOSChatHistoryItem[]): AgentHistoryPageResponse {
+  const pagination = { hasMore: false, limit: 50 }
+
+  return {
+    agentId: 'main',
+    sessionKey: 'session-1',
+    session: null,
+    items,
+    page: pagination,
+  }
+}
+
+// Unit tests for the pure mapping helpers exported by ./claw-chat-types.
+describe('claw-chat-types', () => {
+  it('maps backend history items into text-first ClawChat messages', () => {
+    const message = mapHistoryItemToClawMessage(
+      historyItem({
+        id: 'session-1:1',
+        role: 'assistant',
+        text: 'Hi there',
+        messageSeq: 1,
+      }),
+    )
+
+    expect(message).toEqual({
+      id: 'session-1:1',
+      role: 'assistant',
+      sessionKey: 'session-1',
+      timestamp: 1000,
+      source: 'user-chat',
+      messageSeq: 1,
+      status: 'historical',
+      parts: [{ type: 'text', text: 'Hi there' }],
+    })
+  })
+
+  it('flattens paginated history into oldest-to-newest render order', () => {
+    // The page holding the newest item is passed first; the flattened result
+    // must still come out oldest-first.
+    const messages = flattenHistoryPages([
+      page([
+        historyItem({
+          id: 'session-1:2',
+          role: 'user',
+          text: 'newer',
+          timestamp: 3000,
+          messageSeq: 2,
+        }),
+      ]),
+      page([
+        historyItem({
+          id: 'session-1:0',
+          role: 'user',
+          text: 'older',
+          timestamp: 1000,
+          messageSeq: 0,
+        }),
+        historyItem({
+          id: 'session-1:1',
+          role: 'assistant',
+          text: 'middle',
+          timestamp: 2000,
+          messageSeq: 1,
+        }),
+      ]),
+    ])
+
+    expect(messages.map((message) => message.id)).toEqual([
+      'session-1:0',
+      'session-1:1',
+      'session-1:2',
+    ])
+  })
+
+  it('builds OpenClaw chat history from text message parts only', () => {
+    // Covers two behaviors: surrounding whitespace is trimmed, and the
+    // reasoning part is left out of the resulting content.
+    const history = buildChatHistoryFromClawMessages([
+      {
+        id: 'user-1',
+        role: 'user',
+        sessionKey: 'session-1',
+        parts: [{ type: 'text', text: '  User request  ' }],
+      },
+      {
+        id: 'assistant-1',
+        role: 'assistant',
+        sessionKey: 'session-1',
+        parts: [
+          { type: 'reasoning', text: 'private reasoning' },
+          { type: 'text', text: 'Assistant answer' },
+        ],
+      },
+    ])
+
+    expect(history).toEqual([
+      { role: 'user', content: 'User request' },
+      { role: 'assistant', content: 'Assistant answer' },
+    ])
+  })
+})
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/claw-chat-types.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/claw-chat-types.ts
@@ -0,0 +1,223 @@
+import type { OpenClawChatHistoryMessage } from '@/entrypoints/app/agents/useOpenClaw'
+
+/** Author of a chat message; used as `role` on history items and messages. */
+export type ClawChatRole = 'user' | 'assistant'
+
+/** Origin tag carried by sessions and history items (their `source` field). */
+export type ClawChatSource = 'user-chat' | 'cron' | 'hook' | 'channel' | 'other'
+
+/** Session descriptor; appears as `session` on AgentHistoryPageResponse. */
+export interface BrowserOSOpenClawSession {
+  key: string
+  // NOTE(review): presumably epoch milliseconds — confirm against the server.
+  updatedAt: number
+  sessionId: string
+  agentId: string
+  kind: string
+  source: ClawChatSource
+  status?: string
+  totalTokens?: number
+  model?: string
+  modelProvider?: string
+}
+
+/**
+ * A tool call recorded on a history item. `status` has only terminal values
+ * here, unlike the `tool-call` variant of ClawChatMessagePart which also
+ * allows 'pending' and 'running'.
+ */
+export interface BrowserOSChatHistoryToolCall {
+  toolCallId?: string
+  toolName: string
+  label: string
+  subject?: string
+  status: 'completed' | 'failed'
+  input?: Record<string, unknown>
+  output?: string
+  error?: string
+  durationMs?: number
+}
+
+/**
+ * Reasoning text attached to a history item. mapHistoryItemToClawMessage
+ * converts `durationMs` to whole seconds and treats a missing or zero value
+ * as "no measurable duration".
+ */
+export interface BrowserOSChatHistoryReasoning {
+  text: string
+  durationMs?: number
+}
+
+/**
+ * Attachment metadata on a history item; copied field-for-field into an
+ * `attachment` message part by mapHistoryItemToClawMessage.
+ */
+export interface BrowserOSChatHistoryAttachment {
+  kind: 'image' | 'file'
+  mediaType: string
+  // Images carry a `data:` URL so we can render directly without any
+  // additional fetch; files (text/PDF) currently round-trip via inline
+  // text in the message body and do not populate this field in v1.
+  dataUrl?: string
+  name?: string
+}
+
+/**
+ * One message as it appears in AgentHistoryPageResponse.items.
+ * `timestamp` and `messageSeq` are the fields flattenHistoryPages sorts by.
+ */
+export interface BrowserOSChatHistoryItem {
+  id: string
+  role: ClawChatRole
+  text: string
+  timestamp?: number
+  messageSeq: number
+  sessionKey: string
+  source: ClawChatSource
+  costUsd?: number
+  tokensIn?: number
+  tokensOut?: number
+  toolCalls?: BrowserOSChatHistoryToolCall[]
+  reasoning?: BrowserOSChatHistoryReasoning
+  attachments?: BrowserOSChatHistoryAttachment[]
+}
+
+/** One page of an agent's chat history plus pagination state. */
+export interface AgentHistoryPageResponse {
+  agentId: string
+  sessionKey: string | null
+  session: BrowserOSOpenClawSession | null
+  items: BrowserOSChatHistoryItem[]
+  page: {
+    // Used as the next page parameter by useClawChatHistory when hasMore is true.
+    cursor?: string
+    hasMore: boolean
+    limit: number
+  }
+}
+
+/**
+ * Lifecycle state of a ClawChatMessage. mapHistoryItemToClawMessage always
+ * assigns 'historical'; the other values are not set anywhere in this module.
+ */
+export type ClawChatMessageStatus =
+  | 'historical'
+  | 'sending'
+  | 'streaming'
+  | 'error'
+
+/**
+ * A renderable fragment of a message, discriminated by `type`.
+ * mapHistoryItemToClawMessage emits 'attachment', 'reasoning', 'tool-call'
+ * and 'text' parts; it never emits 'meta'.
+ */
+export type ClawChatMessagePart =
+  | { type: 'text'; text: string }
+  // `duration` is in whole seconds when produced by mapHistoryItemToClawMessage.
+  | { type: 'reasoning'; text: string; duration?: number }
+  | {
+      type: 'tool-call'
+      name: string
+      label: string
+      subject?: string
+      status: 'pending' | 'running' | 'completed' | 'failed'
+      input?: unknown
+      output?: unknown
+      error?: string
+      durationMs?: number
+    }
+  | {
+      type: 'attachment'
+      kind: 'image' | 'file'
+      mediaType: string
+      dataUrl?: string
+      name?: string
+    }
+  | { type: 'meta'; label: string; value: string }
+
+/**
+ * Chat message as an ordered list of `parts` plus optional metadata.
+ * When built by mapHistoryItemToClawMessage, the metadata fields are copied
+ * straight from the history item.
+ */
+export interface ClawChatMessage {
+  id: string
+  role: ClawChatRole
+  sessionKey: string
+  timestamp?: number
+  source?: ClawChatSource
+  messageSeq?: number
+  status?: ClawChatMessageStatus
+  parts: ClawChatMessagePart[]
+  costUsd?: number
+  tokensIn?: number
+  tokensOut?: number
+}
+
+/**
+ * Convert one backend history item into a ClawChatMessage with status
+ * 'historical'.
+ *
+ * Parts are emitted in a fixed order: attachments, reasoning, tool calls,
+ * then text. Whitespace-only reasoning and whitespace-only text produce no
+ * part, so an attachment-only user message does not render an empty bubble.
+ */
+export function mapHistoryItemToClawMessage(
+  item: BrowserOSChatHistoryItem,
+): ClawChatMessage {
+  // Attachments lead so they sit above the caption text.
+  const attachmentParts = (item.attachments ?? []).map(
+    (attachment): ClawChatMessagePart => ({
+      type: 'attachment',
+      kind: attachment.kind,
+      mediaType: attachment.mediaType,
+      dataUrl: attachment.dataUrl,
+      name: attachment.name,
+    }),
+  )
+
+  const reasoningParts: ClawChatMessagePart[] = []
+  const reasoning = item.reasoning
+  if (reasoning && reasoning.text.trim().length > 0) {
+    // A missing or zero duration maps to `undefined` so no "0 seconds" figure
+    // is shown; any positive duration is rounded to seconds, floored at 1.
+    const elapsedMs = reasoning.durationMs ?? 0
+    reasoningParts.push({
+      type: 'reasoning',
+      text: reasoning.text,
+      duration:
+        elapsedMs > 0 ? Math.max(1, Math.round(elapsedMs / 1000)) : undefined,
+    })
+  }
+
+  const toolParts = (item.toolCalls ?? []).map(
+    (call): ClawChatMessagePart => ({
+      type: 'tool-call',
+      name: call.toolName,
+      label: call.label,
+      subject: call.subject,
+      status: call.status,
+      input: call.input,
+      output: call.output,
+      error: call.error,
+      durationMs: call.durationMs,
+    }),
+  )
+
+  // The text is kept untrimmed; trimming is only used to detect emptiness.
+  const textParts: ClawChatMessagePart[] =
+    item.text.trim().length > 0 ? [{ type: 'text', text: item.text }] : []
+
+  return {
+    id: item.id,
+    role: item.role,
+    sessionKey: item.sessionKey,
+    timestamp: item.timestamp,
+    source: item.source,
+    messageSeq: item.messageSeq,
+    status: 'historical',
+    parts: [...attachmentParts, ...reasoningParts, ...toolParts, ...textParts],
+    costUsd: item.costUsd,
+    tokensIn: item.tokensIn,
+    tokensOut: item.tokensOut,
+  }
+}
+
+/**
+ * Merge paginated history responses into one oldest-to-newest list of
+ * ClawChat messages.
+ *
+ * Ordering: when every item carries a timestamp, items are sorted by
+ * timestamp with `messageSeq` breaking ties. If any timestamp is missing, the
+ * whole list is ordered by `messageSeq` alone. The sort key is chosen once for
+ * the whole list rather than per pair, because a comparator that switches
+ * keys between pairs is not transitive and makes the sort result undefined.
+ *
+ * @param pages History pages in any order (e.g. newest page first).
+ * @returns Every item of every page, mapped via mapHistoryItemToClawMessage.
+ */
+export function flattenHistoryPages(
+  pages: AgentHistoryPageResponse[],
+): ClawChatMessage[] {
+  const items = pages.flatMap((page) => page.items)
+  const allTimestamped = items.every((item) => item.timestamp != null)
+
+  return items
+    .sort((a, b) => {
+      if (allTimestamped) {
+        const byTime = (a.timestamp ?? 0) - (b.timestamp ?? 0)
+        if (byTime !== 0) return byTime
+      }
+      // Tie-breaker for equal timestamps, and the only key when some
+      // timestamps are missing.
+      return a.messageSeq - b.messageSeq
+    })
+    .map(mapHistoryItemToClawMessage)
+}
+
+/**
+ * Reduce ClawChat messages to plain `{ role, content }` history entries.
+ *
+ * Only `text` parts contribute: each is trimmed, blank ones are dropped, and
+ * the rest are joined with a blank line. Messages that end up with no content
+ * are omitted from the result.
+ */
+export function buildChatHistoryFromClawMessages(
+  messages: ClawChatMessage[],
+): OpenClawChatHistoryMessage[] {
+  const history: OpenClawChatHistoryMessage[] = []
+
+  for (const message of messages) {
+    const paragraphs: string[] = []
+    for (const part of message.parts) {
+      if (part.type !== 'text') continue
+      const trimmed = part.text.trim()
+      if (trimmed.length > 0) paragraphs.push(trimmed)
+    }
+
+    const content = paragraphs.join('\n\n')
+    if (content) history.push({ role: message.role, content })
+  }
+
+  return history
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useAgentCardData.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useAgentCardData.ts
@@ -1,69 +1,50 @@
-import { useEffect, useState } from 'react'
 import {
  type AgentEntry,
  getModelDisplayName,
  type OpenClawStatus,
 } from '@/entrypoints/app/agents/useOpenClaw'
-import { getLatestConversation } from '@/lib/agent-conversations/storage'
 import type { AgentCardData } from '@/lib/agent-conversations/types'
+import type { AgentOverview } from './useAgentDashboard'

-function getAgentStatusTone(
-  status: OpenClawStatus['status'] | undefined,
+/**
+ * Collapse the gateway status and the per-agent live status into the status
+ * shown on an agent card. Gateway 'error' and 'starting' are checked first;
+ * otherwise a live 'working' or 'error' passes through. Anything else,
+ * including live 'unknown' or undefined, yields 'idle'.
+ */
+function resolveAgentStatus(
+  gatewayStatus: OpenClawStatus['status'] | undefined,
+  liveStatus: AgentOverview['status'] | undefined,
 ): AgentCardData['status'] {
-  if (status === 'error') return 'error'
-  if (status === 'starting') return 'working'
+  // Gateway-level errors take precedence
+  if (gatewayStatus === 'error') return 'error'
+  if (gatewayStatus === 'starting') return 'working'
+
+  // Per-agent live status from the WS observer
+  if (liveStatus === 'working') return 'working'
+  if (liveStatus === 'error') return 'error'
+
   return 'idle'
 }

-async function getAgentCardData(
-  agent: AgentEntry,
-  status: OpenClawStatus['status'] | undefined,
-): Promise<AgentCardData> {
-  const conversation = await getLatestConversation(agent.agentId)
-  const lastTurn = conversation?.turns[conversation.turns.length - 1]
-  const lastTextPart = lastTurn?.parts.findLast((part) => part.kind === 'text')
-
-  return {
-    agentId: agent.agentId,
-    name: agent.name,
-    model: getModelDisplayName(agent.model),
-    status: getAgentStatusTone(status),
-    lastMessage:
-      lastTextPart?.kind === 'text'
-        ? lastTextPart.text.slice(0, 120)
-        : undefined,
-    lastMessageTimestamp: lastTurn?.timestamp,
-  }
-}
-
-export function useAgentCardData(
+/**
+ * Build agent card display data by merging the raw agent entries from
+ * the gateway with enriched overview data from the dashboard API.
+ *
+ * Pure function — no hooks, no IndexedDB, no async.
+ *
+ * Returns one card per entry of `agents`, in the same order. Agents without
+ * a matching overview still get a card; their overview-derived fields are
+ * undefined.
+ */
+export function buildAgentCardData(
   agents: AgentEntry[],
   status: OpenClawStatus['status'] | undefined,
-) {
-  const [cardData, setCardData] = useState<AgentCardData[]>([])
+  dashboard: AgentOverview[] | undefined,
+): AgentCardData[] {
+  return agents.map((agent) => {
+    // Overviews are matched to agents by agentId.
+    const overview = dashboard?.find((d) => d.agentId === agent.agentId)

-  useEffect(() => {
-    let active = true
-
-    const loadCardData = async () => {
-      const nextCardData = await Promise.all(
-        agents.map((agent) => getAgentCardData(agent, status)),
-      )
-      if (active) {
-        setCardData(nextCardData)
-      }
+    // `?? undefined` turns the API's nulls into undefined for the card type.
+    return {
+      agentId: agent.agentId,
+      name: agent.name,
+      model: getModelDisplayName(agent.model),
+      status: resolveAgentStatus(status, overview?.status),
+      lastMessage: overview?.latestMessage?.slice(0, 200) ?? undefined,
+      lastMessageTimestamp: overview?.latestMessageAt ?? undefined,
+      activitySummary: overview?.activitySummary ?? undefined,
+      currentTool: overview?.currentTool ?? undefined,
+      costUsd: overview?.totalCostUsd ?? undefined,
     }
-
-    if (agents.length > 0) {
-      void loadCardData()
-    } else {
-      setCardData([])
-    }
-
-    return () => {
-      active = false
-    }
-  }, [agents, status])
-
-  return cardData
+  })
 }
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useAgentConversation.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useAgentConversation.ts
@@ -1,51 +1,57 @@
 import { useEffect, useRef, useState } from 'react'
 import {
  chatWithAgent,
+  type OpenClawChatHistoryMessage,
  type OpenClawStreamEvent,
 } from '@/entrypoints/app/agents/useOpenClaw'
-import {
-  getLatestConversation,
-  saveConversation,
-} from '@/lib/agent-conversations/storage'
 import type {
-  AgentConversation,
  AgentConversationTurn,
  AssistantPart,
+  UserAttachmentPreview,
 } from '@/lib/agent-conversations/types'
+import type { ServerAttachmentPayload } from '@/lib/attachments'
 import { consumeSSEStream } from '@/lib/sse'
+import { buildToolLabel } from '@/lib/tool-labels'

-export function useAgentConversation(agentId: string, agentName: string) {
+/**
+ * Structured argument for `send` (which also accepts a plain string).
+ * `text` is trimmed before use; a send with empty trimmed text and no
+ * attachments is ignored.
+ */
+export interface SendInput {
+  text: string
+  attachments?: ServerAttachmentPayload[]
+  // Optional preview metadata used to render the optimistic user turn.
+  // Built by the composer at staging time; the server only sees the
+  // payload array.
+  attachmentPreviews?: UserAttachmentPreview[]
+}
+
+/** Optional inputs of useAgentConversation; each is mirrored into a ref. */
+interface UseAgentConversationOptions {
+  // Session to continue; when empty, no session key is sent with the request.
+  sessionKey?: string | null
+  // Prior messages passed along to chatWithAgent on every send.
+  history?: OpenClawChatHistoryMessage[]
+  // Called with the `X-Session-Key` response header value when one is present.
+  onSessionKeyChange?: (sessionKey: string) => void
+}
+
+export function useAgentConversation(
+  agentId: string,
+  options: UseAgentConversationOptions = {},
+) {
  const [turns, setTurns] = useState<AgentConversationTurn[]>([])
  const [streaming, setStreaming] = useState(false)
-  const [loading, setLoading] = useState(true)
-  const sessionKeyRef = useRef('')
+  const sessionKeyRef = useRef(options.sessionKey ?? '')
+  const historyRef = useRef<OpenClawChatHistoryMessage[]>(options.history ?? [])
  const textAccRef = useRef('')
  const thinkAccRef = useRef('')
  const streamAbortRef = useRef<AbortController | null>(null)
+  const onSessionKeyChangeRef = useRef(options.onSessionKeyChange)

  useEffect(() => {
-    let active = true
-    getLatestConversation(agentId)
-      .then((conv) => {
-        if (!active) return
-        if (conv) {
-          setTurns(conv.turns)
-          sessionKeyRef.current = conv.sessionKey
-        } else {
-          sessionKeyRef.current = crypto.randomUUID()
-        }
-        setLoading(false)
-      })
-      .catch(() => {
-        if (active) {
-          sessionKeyRef.current = crypto.randomUUID()
-          setLoading(false)
-        }
-      })
-    return () => {
-      active = false
-    }
-  }, [agentId])
+    sessionKeyRef.current = options.sessionKey ?? ''
+  }, [options.sessionKey])
+
+  useEffect(() => {
+    historyRef.current = options.history ?? []
+  }, [options.history])
+
+  useEffect(() => {
+    onSessionKeyChangeRef.current = options.onSessionKeyChange
+  }, [options.onSessionKeyChange])

  useEffect(() => {
    return () => {
@@ -53,18 +59,6 @@ export function useAgentConversation(agentId: string, agentName: string) {
    }
  }, [])

-  const persistTurns = (updatedTurns: AgentConversationTurn[]) => {
-    const conv: AgentConversation = {
-      agentId,
-      agentName,
-      sessionKey: sessionKeyRef.current,
-      turns: updatedTurns,
-      createdAt: updatedTurns[0]?.timestamp ?? Date.now(),
-      updatedAt: Date.now(),
-    }
-    saveConversation(conv).catch(() => {})
-  }
-
  const updateCurrentTurnParts = (
    updater: (parts: AssistantPart[]) => AssistantPart[],
  ) => {
@@ -110,9 +104,14 @@ export function useAgentConversation(agentId: string, agentName: string) {
      }

      case 'tool-start': {
+        const rawName = (event.data.toolName as string) ?? 'unknown'
+        const args = event.data.args as Record<string, unknown> | undefined
+        const { label, subject } = buildToolLabel(rawName, args)
        const tool = {
          id: (event.data.toolCallId as string) ?? crypto.randomUUID(),
-          name: (event.data.toolName as string) ?? 'unknown',
+          name: rawName,
+          label,
+          subject,
          status: 'running' as const,
        }
        updateCurrentTurnParts((parts) => {
@@ -164,9 +163,7 @@ export function useAgentConversation(agentId: string, agentName: string) {
        setTurns((prev) => {
          const last = prev[prev.length - 1]
          if (!last) return prev
-          const updated = [...prev.slice(0, -1), { ...last, done: true }]
-          persistTurns(updated)
-          return updated
+          return [...prev.slice(0, -1), { ...last, done: true }]
        })
        break
      }
@@ -185,12 +182,22 @@ export function useAgentConversation(agentId: string, agentName: string) {
    }
  }

-  const send = async (text: string) => {
-    if (!text.trim() || streaming) return
+  const send = async (input: string | SendInput) => {
+    const normalized: SendInput =
+      typeof input === 'string' ? { text: input } : input
+    const trimmed = normalized.text.trim()
+    const attachments = normalized.attachments ?? []
+    if (streaming) return
+    if (!trimmed && attachments.length === 0) return

    const turn: AgentConversationTurn = {
      id: crypto.randomUUID(),
-      userText: text.trim(),
+      userText: trimmed,
+      userAttachments:
+        normalized.attachmentPreviews &&
+        normalized.attachmentPreviews.length > 0
+          ? normalized.attachmentPreviews
+          : undefined,
      parts: [],
      done: false,
      timestamp: Date.now(),
@@ -205,10 +212,17 @@ export function useAgentConversation(agentId: string, agentName: string) {
    try {
      const response = await chatWithAgent(
        agentId,
-        text.trim(),
-        sessionKeyRef.current,
+        trimmed,
+        sessionKeyRef.current || undefined,
+        historyRef.current,
        abortController.signal,
+        attachments,
      )
+      const responseSessionKey = response.headers.get('X-Session-Key')
+      if (responseSessionKey) {
+        sessionKeyRef.current = responseSessionKey
+        onSessionKeyChangeRef.current?.(responseSessionKey)
+      }
      if (!response.ok) {
        const err = await response.text()
        updateCurrentTurnParts((parts) => [
@@ -242,13 +256,11 @@ export function useAgentConversation(agentId: string, agentName: string) {
    streamAbortRef.current = null
    setTurns([])
    setStreaming(false)
-    sessionKeyRef.current = crypto.randomUUID()
  }

  return {
    turns,
    streaming,
-    loading,
    sessionKey: sessionKeyRef.current,
    send,
    resetConversation,
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useAgentDashboard.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useAgentDashboard.ts
@@ -0,0 +1,95 @@
+import { useQuery, useQueryClient } from '@tanstack/react-query'
+import { useEffect } from 'react'
+import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
+
+/**
+ * Per-agent entry of the dashboard response. `status` and `currentTool` are
+ * the two fields patched in place by `status` stream events.
+ */
+export interface AgentOverview {
+  agentId: string
+  status: 'working' | 'idle' | 'error' | 'unknown'
+  latestMessage: string | null
+  latestMessageAt: number | null
+  activitySummary: string | null
+  currentTool: string | null
+  totalCostUsd: number
+  sessionCount: number
+}
+
+/**
+ * Payload of GET /claw/dashboard; `snapshot` events on the dashboard stream
+ * are parsed as the same shape.
+ */
+export interface DashboardResponse {
+  agents: AgentOverview[]
+  summary: {
+    totalAgents: number
+    totalCostUsd: number
+  }
+}
+
+/**
+ * Payload of a `status` event on the dashboard stream. Only `agentId`,
+ * `status` and `currentTool` are consumed by useAgentDashboard; `error` and
+ * `timestamp` are currently unused there.
+ */
+interface StatusEvent {
+  agentId: string
+  status: AgentOverview['status']
+  currentTool: string | null
+  error: string | null
+  timestamp: number
+}
+
+// React Query key prefix for dashboard data; the server base URL is appended.
+const DASHBOARD_QUERY_KEY = ['claw', 'dashboard']
+
+/**
+ * Loads the agent dashboard and keeps the cached copy up to date from the
+ * server's event stream.
+ *
+ * @param enabled When false, neither the fetch nor the stream is started.
+ * @returns The React Query result for the dashboard request.
+ */
+export function useAgentDashboard(enabled: boolean) {
+  const { baseUrl, isLoading: urlLoading } = useAgentServerUrl()
+  const queryClient = useQueryClient()
+  // Nothing runs until the caller enables it and the server URL is resolved.
+  const ready = enabled && Boolean(baseUrl) && !urlLoading
+
+  // Initial data load.
+  // NOTE(review): no refetchInterval is set here, so any periodic refresh
+  // would have to come from QueryClient defaults — confirm that is intended.
+  const query = useQuery<DashboardResponse>({
+    queryKey: [...DASHBOARD_QUERY_KEY, baseUrl],
+    queryFn: async () => {
+      const url = new URL('/claw/dashboard', baseUrl as string)
+      const response = await fetch(url.toString())
+      if (!response.ok) throw new Error('Failed to fetch dashboard')
+      return response.json()
+    },
+    enabled: ready,
+  })
+
+  // SSE subscription for real-time status patches
+  useEffect(() => {
+    if (!ready || !baseUrl) return
+
+    const streamUrl = new URL('/claw/dashboard/stream', baseUrl)
+    const eventSource = new EventSource(streamUrl.toString())
+
+    // `snapshot` replaces the whole cached dashboard.
+    eventSource.addEventListener('snapshot', (event) => {
+      try {
+        const dashboard = JSON.parse(event.data) as DashboardResponse
+        queryClient.setQueryData([...DASHBOARD_QUERY_KEY, baseUrl], dashboard)
+      } catch {}
+    })
+
+    // `status` patches a single agent; it is a no-op until data is cached.
+    // Payloads that fail to parse are silently ignored in both listeners.
+    eventSource.addEventListener('status', (event) => {
+      try {
+        const status = JSON.parse(event.data) as StatusEvent
+        queryClient.setQueryData<DashboardResponse>(
+          [...DASHBOARD_QUERY_KEY, baseUrl],
+          (prev) => {
+            if (!prev) return prev
+            return {
+              ...prev,
+              agents: prev.agents.map((agent) =>
+                agent.agentId === status.agentId
+                  ? {
+                      ...agent,
+                      status: status.status,
+                      currentTool: status.currentTool,
+                    }
+                  : agent,
+              ),
+            }
+          },
+        )
+      } catch {}
+    })
+
+    // Close the stream on unmount or when readiness / base URL changes.
+    return () => {
+      eventSource.close()
+    }
+  }, [ready, baseUrl, queryClient])
+
+  return query
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useClawChatHistory.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useClawChatHistory.ts
@@ -0,0 +1,71 @@
+import { useInfiniteQuery } from '@tanstack/react-query'
+import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
+import type { AgentHistoryPageResponse } from './claw-chat-types'
+
+const HISTORY_QUERY_KEY = 'claw-agent-history'
+/** GETs `url` and resolves with the parsed JSON body; throws `Error` when the response is not ok. */
+async function fetchClawJson<T>(url: string): Promise<T> {
+  const response = await fetch(url)
+  // On failure, prefer the `error` field of a JSON body over the generic status text.
+  if (!response.ok) {
+    let message = `Request failed with status ${response.status}`
+    try {
+      const body = (await response.json()) as { error?: string }
+      if (body.error) message = body.error
+    } catch {}
+    throw new Error(message)
+  }
+  // The payload is cast to T, not validated.
+  return response.json() as Promise<T>
+}
+/** Resolves `/claw${path}` against `baseUrl`; the caller in this file passes a `/`-prefixed path. */
+function buildClawUrl(baseUrl: string, path: string): URL {
+  return new URL(`/claw${path}`, baseUrl)
+}
+/**
+ * Cursor-paginated chat history for one agent. `error` and `isLoading` also
+ * reflect the agent-server URL lookup.
+ */
+export function useClawChatHistory({
+  agentId,
+  sessionKey,
+  enabled = true,
+  limit = 50,
+}: {
+  agentId: string
+  // null lets the server resolve the most recent user-chat session for the
+  // agent — avoids an extra /session round-trip and the race that came with it.
+  sessionKey: string | null
+  enabled?: boolean
+  limit?: number
+}) {
+  const {
+    baseUrl,
+    isLoading: urlLoading,
+    error: urlError,
+  } = useAgentServerUrl()
+  const query = useInfiniteQuery<AgentHistoryPageResponse, Error>({
+    queryKey: [HISTORY_QUERY_KEY, baseUrl, agentId, sessionKey],
+    initialPageParam: undefined as string | undefined,
+    queryFn: async ({ pageParam }) => {
+      // Encode the id so `/`, `?` or `#` in it cannot alter the request path.
+      const agentPath = `/agents/${encodeURIComponent(agentId)}/history`
+      const url = buildClawUrl(baseUrl as string, agentPath)
+      url.searchParams.set('limit', String(limit))
+      if (sessionKey) url.searchParams.set('sessionKey', sessionKey)
+      if (typeof pageParam === 'string' && pageParam) {
+        url.searchParams.set('cursor', pageParam)
+      }
+      return fetchClawJson<AgentHistoryPageResponse>(url.toString())
+    },
+    getNextPageParam: (lastPage) =>
+      lastPage.page.hasMore ? lastPage.page.cursor : undefined,
+    enabled: enabled && Boolean(baseUrl) && !urlLoading && Boolean(agentId),
+  })
+
+  return {
+    ...query,
+    error: query.error ?? urlError,
+    isLoading: query.isLoading || urlLoading,
+  }
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useOutboundQueue.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agent-command/useOutboundQueue.ts
@@ -0,0 +1,270 @@
+import { useCallback, useEffect, useRef, useState } from 'react'
+import type { OpenClawChatHistoryMessage } from '@/entrypoints/app/agents/useOpenClaw'
+import type { UserAttachmentPreview } from '@/lib/agent-conversations/types'
+import type { ServerAttachmentPayload } from '@/lib/attachments'
+import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
+/** Client-side delivery state of a queue row; `serverStatusToClient` maps server states onto it. */
+export type OutboundMessageStatus = 'queued' | 'sending' | 'failed'
+/** One queue row as held in hook state; rows rebuilt from a server snapshot carry `attachments: []`. */
+export interface OutboundMessage {
+  id: string
+  text: string
+  attachments: ServerAttachmentPayload[]
+  attachmentPreviews: UserAttachmentPreview[]
+  status: OutboundMessageStatus
+  error?: string
+  createdAt: number
+}
+/** Argument to `enqueue`; `text` is trimmed, and a call with no text and no attachments is ignored. */
+export interface OutboundQueueEnqueueInput {
+  text: string
+  attachments?: ServerAttachmentPayload[]
+  attachmentPreviews?: UserAttachmentPreview[]
+  history?: OpenClawChatHistoryMessage[]
+}
+/** Value returned by `useOutboundQueue`: the current rows plus the actions that act on them. */
+export interface OutboundQueueApi {
+  queue: OutboundMessage[]
+  enqueue(input: OutboundQueueEnqueueInput): void
+  cancel(id: string): void
+  retry(id: string): void
+}
+/** Hook options; with no `agentId` the queue stays empty and no stream is opened. */
+interface UseOutboundQueueOptions {
+  agentId: string | null | undefined
+  sessionKey?: string | null
+}
+/** Entry of the `items` array in a queue-stream SSE message, as parsed by `useOutboundQueue`. */
+interface ServerQueuedItem {
+  id: string
+  status: 'queued' | 'dispatching' | 'failed'
+  message: string
+  attachmentsPreview: Array<{
+    kind: 'image' | 'file'
+    mediaType: string
+    name?: string
+  }>
+  error?: string
+  createdAt: number
+}
+/** Returns a new queue id: `crypto.randomUUID()` when available, else a base-36 timestamp plus a random suffix. */
+function makeId(): string {
+  if (typeof crypto !== 'undefined' && crypto.randomUUID) {
+    return crypto.randomUUID()
+  }
+  return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`
+}
+
+/**
+ * Server-backed outbound message queue. The browser is purely a
+ * projection of server state — closing the tab is safe because the queue
+ * keeps draining server-side via the OutboundQueueService.
+ *
+ * Single id-keyed list: the client generates the queue id and sends it in
+ * the POST body, so the optimistic row and the SSE snapshot reconcile on
+ * the same key — there is no window in which the message renders twice.
+ * All local state is scoped to the current (baseUrl, agentId) pair.
+ */
+export function useOutboundQueue(
+  options: UseOutboundQueueOptions,
+): OutboundQueueApi {
+  const { agentId, sessionKey } = options
+  const { baseUrl } = useAgentServerUrl()
+  const sessionKeyRef = useRef<string | null | undefined>(sessionKey)
+  sessionKeyRef.current = sessionKey
+
+  const [items, setItems] = useState<OutboundMessage[]>([])
+  // Track which ids the server has confirmed seeing in any SSE snapshot.
+  // We use this to know whether a missing-from-snapshot id is "drained
+  // by the server" (drop it) or "still in flight client-side" (keep
+  // showing the optimistic row).
+  const everSeenByServerRef = useRef<Set<string>>(new Set())
+  // Local-only attachment previews, keyed by queue id. Data URLs never
+  // leave the browser — the SSE feed only carries metadata, so we hold
+  // them here so the chip strip keeps rendering after server takeover.
+  const previewMapRef = useRef<Map<string, UserAttachmentPreview[]>>(new Map())
+
+  useEffect(() => {
+    // Reset on every target change, not only when the target goes away;
+    // otherwise optimistic rows and seen-ids leak over from the previous agent.
+    setItems((prev) => (prev.length > 0 ? [] : prev))
+    everSeenByServerRef.current = new Set()
+    previewMapRef.current = new Map()
+    if (!baseUrl || !agentId) return
+    let cancelled = false
+    const url = `${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue/stream`
+    const source = new EventSource(url)
+    source.onmessage = (event) => {
+      if (cancelled) return
+      try {
+        const parsed = JSON.parse(event.data) as { items: ServerQueuedItem[] }
+        const snapshotIds = new Set(parsed.items.map((item) => item.id))
+        for (const id of snapshotIds) everSeenByServerRef.current.add(id)
+
+        setItems((prev) => {
+          const next: OutboundMessage[] = parsed.items.map((item) => ({
+            id: item.id,
+            text: item.message,
+            attachments: [],
+            attachmentPreviews: previewMapRef.current.get(item.id) ?? [],
+            status: serverStatusToClient(item.status),
+            error: item.error,
+            createdAt: item.createdAt,
+          }))
+          // Carry forward any optimistic / failed entries the server
+          // doesn't know about yet (POST in flight) or has finished
+          // dispatching but the client wants to keep visible (failed).
+          const carried = prev.filter((local) => {
+            if (snapshotIds.has(local.id)) return false
+            if (everSeenByServerRef.current.has(local.id)) {
+              // Server saw it before and it's gone now — drained.
+              previewMapRef.current.delete(local.id)
+              return false
+            }
+            return local.status !== 'failed' || Boolean(local.error)
+          })
+          return [...carried, ...next]
+        })
+      } catch {
+        // Malformed event — ignore; next snapshot will recover.
+      }
+    }
+    source.onerror = () => {
+      // Auto-reconnects; nothing to do here.
+    }
+    return () => {
+      cancelled = true
+      source.close()
+    }
+  }, [baseUrl, agentId])
+
+  const enqueue = useCallback(
+    (input: OutboundQueueEnqueueInput) => {
+      if (!baseUrl || !agentId) return
+      const trimmed = input.text.trim()
+      const attachments = input.attachments ?? []
+      if (!trimmed && attachments.length === 0) return
+
+      const id = makeId()
+      const previews = input.attachmentPreviews ?? []
+      previewMapRef.current.set(id, previews)
+      setItems((prev) => [
+        ...prev,
+        {
+          id,
+          text: trimmed,
+          attachments,
+          attachmentPreviews: previews,
+          status: 'queued',
+          createdAt: Date.now(),
+        },
+      ])
+
+      void (async () => {
+        try {
+          const response = await fetch(
+            `${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue`,
+            {
+              method: 'POST',
+              headers: { 'Content-Type': 'application/json' },
+              body: JSON.stringify({
+                id,
+                message: trimmed,
+                attachments: attachments.length > 0 ? attachments : undefined,
+                sessionKey: sessionKeyRef.current ?? undefined,
+                history: input.history,
+              }),
+            },
+          )
+          if (!response.ok) {
+            const text = await response.text().catch(() => '')
+            previewMapRef.current.delete(id)
+            setItems((prev) =>
+              prev.map((item) =>
+                item.id === id
+                  ? {
+                      ...item,
+                      status: 'failed',
+                      error:
+                        text || `Failed to enqueue (status ${response.status})`,
+                    }
+                  : item,
+              ),
+            )
+          }
+        } catch (err) {
+          // Only mark as failed if the SSE snapshot hasn't already
+          // taken ownership of the entry (i.e. the request actually
+          // reached the server).
+          if (everSeenByServerRef.current.has(id)) return
+          previewMapRef.current.delete(id)
+          setItems((prev) =>
+            prev.map((item) =>
+              item.id === id
+                ? {
+                    ...item,
+                    status: 'failed',
+                    error:
+                      err instanceof Error
+                        ? err.message
+                        : 'Failed to enqueue message',
+                  }
+                : item,
+            ),
+          )
+        }
+      })()
+    },
+    [baseUrl, agentId],
+  )
+
+  const cancel = useCallback(
+    (id: string) => {
+      // If the server has never seen this id, just drop it locally.
+      if (!everSeenByServerRef.current.has(id)) {
+        previewMapRef.current.delete(id)
+        setItems((prev) => prev.filter((item) => item.id !== id))
+        return
+      }
+      if (!baseUrl || !agentId) return
+      void fetch(
+        `${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue/${encodeURIComponent(id)}`,
+        { method: 'DELETE' },
+      ).catch(() => {})
+    },
+    [baseUrl, agentId],
+  )
+
+  const retry = useCallback(
+    (id: string) => {
+      if (!everSeenByServerRef.current.has(id)) {
+        // Optimistic-only entry, never made it to the server. Reset
+        // status so the user can press Send again.
+        setItems((prev) =>
+          prev.map((item) =>
+            item.id === id
+              ? { ...item, status: 'queued', error: undefined }
+              : item,
+          ),
+        )
+        return
+      }
+      if (!baseUrl || !agentId) return
+      void fetch(
+        `${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue/${encodeURIComponent(id)}/retry`,
+        { method: 'POST' },
+      ).catch(() => {})
+    },
+    [baseUrl, agentId],
+  )
+
+  return { queue: items, enqueue, cancel, retry }
+}
+/** Maps a server queue status to the client one: `dispatching` → `sending`, `failed` stays, anything else → `queued`. */
+function serverStatusToClient(
+  status: ServerQueuedItem['status'],
+): OutboundMessageStatus {
+  if (status === 'dispatching') return 'sending'
+  if (status === 'failed') return 'failed'
+  return 'queued'
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agents/AgentChat.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agents/AgentChat.tsx
@@ -1,393 +0,0 @@
-import {
-  ArrowLeft,
-  Bot,
-  CheckCircle2,
-  Loader2,
-  Send,
-  XCircle,
-} from 'lucide-react'
-import { type FC, useEffect, useRef, useState } from 'react'
-import {
-  Message,
-  MessageContent,
-  MessageResponse,
-} from '@/components/ai-elements/message'
-import {
-  Reasoning,
-  ReasoningContent,
-  ReasoningTrigger,
-} from '@/components/ai-elements/reasoning'
-import { Button } from '@/components/ui/button'
-import { Textarea } from '@/components/ui/textarea'
-import { consumeSSEStream } from '@/lib/sse'
-import { chatWithAgent, type OpenClawStreamEvent } from './useOpenClaw'
-
-interface ToolEntry {
-  id: string
-  name: string
-  status: 'running' | 'completed' | 'error'
-  durationMs?: number
-}
-
-type AssistantPart =
-  | { kind: 'thinking'; text: string; done: boolean }
-  | { kind: 'tool-batch'; tools: ToolEntry[] }
-  | { kind: 'text'; text: string }
-
-interface ChatTurn {
-  id: string
-  userText: string
-  parts: AssistantPart[]
-  done: boolean
-}
-
-interface AgentChatProps {
-  agentId: string
-  agentName: string
-  onBack: () => void
-}
-
-export const AgentChat: FC<AgentChatProps> = ({
-  agentId,
-  agentName,
-  onBack,
-}) => {
-  const [turns, setTurns] = useState<ChatTurn[]>([])
-  const [input, setInput] = useState('')
-  const [streaming, setStreaming] = useState(false)
-  const scrollRef = useRef<HTMLDivElement>(null)
-  const sessionKeyRef = useRef(crypto.randomUUID())
-  const streamAbortRef = useRef<AbortController | null>(null)
-
-  const textAccRef = useRef('')
-  const thinkAccRef = useRef('')
-
-  const scrollToBottom = () => {
-    scrollRef.current?.scrollTo(0, scrollRef.current.scrollHeight)
-  }
-
-  // biome-ignore lint/correctness/useExhaustiveDependencies: scroll on every turns change
-  useEffect(() => {
-    scrollToBottom()
-  }, [turns])
-
-  useEffect(() => {
-    return () => {
-      streamAbortRef.current?.abort()
-    }
-  }, [])
-
-  const updateCurrentTurnParts = (
-    updater: (parts: AssistantPart[]) => AssistantPart[],
-  ) => {
-    setTurns((prev) => {
-      const last = prev[prev.length - 1]
-      if (!last) return prev
-      return [...prev.slice(0, -1), { ...last, parts: updater(last.parts) }]
-    })
-  }
-
-  const processStreamEvent = (event: OpenClawStreamEvent) => {
-    switch (event.type) {
-      case 'text-delta': {
-        const delta = (event.data.text as string) ?? ''
-        textAccRef.current += delta
-        const text = textAccRef.current
-        updateCurrentTurnParts((parts) => {
-          const last = parts[parts.length - 1]
-          if (last?.kind === 'text') {
-            return [...parts.slice(0, -1), { ...last, text }]
-          }
-          return [...parts, { kind: 'text', text }]
-        })
-        break
-      }
-
-      case 'thinking': {
-        const delta = (event.data.text as string) ?? ''
-        thinkAccRef.current += delta
-        const text = thinkAccRef.current
-        updateCurrentTurnParts((parts) => {
-          const idx = parts.findIndex((p) => p.kind === 'thinking' && !p.done)
-          if (idx >= 0) {
-            return [
-              ...parts.slice(0, idx),
-              { ...parts[idx], text, done: false },
-              ...parts.slice(idx + 1),
-            ]
-          }
-          return [...parts, { kind: 'thinking', text, done: false }]
-        })
-        break
-      }
-
-      case 'tool-start': {
-        const tool: ToolEntry = {
-          id: (event.data.toolCallId as string) ?? crypto.randomUUID(),
-          name: (event.data.toolName as string) ?? 'unknown',
-          status: 'running',
-        }
-        updateCurrentTurnParts((parts) => {
-          const last = parts[parts.length - 1]
-          if (last?.kind === 'tool-batch') {
-            return [
-              ...parts.slice(0, -1),
-              { ...last, tools: [...last.tools, tool] },
-            ]
-          }
-          return [...parts, { kind: 'tool-batch', tools: [tool] }]
-        })
-        break
-      }
-
-      case 'tool-end': {
-        const toolId = event.data.toolCallId as string
-        const status =
-          (event.data.status as string) === 'error' ? 'error' : 'completed'
-        const durationMs = event.data.durationMs as number | undefined
-        updateCurrentTurnParts((parts) => {
-          for (let i = parts.length - 1; i >= 0; i--) {
-            const part = parts[i]
-            if (
-              part.kind === 'tool-batch' &&
-              part.tools.some((t) => t.id === toolId)
-            ) {
-              const updatedTools = part.tools.map((t) =>
-                t.id === toolId
-                  ? {
-                      ...t,
-                      status: status as ToolEntry['status'],
-                      durationMs,
-                    }
-                  : t,
-              )
-              return [
-                ...parts.slice(0, i),
-                { ...part, tools: updatedTools },
-                ...parts.slice(i + 1),
-              ]
-            }
-          }
-          return parts
-        })
-        break
-      }
-
-      case 'done': {
-        updateCurrentTurnParts((parts) =>
-          parts.map((part) =>
-            part.kind === 'thinking' ? { ...part, done: true } : part,
-          ),
-        )
-        setTurns((prev) => {
-          const last = prev[prev.length - 1]
-          if (!last) return prev
-          return [...prev.slice(0, -1), { ...last, done: true }]
-        })
-        break
-      }
-
-      case 'error': {
-        const msg =
-          (event.data.message as string) ??
-          (event.data.error as string) ??
-          'Unknown error'
-        updateCurrentTurnParts((parts) => [
-          ...parts,
-          { kind: 'text', text: `Error: ${msg}` },
-        ])
-        break
-      }
-    }
-  }
-
-  const handleSend = async () => {
-    const text = input.trim()
-    if (!text || streaming) return
-
-    const turn: ChatTurn = {
-      id: crypto.randomUUID(),
-      userText: text,
-      parts: [],
-      done: false,
-    }
-    setTurns((prev) => [...prev, turn])
-    setInput('')
-    setStreaming(true)
-
-    textAccRef.current = ''
-    thinkAccRef.current = ''
-    const abortController = new AbortController()
-    streamAbortRef.current = abortController
-
-    try {
-      const response = await chatWithAgent(
-        agentId,
-        text,
-        sessionKeyRef.current,
-        abortController.signal,
-      )
-
-      if (!response.ok) {
-        const err = await response.text()
-        updateCurrentTurnParts((parts) => [
-          ...parts,
-          { kind: 'text', text: `Error: ${err}` },
-        ])
-        return
-      }
-
-      await consumeSSEStream(
-        response,
-        processStreamEvent,
-        abortController.signal,
-      )
-    } catch (err) {
-      if (abortController.signal.aborted) return
-      const msg = err instanceof Error ? err.message : String(err)
-      updateCurrentTurnParts((parts) => [
-        ...parts,
-        { kind: 'text', text: `Error: ${msg}` },
-      ])
-    } finally {
-      if (streamAbortRef.current === abortController) {
-        streamAbortRef.current = null
-      }
-      setStreaming(false)
-    }
-  }
-
-  return (
-    <div className="flex h-[calc(100vh-4rem)] flex-col">
-      <div className="flex items-center gap-2 border-b px-4 py-3">
-        <Button variant="ghost" size="icon" onClick={onBack}>
-          <ArrowLeft className="size-4" />
-        </Button>
-        <h2 className="font-semibold text-lg">{agentName}</h2>
-      </div>
-
-      <div ref={scrollRef} className="flex-1 space-y-4 overflow-y-auto p-4">
-        {turns.map((turn) => (
-          <div key={turn.id} className="space-y-3">
-            {/* User message */}
-            <Message from="user">
-              <MessageContent>
-                <pre className="whitespace-pre-wrap font-sans text-sm">
-                  {turn.userText}
-                </pre>
-              </MessageContent>
-            </Message>
-
-            {/* Assistant response — all parts grouped */}
-            {turn.parts.length > 0 && (
-              <Message from="assistant">
-                <MessageContent>
-                  {turn.parts.map((part, i) => {
-                    const key = `${turn.id}-part-${i}`
-
-                    switch (part.kind) {
-                      case 'thinking':
-                        return (
-                          <Reasoning
-                            key={key}
-                            className="w-full"
-                            isStreaming={!part.done}
-                            defaultOpen={!part.done}
-                          >
-                            <ReasoningTrigger />
-                            <ReasoningContent>{part.text}</ReasoningContent>
-                          </Reasoning>
-                        )
-
-                      case 'tool-batch':
-                        return (
-                          <div key={key} className="w-full space-y-1">
-                            {part.tools.map((tool) => (
-                              <div
-                                key={tool.id}
-                                className="flex items-center gap-2 rounded-md border px-3 py-2 text-sm"
-                              >
-                                {tool.status === 'running' && (
-                                  <Loader2 className="size-3.5 animate-spin text-muted-foreground" />
-                                )}
-                                {tool.status === 'completed' && (
-                                  <CheckCircle2 className="size-3.5 text-green-500" />
-                                )}
-                                {tool.status === 'error' && (
-                                  <XCircle className="size-3.5 text-destructive" />
-                                )}
-                                <span className="font-mono text-xs">
-                                  {tool.name}
-                                </span>
-                                {tool.durationMs != null && (
-                                  <span className="ml-auto text-muted-foreground text-xs">
-                                    {(tool.durationMs / 1000).toFixed(1)}s
-                                  </span>
-                                )}
-                              </div>
-                            ))}
-                          </div>
-                        )
-
-                      case 'text':
-                        return (
-                          <MessageResponse key={key}>
-                            {part.text}
-                          </MessageResponse>
-                        )
-                      default:
-                        return null
-                    }
-                  })}
-                </MessageContent>
-              </Message>
-            )}
-
-            {/* Streaming indicator when waiting for first part */}
-            {!turn.done && turn.parts.length === 0 && streaming && (
-              <div className="flex gap-2">
-                <div className="flex h-7 w-7 shrink-0 items-center justify-center rounded-full bg-[var(--accent-orange)] text-white">
-                  <Bot className="h-3.5 w-3.5" />
-                </div>
-                <div className="flex items-center gap-1 rounded-xl rounded-tl-none border border-border/50 bg-card px-3 py-2.5 shadow-sm">
-                  <span className="h-1.5 w-1.5 animate-bounce rounded-full bg-[var(--accent-orange)] [animation-delay:-0.3s]" />
-                  <span className="h-1.5 w-1.5 animate-bounce rounded-full bg-[var(--accent-orange)] [animation-delay:-0.15s]" />
-                  <span className="h-1.5 w-1.5 animate-bounce rounded-full bg-[var(--accent-orange)]" />
-                </div>
-              </div>
-            )}
-          </div>
-        ))}
-      </div>
-
-      <div className="border-t p-4">
-        <div className="flex gap-2">
-          <Textarea
-            value={input}
-            onChange={(e) => setInput(e.target.value)}
-            onKeyDown={(e) => {
-              if (e.key === 'Enter' && !e.shiftKey) {
-                e.preventDefault()
-                handleSend()
-              }
-            }}
-            placeholder="Send a message..."
-            className="min-h-[44px] resize-none"
-            rows={1}
-          />
-          <Button
-            onClick={handleSend}
-            disabled={!input.trim() || streaming}
-            size="icon"
-          >
-            {streaming ? (
-              <Loader2 className="size-4 animate-spin" />
-            ) : (
-              <Send className="size-4" />
-            )}
-          </Button>
-        </div>
-      </div>
-    </div>
-  )
-}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agents/AgentTerminal.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agents/AgentTerminal.tsx
@@ -5,14 +5,16 @@ import {
 import { FitAddon } from '@xterm/addon-fit'
 import { WebLinksAddon } from '@xterm/addon-web-links'
 import { Terminal } from '@xterm/xterm'
-import { ArrowLeft } from 'lucide-react'
-import { type FC, useEffect, useRef } from 'react'
+import { ArrowLeft, Check, Copy } from 'lucide-react'
+import { type FC, useEffect, useRef, useState } from 'react'
 import '@xterm/xterm/css/xterm.css'
 import { Button } from '@/components/ui/button'
 import { getAgentServerUrl } from '@/lib/browseros/helpers'

 interface AgentTerminalProps {
  onBack: () => void
+  initialCommand?: string
+  onSessionExit?: () => void
 }

 type TerminalServerMessage =
@@ -36,26 +38,22 @@ function resolveCssColor(variableName: string): string {
  return color
 }

-function withAlpha(color: string, alpha: number): string {
-  const channels = color.match(/[\d.]+/g)
-  if (!channels || channels.length < 3) return color
-  const [red, green, blue] = channels
-  return `rgb(${red} ${green} ${blue} / ${alpha})`
-}
-
 function createTerminalTheme() {
  const isDark = document.documentElement.classList.contains('dark')
  const background = resolveCssColor('--background')
  const foreground = resolveCssColor('--foreground')
  const muted = resolveCssColor('--muted-foreground')
-  const accent = resolveCssColor('--accent-orange')

  return {
    background,
    foreground,
    cursor: foreground,
    cursorAccent: background,
-    selectionBackground: withAlpha(accent, isDark ? 0.3 : 0.2),
+    // Solid terminal-standard selection colors. Deriving from a CSS var
+    // with alpha composed against the background produced near-white
+    // rectangles on light mode, making selection invisible.
+    selectionBackground: isDark ? '#3a4463' : '#b4d4f4',
+    selectionInactiveBackground: isDark ? '#2b3348' : '#d9e5f3',
    selectionForeground: foreground,
    black: isDark ? '#16131a' : '#1f1b22',
    red: isDark ? '#ef8c7c' : '#c25544',
@@ -118,8 +116,38 @@ function parseTerminalMessage(data: unknown): TerminalServerMessage | null {
  return null
 }

-export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
+export const AgentTerminal: FC<AgentTerminalProps> = ({
+  onBack,
+  initialCommand,
+  onSessionExit,
+}) => {
  const containerRef = useRef<HTMLDivElement>(null)
+  const terminalRef = useRef<Terminal | null>(null)
+  // Refs keep the mount-once effect from tearing down the PTY when the
+  // parent re-renders with new inline callbacks.
+  const initialCommandRef = useRef(initialCommand)
+  const onSessionExitRef = useRef(onSessionExit)
+  initialCommandRef.current = initialCommand
+  onSessionExitRef.current = onSessionExit
+
+  const [copied, setCopied] = useState(false)
+
+  // Copy the current xterm selection to the browser clipboard and flag
+  // `copied` for 1.5 s. No-op if nothing is selected — users who want the
+  // whole buffer can select-all (Cmd/Ctrl+A) first. Uses the browser
+  // clipboard, so it works even when the running TUI has mouse tracking
+  // enabled (Opt+drag forces a selection regardless, see terminal config).
+  const handleCopy = async (): Promise<void> => {
+    const text = terminalRef.current?.getSelection()
+    if (!text) return
+    try {
+      await navigator.clipboard.writeText(text)
+      setCopied(true)
+      window.setTimeout(() => setCopied(false), 1500)
+    } catch {
+      // clipboard permission denied or unavailable — swallow, user will retry
+    }
+  }

  useEffect(() => {
    if (!containerRef.current) return
@@ -132,6 +160,34 @@ export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
      lineHeight: 1.25,
      scrollback: 8000,
      theme: createTerminalTheme(),
+      // Opt+click+drag forces a native text selection even when the
+      // running TUI has mouse-tracking enabled (xterm would otherwise
+      // forward every click to the app and selection wouldn't work).
+      macOptionClickForcesSelection: true,
+    })
+    terminalRef.current = terminal
+
+    // Cmd (macOS) / Ctrl (elsewhere) + A selects all; + C copies the current
+    // selection (a denied clipboard write is ignored) and, with no selection,
+    // still reaches the program. Returning false stops xterm forwarding the key.
+    terminal.attachCustomKeyEventHandler((event) => {
+      if (event.type !== 'keydown') return true
+      const isMac = navigator.platform.toUpperCase().includes('MAC')
+      const mod = isMac ? event.metaKey : event.ctrlKey
+      if (!mod) return true
+      const key = event.key.toLowerCase()
+      if (key === 'a') {
+        terminal.selectAll()
+        return false
+      }
+      if (key === 'c') {
+        const sel = terminal.getSelection()
+        if (sel) {
+          void navigator.clipboard.writeText(sel).catch(() => {})
+          return false
+        }
+      }
+      return true
     })

    const fitAddon = new FitAddon()
@@ -139,6 +195,12 @@ export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
    terminal.loadAddon(new WebLinksAddon())
    terminal.open(containerRef.current)

+    // React 18 StrictMode double-invokes effects in dev. Everything
+    // async inside this effect is scoped to an AbortController; the
+    // cleanup aborts it and any pending awaits bail out, so we never
+    // leak a second live WebSocket or duplicate xterm listeners.
+    const ac = new AbortController()
+    const cleanups: Array<() => void> = []
    let ws: WebSocket | null = null
    let sawExit = false

@@ -159,17 +221,28 @@ export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
      sendMessage({ type: 'resize', cols, rows })
    }

-    const connect = async () => {
+    const connect = async (): Promise<void> => {
      const baseUrl = await getAgentServerUrl()
+      if (ac.signal.aborted) return
      const wsUrl = new URL('/terminal/ws', baseUrl)
      wsUrl.protocol = wsUrl.protocol === 'https:' ? 'wss:' : 'ws:'

      ws = new WebSocket(wsUrl)
+      // If the effect was cleaned up between the await above and now,
+      // close the socket we just opened and bail.
+      if (ac.signal.aborted) {
+        ws.close()
+        ws = null
+        return
+      }
+      cleanups.push(() => ws?.close())

      ws.onopen = () => {
        fitAddon.fit()
        terminal.focus()
        sendResize()
+        const cmd = initialCommandRef.current
+        if (cmd) sendMessage({ type: 'input', data: `${cmd}\n` })
      }

      ws.onmessage = (event) => {
@@ -185,6 +258,7 @@ export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
          terminal.write(
            `\r\n\x1b[90m[session ended with exit ${message.exitCode}]\x1b[0m\r\n`,
          )
+          onSessionExitRef.current?.()
        }
      }

@@ -200,49 +274,41 @@ export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
      const inputDisposable = terminal.onData((data) => {
        sendMessage({ type: 'input', data })
      })
-
      const resizeDisposable = terminal.onResize(({ cols, rows }) => {
        sendResize(cols, rows)
      })
-
-      return () => {
-        inputDisposable.dispose()
-        resizeDisposable.dispose()
-      }
+      cleanups.push(() => inputDisposable.dispose())
+      cleanups.push(() => resizeDisposable.dispose())
    }

-    let disposeSocketBindings: (() => void) | undefined
-    void connect().then((disposeBindings) => {
-      disposeSocketBindings = disposeBindings
-    })
+    void connect()

    const resizeObserver = new ResizeObserver(() => {
      fitAddon.fit()
      sendResize()
    })
    resizeObserver.observe(containerRef.current)
+    cleanups.push(() => resizeObserver.disconnect())

-    const themeObserver = new MutationObserver(() => {
-      applyTheme()
-    })
+    const themeObserver = new MutationObserver(() => applyTheme())
    themeObserver.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ['class'],
    })
+    cleanups.push(() => themeObserver.disconnect())

    return () => {
-      resizeObserver.disconnect()
-      themeObserver.disconnect()
-      disposeSocketBindings?.()
-      ws?.close()
+      ac.abort()
+      for (const dispose of cleanups) dispose()
      terminal.dispose()
+      terminalRef.current = null
    }
  }, [])

  return (
    <div className="flex h-[calc(100dvh-10rem)] min-h-[32rem] w-full flex-col py-2 sm:min-h-[42rem] sm:py-4">
      <div className="flex min-h-0 flex-1 flex-col overflow-hidden rounded-xl border border-border bg-card shadow-sm">
-        <div className="flex items-center gap-3 border-border border-b px-4 py-3 sm:px-6">
+        <div className="flex items-center justify-between gap-3 border-border border-b px-4 py-3 sm:px-6">
          <div className="flex min-w-0 items-center gap-3">
            <Button variant="ghost" size="icon" onClick={onBack}>
              <ArrowLeft className="size-4" />
@@ -256,6 +322,14 @@ export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
              </div>
            </div>
          </div>
+          <Button variant="outline" size="sm" onClick={handleCopy}>
+            {copied ? (
+              <Check className="mr-1 size-3.5" />
+            ) : (
+              <Copy className="mr-1 size-3.5" />
+            )}
+            {copied ? 'Copied' : 'Copy'}
+          </Button>
        </div>

        <div className="min-h-0 flex-1 p-4 sm:p-6">
@@ -269,7 +343,7 @@ export const AgentTerminal: FC<AgentTerminalProps> = ({ onBack }) => {
              </div>
            </div>

-            <div className="min-h-0 flex-1 px-4 py-4 sm:px-5 sm:py-5">
+            <div className="min-h-0 flex-1 cursor-text px-4 py-4 sm:px-5 sm:py-5">
              <div ref={containerRef} className="h-full w-full" />
            </div>
          </div>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agents/AgentsPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agents/AgentsPage.tsx
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agents/openclaw-cli-providers.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agents/openclaw-cli-providers.tsx
@@ -0,0 +1,185 @@
+import { useQuery } from '@tanstack/react-query'
+import { CheckCircle2, Loader2, Terminal, TriangleAlert } from 'lucide-react'
+import type { FC } from 'react'
+import { Button } from '@/components/ui/button'
+import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
+
+export interface OpenClawCliProvider {
+  id: string
+  displayName: string
+  description: string
+  models: readonly string[]
+  authLoginCommand: string
+}
+
+export interface OpenClawCliProviderAuthStatus {
+  installed: boolean
+  loggedIn: boolean
+  accountLabel?: string
+  subscriptionLabel?: string
+  error?: string
+}
+
+export interface OpenClawCliProviderOption {
+  id: string
+  type: string
+  name: string
+  modelId: string
+}
+
+const CLAUDE_CLI_PROVIDER: OpenClawCliProvider = {
+  id: 'claude-cli',
+  displayName: 'Anthropic Claude CLI',
+  description: 'Uses your Claude.ai subscription via the Claude Code CLI',
+  models: ['claude-sonnet-4-6', 'claude-opus-4-6', 'claude-haiku-4-5'],
+  authLoginCommand: 'claude /login',
+}
+
+export const OPENCLAW_CLI_PROVIDERS: readonly OpenClawCliProvider[] = [
+  CLAUDE_CLI_PROVIDER,
+]
+
+/** Looks up a registered CLI provider by `id`; `undefined` when none matches. */
+export function findOpenClawCliProviderById(
+  id: string,
+): OpenClawCliProvider | undefined {
+  for (const candidate of OPENCLAW_CLI_PROVIDERS) {
+    if (candidate.id === id) return candidate
+  }
+  return undefined
+}
+
+/**
+ * Expands every registered CLI provider into one selectable option per
+ * model. Option ids have the form `<providerId>/<modelId>`.
+ */
+export function buildOpenClawCliProviderOptions(): OpenClawCliProviderOption[] {
+  const options: OpenClawCliProviderOption[] = []
+  for (const provider of OPENCLAW_CLI_PROVIDERS) {
+    for (const modelId of provider.models) {
+      options.push({
+        id: `${provider.id}/${modelId}`,
+        type: provider.id,
+        name: provider.displayName,
+        modelId,
+      })
+    }
+  }
+  return options
+}
+
+/**
+ * Fetches the auth status of one CLI provider from the agent server.
+ *
+ * @param baseUrl - Agent server URL the request path is appended to.
+ * @param providerId - Provider id; percent-encoded before being placed in
+ *   the path so it always stays a single path segment.
+ * @returns The parsed JSON body of a successful response.
+ * @throws Error when the response is not OK. The message is the `error`
+ *   field of the JSON body when there is one, otherwise a generic message
+ *   that includes the HTTP status.
+ */
+async function fetchCliProviderAuthStatus(
+  baseUrl: string,
+  providerId: string,
+): Promise<OpenClawCliProviderAuthStatus> {
+  const url = `${baseUrl}/claw/providers/${encodeURIComponent(providerId)}/auth-status`
+  const res = await fetch(url)
+  if (!res.ok) {
+    let message = `Auth status request failed (${res.status})`
+    try {
+      const body = (await res.json()) as { error?: string }
+      if (body.error) message = body.error
+    } catch {
+      // Best effort: the error body could not be read as a JSON object, so
+      // fall back to the generic status message built above.
+    }
+    throw new Error(message)
+  }
+  return res.json() as Promise<OpenClawCliProviderAuthStatus>
+}
+
+/**
+ * React Query hook that reads the auth status of one CLI provider from the
+ * agent server. The query only runs once a server URL is available and
+ * `enabled` is true; while `enabled` it is re-fetched every 2 seconds.
+ */
+export function useOpenClawCliProviderAuthStatus(
+  providerId: string,
+  enabled: boolean,
+) {
+  const { baseUrl, isLoading: urlLoading } = useAgentServerUrl()
+  const canQuery = !!baseUrl && !urlLoading && enabled
+  const pollInterval = enabled ? 2000 : false
+
+  return useQuery<OpenClawCliProviderAuthStatus, Error>({
+    queryKey: ['openclaw-cli-auth', baseUrl, providerId],
+    queryFn: () => fetchCliProviderAuthStatus(baseUrl as string, providerId),
+    enabled: canQuery,
+    refetchInterval: pollInterval,
+  })
+}
+
+interface OpenClawCliProviderStatusPanelProps {
+  provider: OpenClawCliProvider
+  status: OpenClawCliProviderAuthStatus | undefined
+  loading: boolean
+  fetchError: Error | null
+  onConnect: () => void
+}
+
+export const OpenClawCliProviderStatusPanel: FC<
+  OpenClawCliProviderStatusPanelProps
+> = ({ provider, status, loading, fetchError, onConnect }) => {
+  // Initial fetch (no data yet).
+  if (loading && !status) {
+    return (
+      <div className="flex items-center gap-2 rounded-md border border-border bg-muted/30 px-3 py-2 text-sm">
+        <Loader2 className="size-4 animate-spin text-muted-foreground" />
+        <span className="text-muted-foreground">
+          Checking {provider.displayName} status…
+        </span>
+      </div>
+    )
+  }
+
+  if (fetchError) {
+    return (
+      <div className="flex items-start gap-2 rounded-md border border-destructive/30 bg-destructive/5 px-3 py-2 text-sm">
+        <TriangleAlert className="mt-0.5 size-4 text-destructive" />
+        <div>
+          <div className="font-medium text-destructive">
+            Could not read {provider.displayName} status
+          </div>
+          <div className="text-muted-foreground text-xs">
+            {fetchError.message}
+          </div>
+        </div>
+      </div>
+    )
+  }
+
+  if (!status) return null
+
+  // Install failed or binary missing.
+  if (!status.installed) {
+    return (
+      <div className="flex items-start gap-2 rounded-md border border-amber-500/40 bg-amber-500/5 px-3 py-2 text-sm">
+        <TriangleAlert className="mt-0.5 size-4 text-amber-600" />
+        <div>
+          <div className="font-medium">
+            {provider.displayName} not installed
+          </div>
+          <div className="text-muted-foreground text-xs">
+            The gateway will try to install it on the next restart. If this
+            persists, check your network and the gateway logs.
+          </div>
+        </div>
+      </div>
+    )
+  }
+
+  // Happy path.
+  if (status.loggedIn) {
+    const identityBits = [
+      status.accountLabel,
+      status.subscriptionLabel ? `(${status.subscriptionLabel})` : null,
+    ].filter(Boolean)
+    const identity = identityBits.length > 0 ? identityBits.join(' ') : 'Ready'
+    return (
+      <div className="flex items-center gap-2 rounded-md border border-emerald-500/40 bg-emerald-500/5 px-3 py-2 text-sm">
+        <CheckCircle2 className="size-4 text-emerald-600" />
+        <div className="min-w-0 flex-1">
+          <div className="font-medium">Connected to {provider.displayName}</div>
+          <div className="truncate text-muted-foreground text-xs">
+            {identity}
+          </div>
+        </div>
+      </div>
+    )
+  }
+
+  // Installed but not logged in.
+  return (
+    <div className="flex flex-col gap-2 rounded-md border border-border bg-muted/30 px-3 py-3 text-sm">
+      <div>
+        <div className="font-medium">{provider.displayName} not set up</div>
+        <div className="text-muted-foreground text-xs">
+          {provider.description}
+        </div>
+        {status.error && (
+          <div className="mt-1 text-destructive text-xs">{status.error}</div>
+        )}
+      </div>
+      <Button size="sm" variant="outline" onClick={onConnect} className="w-fit">
+        <Terminal className="mr-1 size-4" />
+        Connect {provider.displayName}
+      </Button>
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agents/openclaw-supported-providers.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agents/openclaw-supported-providers.ts
@@ -0,0 +1,24 @@
+import type { LlmProviderConfig, ProviderType } from '@/lib/llm-providers/types'
+
+const OPENCLAW_SUPPORTED_PROVIDER_TYPES: ProviderType[] = [
+  'openrouter',
+  'openai',
+  'openai-compatible',
+  'anthropic',
+  'moonshot',
+]
+
+/** True when `providerType` is one of the provider types OpenClaw supports. */
+export function isOpenClawSupportedProviderType(
+  providerType: ProviderType,
+): boolean {
+  return OPENCLAW_SUPPORTED_PROVIDER_TYPES.some(
+    (supported) => supported === providerType,
+  )
+}
+
+/**
+ * Returns, in their original order, the providers that have an API key set
+ * and whose type OpenClaw supports.
+ */
+export function getOpenClawSupportedProviders(
+  providers: LlmProviderConfig[],
+): LlmProviderConfig[] {
+  const usable: LlmProviderConfig[] = []
+  for (const candidate of providers) {
+    if (!candidate.apiKey) continue
+    if (!isOpenClawSupportedProviderType(candidate.type)) continue
+    usable.push(candidate)
+  }
+  return usable
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/agents/useOpenClaw.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/agents/useOpenClaw.ts
@@ -1,7 +1,3 @@
-import type {
-  BrowserOSAgentRoleId,
-  BrowserOSCustomRoleInput,
-} from '@browseros/shared/types/role-aware-agents'
 import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
 import { getAgentServerUrl } from '@/lib/browseros/helpers'
 import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
@@ -11,27 +7,6 @@ export interface AgentEntry {
  name: string
  workspace: string
  model?: unknown
-  role?: {
-    roleSource: 'builtin' | 'custom'
-    roleId?: BrowserOSAgentRoleId
-    roleName: string
-    shortDescription: string
-  }
-}
-
-export interface RoleTemplateSummary {
-  id: BrowserOSAgentRoleId
-  name: string
-  shortDescription: string
-  longDescription: string
-  recommendedApps: string[]
-  defaultAgentName: string
-  boundaries: Array<{
-    key: string
-    label: string
-    description: string
-    defaultMode: 'allow' | 'ask' | 'block'
-  }>
 }

 export interface OpenClawStatus {
@@ -61,8 +36,6 @@ export interface OpenClawStatus {

 export interface OpenClawAgentMutationInput {
  name: string
-  roleId?: BrowserOSAgentRoleId
-  customRole?: BrowserOSCustomRoleInput
  providerType?: string
  providerName?: string
  baseUrl?: string
@@ -86,9 +59,15 @@ export function getModelDisplayName(model: unknown): string | undefined {
 export const OPENCLAW_QUERY_KEYS = {
  status: 'openclaw-status',
  agents: 'openclaw-agents',
-  roles: 'openclaw-roles',
 } as const

+export type GatewayLifecycleAction =
+  | 'setup'
+  | 'start'
+  | 'stop'
+  | 'restart'
+  | 'reconnect'
+
 async function clawFetch<T>(
  baseUrl: string,
  path: string,
@@ -117,16 +96,6 @@ async function fetchOpenClawAgents(baseUrl: string): Promise<AgentEntry[]> {
  return data.agents ?? []
 }

-async function fetchOpenClawRoles(
-  baseUrl: string,
-): Promise<RoleTemplateSummary[]> {
-  const data = await clawFetch<{ roles: RoleTemplateSummary[] }>(
-    baseUrl,
-    '/roles',
-  )
-  return data.roles ?? []
-}
-
 async function invalidateOpenClawQueries(
  queryClient: ReturnType<typeof useQueryClient>,
 ): Promise<void> {
@@ -179,28 +148,6 @@ export function useOpenClawAgents(enabled = true) {
  }
 }

-export function useOpenClawRoles() {
-  const {
-    baseUrl,
-    isLoading: urlLoading,
-    error: urlError,
-  } = useAgentServerUrl()
-
-  const query = useQuery<RoleTemplateSummary[], Error>({
-    queryKey: [OPENCLAW_QUERY_KEYS.roles, baseUrl],
-    queryFn: () => fetchOpenClawRoles(baseUrl as string),
-    enabled: !!baseUrl && !urlLoading,
-    staleTime: 60_000,
-  })
-
-  return {
-    roles: query.data ?? [],
-    loading: query.isLoading || urlLoading,
-    error: query.error ?? urlError,
-    refetch: query.refetch,
-  }
-}
-
 export function useOpenClawMutations() {
  const { baseUrl, isLoading: urlLoading } = useAgentServerUrl()
  const queryClient = useQueryClient()
@@ -278,6 +225,13 @@ export function useOpenClawMutations() {
    onSuccess,
  })

+  let pendingGatewayAction: GatewayLifecycleAction | null = null
+  if (setupMutation.isPending) pendingGatewayAction = 'setup'
+  else if (restartMutation.isPending) pendingGatewayAction = 'restart'
+  else if (stopMutation.isPending) pendingGatewayAction = 'stop'
+  else if (startMutation.isPending) pendingGatewayAction = 'start'
+  else if (reconnectMutation.isPending) pendingGatewayAction = 'reconnect'
+
  return {
    setupOpenClaw: setupMutation.mutateAsync,
    createAgent: createMutation.mutateAsync,
@@ -298,6 +252,7 @@ export function useOpenClawMutations() {
    creating: createMutation.isPending,
    deleting: deleteMutation.isPending,
    reconnecting: reconnectMutation.isPending,
+    pendingGatewayAction,
  }
 }

@@ -314,17 +269,66 @@ export interface OpenClawStreamEvent {
  data: Record<string, unknown>
 }

+/** One prior message sent as chat history along with a new chat request. */
+export interface OpenClawChatHistoryMessage {
+  role: 'user' | 'assistant'
+  content: string
+}
+
+/** The subset of a conversation turn needed to rebuild chat history. */
+interface ChatHistoryTurnLike {
+  userText: string
+  parts: Array<{ kind: string; text?: string }>
+}
+
+/**
+ * Flattens conversation turns into alternating user/assistant messages.
+ *
+ * For each turn the trimmed user text becomes a `user` message (skipped
+ * when blank), and the trimmed, non-empty `text` parts are joined with a
+ * blank line into a single `assistant` message (skipped when nothing is
+ * left). Parts of any other kind are ignored.
+ */
+export function buildChatHistoryFromTurns(
+  turns: ChatHistoryTurnLike[],
+): OpenClawChatHistoryMessage[] {
+  return turns.flatMap((turn) => {
+    const turnMessages: OpenClawChatHistoryMessage[] = []
+
+    const question = turn.userText.trim()
+    if (question) {
+      turnMessages.push({ role: 'user', content: question })
+    }
+
+    const replyChunks: string[] = []
+    for (const part of turn.parts) {
+      if (part.kind !== 'text' || typeof part.text !== 'string') continue
+      const chunk = part.text.trim()
+      if (chunk) replyChunks.push(chunk)
+    }
+
+    const reply = replyChunks.join('\n\n')
+    if (reply) {
+      turnMessages.push({ role: 'assistant', content: reply })
+    }
+
+    return turnMessages
+  })
+}
+
 export async function chatWithAgent(
  agentId: string,
  message: string,
  sessionKey?: string,
+  history: OpenClawChatHistoryMessage[] = [],
  signal?: AbortSignal,
+  attachments?: ReadonlyArray<unknown>,
 ): Promise<Response> {
  const baseUrl = await getAgentServerUrl()
  return fetch(`${baseUrl}/claw/agents/${agentId}/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
-    body: JSON.stringify({ message, sessionKey }),
+    body: JSON.stringify({
+      message,
+      sessionKey,
+      history,
+      ...(attachments && attachments.length > 0 ? { attachments } : {}),
+    }),
    signal,
  })
 }
--- a/packages/browseros-agent/apps/agent/entrypoints/newtab/layout/route-utils.test.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/newtab/layout/route-utils.test.ts
@@ -18,8 +18,8 @@ describe('route-utils', () => {
    expect(shouldUseChatSession('/home/chat')).toBe(true)
  })

-  it('keeps the focus grid on home while hiding it on dedicated full-screen routes', () => {
-    expect(shouldHideFocusGrid('/home')).toBe(false)
+  it('hides the focus grid on full-screen routes', () => {
+    expect(shouldHideFocusGrid('/home')).toBe(true)
    expect(shouldHideFocusGrid('/home/agents/main')).toBe(true)
    expect(shouldHideFocusGrid('/home/chat')).toBe(true)
    expect(shouldHideFocusGrid('/home/skills')).toBe(true)
--- a/packages/browseros-agent/apps/agent/entrypoints/newtab/layout/route-utils.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/newtab/layout/route-utils.ts
@@ -1,4 +1,5 @@
 const HIDE_FOCUS_GRID_PATHS = new Set([
+  '/home',
  '/home/soul',
  '/home/memory',
  '/home/skills',
--- a/packages/browseros-agent/apps/agent/lib/agent-conversations/types.ts
+++ b/packages/browseros-agent/apps/agent/lib/agent-conversations/types.ts
@@ -12,6 +12,8 @@ export interface AssistantThinkingPart {
 export interface ToolEntry {
  id: string
  name: string
+  label: string
+  subject?: string
  status: 'running' | 'completed' | 'error'
  durationMs?: number
 }
@@ -26,9 +28,24 @@ export type AssistantPart =
  | AssistantThinkingPart
  | AssistantToolBatchPart

+/**
+ * Attachments rendered alongside the user's text on the optimistic turn
+ * — populated when the composer staged any images/files. The dataUrl is
+ * the same one the server received; we keep it in memory only for the
+ * lifetime of the live turn (history reload re-fetches via the JSONL).
+ */
+export interface UserAttachmentPreview {
+  id: string
+  kind: 'image' | 'file'
+  mediaType: string
+  name: string
+  dataUrl?: string
+}
+
 export interface AgentConversationTurn {
  id: string
  userText: string
+  userAttachments?: UserAttachmentPreview[]
  parts: AssistantPart[]
  done: boolean
  timestamp: number
@@ -50,4 +67,7 @@ export interface AgentCardData {
  status: 'idle' | 'working' | 'error'
  lastMessage?: string
  lastMessageTimestamp?: number
+  activitySummary?: string
+  currentTool?: string
+  costUsd?: number
 }
--- a/packages/browseros-agent/apps/agent/lib/attachments.ts
+++ b/packages/browseros-agent/apps/agent/lib/attachments.ts
@@ -0,0 +1,369 @@
+/**
+ * Composer attachment helpers — validation, image compression, and the
+ * client-side payload shape sent to /agents/:id/chat.
+ *
+ * Image attachments travel as `data:` URLs (base64) so the gateway, which
+ * runs on 127.0.0.1 over Lima virtiofs, can ingest them as standard
+ * OpenAI-style content blocks. Non-image text-shaped files are read into
+ * memory and travel as their extracted text body — the server inlines
+ * them as a fenced `<attachment>` block on the user message.
+ */
+
+export const MAX_ATTACHMENTS_PER_MESSAGE = 10
+export const MAX_IMAGE_BYTES = 5 * 1024 * 1024 // 5 MB after compression
+export const MAX_FILE_TEXT_BYTES = 1 * 1024 * 1024 // 1 MB extracted text
+export const IMAGE_LONG_EDGE_CAP = 2048
+
+export const ALLOWED_IMAGE_MEDIA_TYPES = [
+  'image/png',
+  'image/jpeg',
+  'image/jpg',
+  'image/webp',
+  'image/gif',
+] as const
+
+export const ALLOWED_FILE_MEDIA_TYPE_PREFIXES = [
+  'text/',
+  'application/json',
+] as const
+
+export type ServerImageAttachment = {
+  kind: 'image'
+  mediaType: string
+  dataUrl: string
+  name?: string
+}
+
+export type ServerFileAttachment = {
+  kind: 'file'
+  mediaType: string
+  name: string
+  text: string
+}
+
+export type ServerAttachmentPayload =
+  | ServerImageAttachment
+  | ServerFileAttachment
+
+/** UI-side representation: what the composer needs to render a chip. */
+export interface StagedAttachment {
+  id: string
+  kind: 'image' | 'file'
+  mediaType: string
+  name: string
+  // Set for images so the chip thumbnail can render directly. For files
+  // we don't need a preview yet, but the field exists for v2 PDF previews.
+  dataUrl?: string
+  // Pre-computed payload for the server. Built once at staging time so
+  // re-renders don't re-encode large blobs.
+  payload: ServerAttachmentPayload
+}
+
+export type AttachmentValidationError =
+  | { code: 'too_many'; message: string }
+  | { code: 'unsupported_type'; message: string; mediaType: string }
+  | { code: 'too_large'; message: string }
+  | { code: 'read_failed'; message: string }
+
+export type StageAttachmentResult =
+  | { ok: true; attachment: StagedAttachment }
+  | { ok: false; error: AttachmentValidationError }
+
+/** True when `mediaType` exactly matches one of the allowed image types. */
+function isImageMediaType(mediaType: string): boolean {
+  return ALLOWED_IMAGE_MEDIA_TYPES.some((allowed) => allowed === mediaType)
+}
+
+/** True when `mediaType` starts with one of the allowed file-type prefixes. */
+function isAllowedFileMediaType(mediaType: string): boolean {
+  for (const prefix of ALLOWED_FILE_MEDIA_TYPE_PREFIXES) {
+    if (mediaType.startsWith(prefix)) return true
+  }
+  return false
+}
+
+/**
+ * Returns a fresh attachment id: `crypto.randomUUID()` when the runtime
+ * provides it, otherwise an `att-<time>-<random>` string.
+ */
+function makeId(): string {
+  if (typeof crypto === 'undefined' || !crypto.randomUUID) {
+    const stamp = Date.now().toString(36)
+    const noise = Math.random().toString(36).slice(2, 10)
+    return `att-${stamp}-${noise}`
+  }
+  return crypto.randomUUID()
+}
+
+/**
+ * Read a `File` and produce the staged-attachment shape — validate type,
+ * compress if it's a large image, and pre-build the server payload.
+ *
+ * Failures (unsupported type, oversize, read or decode errors) are reported
+ * through the `{ ok: false, error }` branch of the result.
+ *
+ * @param file - The file to stage. A missing `file.type` is treated as
+ *   `application/octet-stream`, which is rejected as unsupported.
+ * @returns The staged attachment, or the validation error explaining why
+ *   the file was not staged.
+ */
+export async function stageAttachment(
+  file: File,
+): Promise<StageAttachmentResult> {
+  const mediaType = file.type || 'application/octet-stream'
+
+  if (isImageMediaType(mediaType)) {
+    try {
+      const compressed = await compressImageIfNeeded(file)
+      // Compression may re-encode the image (e.g. PNG -> JPEG). Label the
+      // attachment with the type of the bytes actually being sent so the
+      // declared media type agrees with the data URL.
+      const encodedType = compressed.type || mediaType
+      const dataUrl = await readAsDataUrl(compressed)
+      // Loose ceiling on the encoded size: base64 inflates the bytes by
+      // roughly 4/3, so 2x leaves headroom over MAX_IMAGE_BYTES. Reject
+      // early so we never POST something the route will 400.
+      if (dataUrl.length > MAX_IMAGE_BYTES * 2) {
+        return {
+          ok: false,
+          error: {
+            code: 'too_large',
+            message: `Image "${file.name}" is too large (max ${humanBytes(
+              MAX_IMAGE_BYTES,
+            )}).`,
+          },
+        }
+      }
+      return {
+        ok: true,
+        attachment: {
+          id: makeId(),
+          kind: 'image',
+          mediaType: encodedType,
+          name: file.name || 'image',
+          dataUrl,
+          payload: {
+            kind: 'image',
+            mediaType: encodedType,
+            dataUrl,
+            name: file.name || undefined,
+          },
+        },
+      }
+    } catch (err) {
+      return {
+        ok: false,
+        error: {
+          code: 'read_failed',
+          message:
+            err instanceof Error
+              ? err.message
+              : `Failed to read image "${file.name}".`,
+        },
+      }
+    }
+  }
+
+  if (isAllowedFileMediaType(mediaType)) {
+    // Check the byte size before reading so an oversized file is rejected
+    // without pulling it into memory. `file.text()` decodes as UTF-8, so
+    // the decoded string never has more code units than the file has
+    // bytes — no second check is needed after the read.
+    if (file.size > MAX_FILE_TEXT_BYTES) {
+      return {
+        ok: false,
+        error: {
+          code: 'too_large',
+          message: `File "${file.name}" is too large (max ${humanBytes(
+            MAX_FILE_TEXT_BYTES,
+          )}).`,
+        },
+      }
+    }
+    let text: string
+    try {
+      text = await file.text()
+    } catch (err) {
+      return {
+        ok: false,
+        error: {
+          code: 'read_failed',
+          message:
+            err instanceof Error
+              ? err.message
+              : `Failed to read file "${file.name}".`,
+        },
+      }
+    }
+    return {
+      ok: true,
+      attachment: {
+        id: makeId(),
+        kind: 'file',
+        mediaType,
+        name: file.name || 'attachment',
+        payload: {
+          kind: 'file',
+          mediaType,
+          name: file.name || 'attachment',
+          text,
+        },
+      },
+    }
+  }
+
+  return {
+    ok: false,
+    error: {
+      code: 'unsupported_type',
+      message: `Unsupported attachment type: ${mediaType || 'unknown'}`,
+      mediaType,
+    },
+  }
+}
+
+/**
+ * Stages a batch of files against the per-message attachment cap.
+ *
+ * Files beyond the remaining slots are dropped and reported with a single
+ * `too_many` error; the rest are staged one after another, in order. The
+ * result separates the successfully staged attachments from the errors so
+ * the caller can report each problem individually.
+ */
+export async function stageAttachments(
+  files: File[],
+  alreadyStaged: number,
+): Promise<{
+  staged: StagedAttachment[]
+  errors: AttachmentValidationError[]
+}> {
+  const staged: StagedAttachment[] = []
+  const errors: AttachmentValidationError[] = []
+  const slotsLeft = Math.max(0, MAX_ATTACHMENTS_PER_MESSAGE - alreadyStaged)
+
+  // Nothing to stage, nothing to report.
+  if (files.length === 0) return { staged, errors }
+
+  // The message is already full: reject the whole batch.
+  if (slotsLeft === 0) {
+    errors.push({
+      code: 'too_many',
+      message: `At most ${MAX_ATTACHMENTS_PER_MESSAGE} attachments per message.`,
+    })
+    return { staged, errors }
+  }
+
+  // Partial fit: keep the leading files and say how many were dropped.
+  if (files.length > slotsLeft) {
+    errors.push({
+      code: 'too_many',
+      message: `Only the first ${slotsLeft} of ${files.length} files were attached (max ${MAX_ATTACHMENTS_PER_MESSAGE}).`,
+    })
+  }
+
+  const accepted = files.slice(0, slotsLeft)
+  for (const candidate of accepted) {
+    const outcome = await stageAttachment(candidate)
+    if (outcome.ok) staged.push(outcome.attachment)
+    else errors.push(outcome.error)
+  }
+
+  return { staged, errors }
+}
+
+/**
+ * Downscale and re-encode images that are too big to send as-is.
+ *
+ * - Files of at most 1.5 MB are returned untouched without being decoded.
+ * - Larger files whose long edge is within `IMAGE_LONG_EDGE_CAP` and whose
+ *   size is within `MAX_IMAGE_BYTES` are also returned untouched.
+ * - Everything else is scaled so its long edge is at most
+ *   `IMAGE_LONG_EDGE_CAP` and re-encoded as JPEG (quality 0.85), whatever
+ *   the source format. JPEG has no alpha channel, so transparency is
+ *   flattened onto white; animation is not preserved.
+ *
+ * @param file - Source image.
+ * @returns The original `file` when no transform was applied (or no 2D
+ *   context could be obtained), otherwise a new `image/jpeg` blob.
+ * @throws When the image cannot be decoded or the canvas fails to encode.
+ */
+export async function compressImageIfNeeded(file: File): Promise<Blob> {
+  // Cheap path: small files don't need any transform.
+  if (file.size <= 1.5 * 1024 * 1024) return file
+
+  const bitmap = await blobToImageBitmap(file)
+  const { width, height } = bitmap
+  const longEdge = Math.max(width, height)
+  if (longEdge <= IMAGE_LONG_EDGE_CAP && file.size <= MAX_IMAGE_BYTES) {
+    bitmap.close?.()
+    return file
+  }
+
+  const scale = Math.min(1, IMAGE_LONG_EDGE_CAP / longEdge)
+  const targetWidth = Math.max(1, Math.round(width * scale))
+  const targetHeight = Math.max(1, Math.round(height * scale))
+
+  const canvas =
+    typeof OffscreenCanvas !== 'undefined'
+      ? new OffscreenCanvas(targetWidth, targetHeight)
+      : Object.assign(document.createElement('canvas'), {
+          width: targetWidth,
+          height: targetHeight,
+        })
+
+  const ctx = canvas.getContext('2d') as
+    | CanvasRenderingContext2D
+    | OffscreenCanvasRenderingContext2D
+    | null
+  if (!ctx) {
+    bitmap.close?.()
+    return file
+  }
+  // Paint an opaque white backdrop first: when a canvas is serialized to a
+  // format without alpha, transparent pixels would otherwise come out black.
+  ctx.fillStyle = '#ffffff'
+  ctx.fillRect(0, 0, targetWidth, targetHeight)
+  ctx.drawImage(bitmap, 0, 0, targetWidth, targetHeight)
+  bitmap.close?.()
+
+  const outputType = 'image/jpeg'
+  // `HTMLCanvasElement` is not defined in contexts without DOM globals, so
+  // guard the `instanceof` to avoid a ReferenceError on the OffscreenCanvas
+  // path.
+  if (
+    typeof HTMLCanvasElement !== 'undefined' &&
+    canvas instanceof HTMLCanvasElement
+  ) {
+    return await new Promise<Blob>((resolve, reject) => {
+      canvas.toBlob(
+        (blob) => {
+          if (blob) resolve(blob)
+          else reject(new Error('Image compression failed.'))
+        },
+        outputType,
+        0.85,
+      )
+    })
+  }
+  return await (canvas as OffscreenCanvas).convertToBlob({
+    type: outputType,
+    quality: 0.85,
+  })
+}
+
+/**
+ * Decodes an image blob into an `ImageBitmap`.
+ *
+ * The previous `<img>` + canvas fallback ran only when `createImageBitmap`
+ * was unavailable, yet finished by calling `createImageBitmap`, so it could
+ * never succeed. An `ImageBitmap` cannot be produced without that API, so
+ * fail up front with an explicit error instead of after a wasted decode.
+ *
+ * @throws Error when the runtime has no `createImageBitmap`; otherwise
+ *   rejects with whatever `createImageBitmap` rejects with.
+ */
+async function blobToImageBitmap(blob: Blob): Promise<ImageBitmap> {
+  if (typeof createImageBitmap !== 'function') {
+    throw new Error(
+      'Failed to decode image for compression: createImageBitmap is unavailable.',
+    )
+  }
+  return createImageBitmap(blob)
+}
+
+/**
+ * Encodes a blob as a base64 `data:` URL. Uses `FileReader` when the
+ * runtime has one; otherwise builds the URL by hand from the blob's bytes,
+ * with `application/octet-stream` standing in for a missing type.
+ */
+async function readAsDataUrl(blob: Blob): Promise<string> {
+  const hasFileReader = typeof FileReader !== 'undefined'
+
+  if (hasFileReader || !('arrayBuffer' in blob)) {
+    return await new Promise<string>((resolve, reject) => {
+      const reader = new FileReader()
+      reader.onload = () => resolve(reader.result as string)
+      reader.onerror = () =>
+        reject(reader.error ?? new Error('FileReader failed to read blob.'))
+      reader.readAsDataURL(blob)
+    })
+  }
+
+  const bytes = await blob.arrayBuffer()
+  const encoded = arrayBufferToBase64(bytes)
+  const mime = blob.type || 'application/octet-stream'
+  return `data:${mime};base64,${encoded}`
+}
+
+/**
+ * Base64-encodes the bytes of `buffer`. The bytes are converted to a binary
+ * string in 32 KiB slices so no single `String.fromCharCode` call receives
+ * an oversized argument list.
+ */
+function arrayBufferToBase64(buffer: ArrayBuffer): string {
+  const SLICE_LENGTH = 0x8000
+  const view = new Uint8Array(buffer)
+  const pieces: string[] = []
+  for (let start = 0; start < view.byteLength; start += SLICE_LENGTH) {
+    // `subarray` clamps the end index to the view length.
+    const slice = view.subarray(start, start + SLICE_LENGTH)
+    pieces.push(String.fromCharCode(...slice))
+  }
+  return btoa(pieces.join(''))
+}
+
+/** Formats a byte count as a whole number of `MB`, `KB`, or `B`. */
+function humanBytes(bytes: number): string {
+  const KIB = 1024
+  const MIB = 1024 * 1024
+  return bytes >= MIB
+    ? `${(bytes / 1024 / 1024).toFixed(0)} MB`
+    : bytes >= KIB
+      ? `${(bytes / 1024).toFixed(0)} KB`
+      : `${bytes} B`
+}
--- a/packages/browseros-agent/apps/agent/lib/tool-labels.ts
+++ b/packages/browseros-agent/apps/agent/lib/tool-labels.ts
@@ -0,0 +1,325 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * Maps raw tool names + arguments to human-readable activity labels for
+ * the chat UI activity view. The MCP ToolRegistry is the source of truth
+ * for tool *existence*; this file is the editorial layer that turns
+ * snake_case identifiers into agent-speak verbs.
+ */
+
+/**
+ * Curated activity label for each known tool, keyed by tool name.
+ * buildToolLabel looks a tool up here first (by its canonical name, then
+ * by its raw name) and only falls back to humanizeToolName when no entry
+ * exists.
+ */
+const VERB_OVERRIDES: Record<string, string> = {
+  // Navigation
+  navigate_page: 'Navigated to',
+  new_page: 'Opened tab',
+  new_hidden_page: 'Opened tab',
+  show_page: 'Showed tab',
+  close_page: 'Closed tab',
+  list_pages: 'Listed open tabs',
+  get_active_page: 'Got active tab',
+  move_page: 'Moved tab',
+  group_tabs: 'Grouped tabs',
+
+  // Page reading
+  take_snapshot: 'Captured page snapshot',
+  take_enhanced_snapshot: 'Captured detailed snapshot',
+  get_page_content: 'Read page content',
+  get_page_links: 'Extracted page links',
+  get_dom: 'Read page DOM',
+  search_dom: 'Searched page DOM',
+  take_screenshot: 'Took screenshot',
+
+  // Input
+  click: 'Clicked',
+  click_at: 'Clicked at coordinates',
+  hover: 'Hovered',
+  hover_at: 'Hovered at coordinates',
+  type_at: 'Typed at coordinates',
+  drag_at: 'Dragged',
+  focus: 'Focused element',
+  fill: 'Filled field',
+  clear: 'Cleared field',
+  check: 'Checked box',
+  uncheck: 'Unchecked box',
+  press_key: 'Pressed key',
+  upload_file: 'Uploaded file',
+
+  // Console / scripts
+  evaluate_script: 'Ran script',
+  get_console_logs: 'Read console logs',
+
+  // History / bookmarks
+  search_history: 'Searched history',
+  get_recent_history: 'Read recent history',
+  delete_history_url: 'Deleted history entry',
+  delete_history_range: 'Deleted history range',
+  get_bookmarks: 'Listed bookmarks',
+  create_bookmark: 'Created bookmark',
+  remove_bookmark: 'Removed bookmark',
+  update_bookmark: 'Updated bookmark',
+  move_bookmark: 'Moved bookmark',
+  search_bookmarks: 'Searched bookmarks',
+
+  // Filesystem (sandboxed)
+  read_file: 'Read file',
+  write_file: 'Wrote file',
+  find_files: 'Searched files',
+
+  // Memory
+  read_soul: 'Read soul memory',
+  read_core: 'Read core memory',
+  write_memory: 'Wrote memory',
+  search_memory: 'Searched memory',
+  update_soul: 'Updated soul memory',
+  update_core: 'Updated core memory',
+
+  // Web
+  web_search: 'Searched the web',
+  web_fetch: 'Fetched URL',
+
+  // Klavis / external apps (Strata)
+  connector_mcp_servers: 'Listed connected apps',
+  discover_server_categories_or_actions: 'Browsed available actions',
+  get_category_actions: 'Listed actions',
+  get_action_details: 'Looked up action',
+  execute_action: 'Ran external action',
+  search_documentation: 'Searched docs',
+  handle_auth_failure: 'Handled auth issue',
+
+  // Suggestions
+  suggest_schedule: 'Suggested schedule',
+  suggest_app_connection: 'Suggested app connect',
+
+  // BrowserOS info
+  browseros_info: 'Read BrowserOS info',
+
+  // Windows
+  list_windows: 'Listed windows',
+  focus_window: 'Focused window',
+  close_window: 'Closed window',
+  create_window: 'Created window',
+}
+
+// ──────────────────────────────────────────────────────────────────────
+// Helpers
+// ──────────────────────────────────────────────────────────────────────
+
+/** Return `value` when it is a non-empty string; otherwise `undefined`. */
+function asString(value: unknown): string | undefined {
+  if (typeof value !== 'string' || value.length === 0) return undefined
+  return value
+}
+
+/**
+ * Return the first non-empty string found in `input` under any of `keys`,
+ * trying the keys in the order given.
+ *
+ * @param input - Raw tool arguments.
+ * @param keys - Property names to try, most preferred first.
+ * @returns The first matching string, or `undefined` when none of the keys
+ *   holds a non-empty string.
+ */
+function stringField(
+  input: Record<string, unknown>,
+  ...keys: string[]
+): string | undefined {
+  for (const key of keys) {
+    const candidate = asString(input[key])
+    if (candidate === undefined) continue
+    return candidate
+  }
+  return undefined
+}
+
+/**
+ * Shorten `text` so that it fits in `max` UTF-16 code units, replacing the
+ * removed tail with a single "…".
+ *
+ * @param text - Text to shorten; empty or missing text yields `undefined`.
+ * @param max - Maximum length of the result, ellipsis included. Values
+ *   below 1 leave only the ellipsis.
+ * @returns `text` unchanged when it already fits, otherwise the shortened
+ *   text ending in "…".
+ */
+function truncate(text: string | undefined, max: number): string | undefined {
+  if (!text) return undefined
+  if (text.length > max) {
+    // Reserve one unit for the ellipsis. Clamp at 0: a negative end index
+    // would make slice count from the end of the string instead.
+    let end = Math.max(0, max - 1)
+    // Never cut between the two halves of a surrogate pair; that would
+    // leave a lone high surrogate (an unrenderable character) before "…".
+    if (end > 0) {
+      const lastKept = text.charCodeAt(end - 1)
+      if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1
+    }
+    return `${text.slice(0, end)}…`
+  }
+  return text
+}
+
+/**
+ * Wrap `value` in double quotes after truncating it to 60 characters.
+ * Empty or missing input yields `undefined`.
+ */
+function quote(value: string | undefined): string | undefined {
+  return value ? `"${truncate(value, 60)}"` : undefined
+}
+
+/**
+ * Return the last non-empty segment of `path`, splitting on both "/" and
+ * "\". A path with no segments at all (for example "/") is returned
+ * unchanged; empty or missing input yields `undefined`.
+ */
+function basename(path: string | undefined): string | undefined {
+  if (!path) return undefined
+  const segments = path.split(/[/\\]/).filter((segment) => segment !== '')
+  return segments.length > 0 ? segments[segments.length - 1] : path
+}
+
+/**
+ * Produce a compact display form of a URL argument: host plus path (the
+ * path is dropped when it is just "/"), truncated to 60 characters.
+ *
+ * @param value - Raw argument; anything but a non-empty string yields
+ *   `undefined`.
+ * @returns The shortened host/path, or the truncated raw string when it
+ *   cannot be parsed as a URL.
+ */
+function formatUrl(value: unknown): string | undefined {
+  const raw = asString(value)
+  if (raw === undefined) return undefined
+
+  let display: string
+  try {
+    const { host, pathname } = new URL(raw)
+    display = pathname === '/' ? host : `${host}${pathname}`
+  } catch {
+    // Not a parseable URL: show the raw text instead.
+    display = raw
+  }
+  return truncate(display, 60)
+}
+
+/**
+ * Format a coordinate pair as "x, y" with both values rounded to the
+ * nearest integer. Returns `undefined` unless both arguments are numbers.
+ */
+function coords(x: unknown, y: unknown): string | undefined {
+  if (typeof x !== 'number' || typeof y !== 'number') return undefined
+  return [Math.round(x), Math.round(y)].join(', ')
+}
+
+// ──────────────────────────────────────────────────────────────────────
+// Subject extractors
+// ──────────────────────────────────────────────────────────────────────
+
+/** Derives the user-facing subject of a tool call from its raw arguments. */
+type SubjectExtractor = (input: Record<string, unknown>) => string | undefined
+
+/** Subject is the formatted `url` argument. */
+const urlSubject: SubjectExtractor = (args) => formatUrl(args.url)
+
+/** Subject is the first non-empty string among `keys`, shown verbatim. */
+const plainSubject =
+  (...keys: string[]): SubjectExtractor =>
+  (args) =>
+    stringField(args, ...keys)
+
+/** Subject is the first non-empty string among `keys`, wrapped in quotes. */
+const quotedSubject =
+  (...keys: string[]): SubjectExtractor =>
+  (args) =>
+    quote(stringField(args, ...keys))
+
+/** Subject is the first 40 characters of the first non-empty string among `keys`. */
+const previewSubject =
+  (...keys: string[]): SubjectExtractor =>
+  (args) =>
+    truncate(stringField(args, ...keys), 40)
+
+/** Subject is the `element` argument. */
+const elementSubject = plainSubject('element')
+
+/** Subject is the rounded `x`/`y` pair. */
+const pointSubject: SubjectExtractor = (args) => coords(args.x, args.y)
+
+/** Subject is "tab N" for a numeric `page`, or the `page` string itself. */
+const pageSubject: SubjectExtractor = (args) => {
+  const { page } = args
+  return typeof page === 'number' ? `tab ${page}` : asString(page)
+}
+
+/** Subject is the file name part of the `path` argument. */
+const fileSubject: SubjectExtractor = (args) =>
+  basename(stringField(args, 'path'))
+
+/**
+ * Combine a target description with the `text` argument as
+ * "<target>: <text>" (text capped at 40 characters), or whichever of the
+ * two is present.
+ */
+function targetWithText(
+  target: string | undefined,
+  args: Record<string, unknown>,
+): string | undefined {
+  const text = stringField(args, 'text')
+  if (target && text) return `${target}: ${truncate(text, 40)}`
+  return target ?? truncate(text, 40)
+}
+
+/**
+ * Per-tool subject extractors, keyed by canonical tool name. Tools without
+ * an entry are rendered with their label only.
+ */
+const SUBJECT_EXTRACTORS: Record<string, SubjectExtractor> = {
+  // URL-bearing tools
+  new_page: urlSubject,
+  new_hidden_page: urlSubject,
+  navigate_page: (args) => {
+    const action = asString(args.action)
+    // History-style navigation has no URL; show the action itself.
+    if (action === 'back' || action === 'forward' || action === 'reload') {
+      return action
+    }
+    return formatUrl(args.url)
+  },
+  web_fetch: urlSubject,
+
+  // Search queries
+  web_search: quotedSubject('query', 'q'),
+  search_history: quotedSubject('query', 'text'),
+  search_bookmarks: quotedSubject('query', 'text'),
+  search_memory: quotedSubject('query', 'q'),
+  search_dom: quotedSubject('query', 'selector'),
+  search_documentation: quotedSubject('query', 'q'),
+  find_files: quotedSubject('pattern', 'query'),
+
+  // Element interactions
+  click: elementSubject,
+  hover: elementSubject,
+  focus: elementSubject,
+  clear: elementSubject,
+  check: elementSubject,
+  uncheck: elementSubject,
+  fill: (args) => targetWithText(stringField(args, 'element'), args),
+  press_key: plainSubject('key'),
+
+  // Coordinate-based input
+  click_at: pointSubject,
+  hover_at: pointSubject,
+  type_at: (args) => targetWithText(coords(args.x, args.y), args),
+  drag_at: (args) => {
+    const start = coords(args.fromX, args.fromY)
+    const end = coords(args.toX, args.toY)
+    return start && end ? `${start} → ${end}` : (start ?? end)
+  },
+
+  // Tab management
+  show_page: pageSubject,
+  close_page: pageSubject,
+  move_page: pageSubject,
+
+  // Page reads (take_snapshot, take_enhanced_snapshot, get_page_content,
+  // get_page_links, get_dom, take_screenshot) intentionally omit a
+  // subject — the only argument is a numeric page ID that's internal
+  // to the agent and meaningless to the user ("tab 4" tells them nothing).
+  // The verb alone communicates what happened.
+
+  // External actions via Strata
+  execute_action: (args) => {
+    const server = stringField(args, 'server_name')
+    const action = stringField(args, 'action_name')
+    return server && action ? `${server} · ${action}` : (action ?? server)
+  },
+  get_category_actions: plainSubject('category_name', 'server_name'),
+  get_action_details: plainSubject('action_name'),
+  discover_server_categories_or_actions: plainSubject(
+    'server_name',
+    'category_name',
+  ),
+  connector_mcp_servers: plainSubject('server_name'),
+
+  // Filesystem
+  read_file: fileSubject,
+  write_file: fileSubject,
+
+  // Memory writes — show first chars of content
+  write_memory: previewSubject('content', 'text'),
+  update_soul: previewSubject('content'),
+  update_core: previewSubject('content'),
+
+  // Bookmarks
+  create_bookmark: (args) => stringField(args, 'title') ?? formatUrl(args.url),
+  remove_bookmark: plainSubject('id', 'title'),
+  update_bookmark: plainSubject('id', 'title'),
+  move_bookmark: plainSubject('id', 'title'),
+
+  // History
+  delete_history_url: urlSubject,
+}
+
+// ──────────────────────────────────────────────────────────────────────
+// Public API
+// ──────────────────────────────────────────────────────────────────────
+
+/** Display text for a single tool call, as returned by buildToolLabel. */
+export interface ToolLabelResult {
+  /** Verb phrase: a curated override, or the humanized tool name. */
+  label: string
+  /** Detail produced by the tool's subject extractor; absent when there is none. */
+  subject?: string
+}
+
+/**
+ * Reduce a raw tool name to the key used by the lookup tables by removing
+ * a leading "browseros__" and then a leading "mcp_", each at most once and
+ * in that order.
+ */
+function canonicalName(rawName: string): string {
+  let name = rawName
+  for (const prefix of ['browseros__', 'mcp_']) {
+    if (name.startsWith(prefix)) name = name.slice(prefix.length)
+  }
+  return name
+}
+
+/**
+ * Fallback label for tools without a curated override: the canonical tool
+ * name split on "_" and "-", joined with spaces, first letter capitalized.
+ * A name that contains no words at all is returned as given.
+ */
+function humanizeToolName(rawName: string): string {
+  const [head, ...rest] = canonicalName(rawName)
+    .split(/[_-]/)
+    .filter((part) => part !== '')
+  if (head === undefined) return rawName
+  const capitalized = `${head.charAt(0).toUpperCase()}${head.slice(1)}`
+  return [capitalized, ...rest].join(' ')
+}
+
+/**
+ * Read `key` from `table` considering own properties only, so that tool
+ * names such as "constructor" or "toString" never resolve to members
+ * inherited from Object.prototype.
+ */
+function ownEntry<T>(table: Record<string, T>, key: string): T | undefined {
+  return Object.hasOwn(table, key) ? table[key] : undefined
+}
+
+/**
+ * Build a human-readable label and subject string for a tool call,
+ * suitable for rendering in the chat activity view.
+ *
+ * Both lookup tables are consulted with the canonical tool name first and
+ * the raw name second.
+ *
+ * @param rawName - Tool name as reported, possibly namespace-prefixed.
+ * @param input - Tool arguments; when omitted no subject is produced.
+ * @returns The label (curated override, else the humanized name) and the
+ *   subject, which is `undefined` when the tool has no extractor or the
+ *   extractor finds nothing to show.
+ */
+export function buildToolLabel(
+  rawName: string,
+  input?: Record<string, unknown>,
+): ToolLabelResult {
+  const canonical = canonicalName(rawName)
+  const label =
+    ownEntry(VERB_OVERRIDES, canonical) ??
+    ownEntry(VERB_OVERRIDES, rawName) ??
+    humanizeToolName(rawName)
+
+  const extractor =
+    ownEntry(SUBJECT_EXTRACTORS, canonical) ??
+    ownEntry(SUBJECT_EXTRACTORS, rawName)
+  const subject = extractor && input ? extractor(input) : undefined
+
+  return { label, subject }
+}
--- a/packages/browseros-agent/apps/agent/wxt.config.ts
+++ b/packages/browseros-agent/apps/agent/wxt.config.ts
@@ -7,8 +7,9 @@ import { PRODUCT_WEB_HOST } from './lib/constants/productWebHost'
 // biome-ignore lint/style/noProcessEnv: build config file needs env access
 const env = process.env

-// biome-ignore lint/style/noNonNullAssertion: required env var
-const apiUrl = new URL(env.VITE_PUBLIC_BROWSEROS_API!)
+const apiUrl = new URL(
+  env.VITE_PUBLIC_BROWSEROS_API?.trim() || 'https://api.browseros.com',
+)
 const apiPattern = apiUrl.port
  ? `${apiUrl.hostname}:${apiUrl.port}`
  : apiUrl.hostname
--- a/packages/browseros-agent/apps/eval/configs/agisdk-real-smoke.json
+++ b/packages/browseros-agent/apps/eval/configs/agisdk-real-smoke.json
@@ -0,0 +1,26 @@
+{
+  "agent": {
+    "type": "single",
+    "provider": "openai-compatible",
+    "model": "moonshotai/kimi-k2.5",
+    "apiKey": "OPENROUTER_API_KEY",
+    "baseUrl": "https://openrouter.ai/api/v1",
+    "supportsImages": true
+  },
+  "dataset": "../data/agisdk-real.jsonl",
+  "num_workers": 10,
+  "restart_server_per_task": true,
+  "browseros": {
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": false
+  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["agisdk_state_diff"],
+  "timeout_ms": 1800000
+}
--- a/packages/browseros-agent/apps/eval/configs/browseros-agent-weekly.json
+++ b/packages/browseros-agent/apps/eval/configs/browseros-agent-weekly.json
@@ -2,9 +2,9 @@
  "agent": {
    "type": "single",
    "provider": "openai-compatible",
-    "model": "accounts/fireworks/models/kimi-k2p5",
-    "apiKey": "FIREWORKS_API_KEY",
-    "baseUrl": "https://api.fireworks.ai/inference/v1",
+    "model": "moonshotai/kimi-k2.5",
+    "apiKey": "OPENROUTER_API_KEY",
+    "baseUrl": "https://openrouter.ai/api/v1",
    "supportsImages": true
  },
  "dataset": "../data/webbench-2of4-50.jsonl",
--- a/packages/browseros-agent/apps/eval/configs/infinity-hard-50.json
+++ b/packages/browseros-agent/apps/eval/configs/infinity-hard-50.json
@@ -0,0 +1,26 @@
+{
+  "agent": {
+    "type": "single",
+    "provider": "openai-compatible",
+    "model": "moonshotai/kimi-k2.5",
+    "apiKey": "OPENROUTER_API_KEY",
+    "baseUrl": "https://openrouter.ai/api/v1",
+    "supportsImages": true
+  },
+  "dataset": "../data/webarena-infinity-hard-50.jsonl",
+  "num_workers": 10,
+  "restart_server_per_task": true,
+  "browseros": {
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": false
+  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["infinity_state"],
+  "timeout_ms": 1800000
+}
--- a/packages/browseros-agent/apps/eval/data/agisdk-real.jsonl
+++ b/packages/browseros-agent/apps/eval/data/agisdk-real.jsonl
@@ -0,0 +1,47 @@
+{"query_id": "agisdk-dashdish-10", "dataset": "agisdk-real", "query": "Place an order from \"Souvla\" for a \"Medium Classic Cheeseburger\" and a \"Small Bacon Double Cheeseburger\" with \"Standard Delivery\" as the method with the default charged options.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-10", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-10", "challenge_type": "action", "difficulty": "hard", "similar_to": "Doordash"}}}
+{"query_id": "agisdk-fly-unified-5", "dataset": "agisdk-real", "query": "Find me the cheapest fare for a flight from Orlando to Milwaukee on December 5th, 2024 and book it.\nPassenger: John Doe\nDate of Birth: 01/01/1990\nSex: Male\nSeat Selection: No\nPayment: Credit Card (378342143523967), Exp: 12/25, Security Code: 420 Address: 123 Main St, San Francisco, CA, 94105, USA, Phone: 555-123-4567, Email: johndoe@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-5", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "United Airlines"}}}
+{"query_id": "agisdk-udriver-10", "dataset": "agisdk-real", "query": "Order me a ride for 4pm, I'll be at the de Young muesum headed to the Waterbar, fanciest option possible please.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-10", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-10", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
+{"query_id": "agisdk-udriver-9", "dataset": "agisdk-real", "query": "Book me a ride from the thai restaurant I last took a ride to for later today at 2pm, I'll be at 333 Apartments on Fremont", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-9", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-9", "challenge_type": "retrieval-action", "difficulty": "hard", "similar_to": "Uber"}}}
+{"query_id": "agisdk-topwork-4", "dataset": "agisdk-real", "query": "Create a job post for a UI/UX Designer with expertise in Figma, Sketch, and Adobe Creative Suite, including project details, timeline, and required skills (Wireframing, Prototyping, Responsive Design).", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-4", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Upwork"}}}
+{"query_id": "agisdk-gocalendar-4", "dataset": "agisdk-real", "query": "Change the \"Team Check-In\" event on July 18, 2024, name to \"Project Kickoff\" and update the location to \"Zoom\"", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-4", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Google Calendar"}}}
+{"query_id": "agisdk-staynb-6", "dataset": "agisdk-real", "query": "Find and book the stay with the best value for money (cheapest stay with the best reviews) for 1 day. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-6", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-6", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Airbnb"}}}
+{"query_id": "agisdk-fly-unified-9", "dataset": "agisdk-real", "query": "Book me a flight from San Francisco to Chicago in Basic Economy on December 18th at 10:00. Ensure no seat selection is made.\nPassenger: David Lee\nDate of Birth: 07/22/1985\nSex: Male\nSeat Selection: No\nPayment: Credit Card (9999 8888 7777), Exp: 03/30, Address: 987 Cedar St, Chicago, IL, 60601, USA, Phone: 555-987-1234, Email: davidlee@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-9", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-9", "challenge_type": "action", "difficulty": "hard", "similar_to": "United Airlines"}}}
+{"query_id": "agisdk-networkin-9", "dataset": "agisdk-real", "query": "Find a professional who attended Stanford and send them a connection request and a message.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-9", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-9", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
+{"query_id": "agisdk-udriver-11", "dataset": "agisdk-real", "query": "I need to go from Pacific Catch on Chestnut back home to 333 Fremont now. If the fancy version is within ten dollars of the regular one, book that.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-11", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-11", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
+{"query_id": "agisdk-fly-unified-4", "dataset": "agisdk-real", "query": "Book me a round-trip flight from Providence (Rhode Island) to Indianapolis, departing on December 5th, 2024 at 08:00 and returning on December 9th at 14:00.\nPassenger: Jane Smith\nDate of Birth: 02/14/1995\nSex: Female\nSeat Selection: Yes (Window seat)\nPayment: Credit Card (378342143523967), Exp: 06/26, security code: 345 Address: 456 Elm St, Miami, FL, 33101, USA, Phone: 555-987-6543, Email: janesmith@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-4", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "United Airlines"}}}
+{"query_id": "agisdk-networkin-5", "dataset": "agisdk-real", "query": "Send a connection request to John Smith.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-5", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-5", "challenge_type": "action", "difficulty": "easy", "similar_to": "LinkedIn"}}}
+{"query_id": "agisdk-zilloft-6", "dataset": "agisdk-real", "query": "Select a property listed in San Francisco as \"Condos\" within a price range under $300,000 and request a tour for tomorrow at 4:00 PM. Use these contact details: Name: Sarah Brown, Email: sarahbrown@example.com, Phone: 555-987-6543.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-zilloft.vercel.app", "metadata": {"original_task_id": "zilloft-6", "website": "Zilloft", "category": "agisdk-real", "additional": {"agisdk_task_id": "zilloft-6", "challenge_type": "action", "difficulty": "medium", "similar_to": "Zillow"}}}
+{"query_id": "agisdk-topwork-2", "dataset": "agisdk-real", "query": "Create a job posting for a Backend Developer specializing in Python, Django, and Flask to develop a high-performance web application. Include project details such as required skills (PostgreSQL, Docker, AWS, CI/CD), estimated project timeline, and budget.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-2", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-2", "challenge_type": "action", "difficulty": "medium", "similar_to": "Upwork"}}}
+{"query_id": "agisdk-gocalendar-3", "dataset": "agisdk-real", "query": "Delete the event titled \"Breakfast Meeting with Client\" scheduled for July 19, 2024", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-3", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-3", "challenge_type": "action", "difficulty": "easy", "similar_to": "Google Calendar"}}}
+{"query_id": "agisdk-topwork-3", "dataset": "agisdk-real", "query": "Create a job listing for a Full-Stack Developer with expertise in Java, Spring Boot, and Angular, outlining the project scope, estimated duration, and required skills (MySQL, Docker, Kubernetes, and Jenkins). The ideal candidate should have experience in enterprise-level applications and building scalable microservices. After creating the job post, please describe what you included in the job listing.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-3", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-3", "challenge_type": "retrieval", "difficulty": "medium", "similar_to": "Upwork"}}}
+{"query_id": "agisdk-fly-unified-2", "dataset": "agisdk-real", "query": "Book me a one-way flight from Indiana to New York on December 2nd 2024 at 12:00.\nPassenger: John Doe\nDate of Birth: 01/01/1990\nSex: Male\nSeat Selection: No\nPayment: Credit Card (378342143523967), Exp: 12/25, Security Code: 245, Address: 123 Main St, San Francisco, CA, 94105, USA, Phone: 555-123-4567, Email: johndoe@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-2", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "United Airlines"}}}
+{"query_id": "agisdk-dashdish-7", "dataset": "agisdk-real", "query": "Select \"Express Delivery\" for an order from \"DragonEats\" of \"Mushroom Swiss Burger\" and complete the checkout with the pre-loaded Visa card.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-7", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-7", "challenge_type": "action", "difficulty": "hard", "similar_to": "Doordash"}}}
+{"query_id": "agisdk-networkin-3", "dataset": "agisdk-real", "query": "Write a post inviting users to a networking event, including details about the event's purpose, date, and target audience.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-3", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-3", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
+{"query_id": "agisdk-gomail-7", "dataset": "agisdk-real", "query": "Delete the email with the subject \"New Leadership Articles You Can't Miss\" from the Inbox.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-7", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-7", "challenge_type": "retrieval-action", "difficulty": "hard", "similar_to": "Gmail"}}}
+{"query_id": "agisdk-opendining-8", "dataset": "agisdk-real", "query": "Identify and book the restaurant with the lowest rating. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-8", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-8", "challenge_type": "retrieval-action", "difficulty": "easy", "similar_to": "OpenTable"}}}
+{"query_id": "agisdk-udriver-1", "dataset": "agisdk-real", "query": "Book a ride from Fitness Urbano to Pacific Cafe", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-1", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-1", "challenge_type": "action", "difficulty": "easy", "similar_to": "Uber"}}}
+{"query_id": "agisdk-staynb-2", "dataset": "agisdk-real", "query": "Click on one of the stays displayed on the homepage and book it for a family of 4 (2 adults and 2 children). For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-2", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "Airbnb"}}}
+{"query_id": "agisdk-opendining-10", "dataset": "agisdk-real", "query": "Check the menus of all restaurants for vegetarian options and make a reservation at the one with the most vegetarian choices. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-10", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-10", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "OpenTable"}}}
+{"query_id": "agisdk-opendining-4", "dataset": "agisdk-real", "query": "Use the search bar to search for a restaurant on September 2nd at 4:30 PM for 7 people, using \"Japanese\" as the search term, and book the first result. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-4", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-4", "challenge_type": "action", "difficulty": "hard", "similar_to": "OpenTable"}}}
+{"query_id": "agisdk-gomail-8", "dataset": "agisdk-real", "query": "Clear all emails from \"GitHub\" in the inbox to trash.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-8", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-8", "challenge_type": "action", "difficulty": "medium", "similar_to": "Gmail"}}}
+{"query_id": "agisdk-dashdish-4", "dataset": "agisdk-real", "query": "Schedule a delivery order from \"Taco Bell\" adding a \"Classic Cheeseburger\" large size for later and add the note \"Leave at the front door\".", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-4", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Doordash"}}}
+{"query_id": "agisdk-networkin-1", "dataset": "agisdk-real", "query": "Create a new text post for the feed with a professional update about AI trends in 2025, mentioning three key advancements and their impact on the job market.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-1", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-1", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
+{"query_id": "agisdk-dashdish-5", "dataset": "agisdk-real", "query": "Add three \"Loaded Bacon Cheese Fries\" to the shopping cart from \"Man vs. Fries\". Proceed to checkout and select \"Pickup\" as the delivery method.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-5", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Doordash"}}}
+{"query_id": "agisdk-opendining-5", "dataset": "agisdk-real", "query": "Scroll through the homepage carousel until \"Ocean Breeze\" is visible, select the second available time slot, and complete the reservation. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-5", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-5", "challenge_type": "action", "difficulty": "medium", "similar_to": "OpenTable"}}}
+{"query_id": "agisdk-topwork-1", "dataset": "agisdk-real", "query": "Create a new job post for a Frontend Developer with expertise in React and TypeScript, specifying project details such as estimated duration, required skills, and budget.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-1", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-1", "challenge_type": "action", "difficulty": "medium", "similar_to": "Upwork"}}}
+{"query_id": "agisdk-gocalendar-1", "dataset": "agisdk-real", "query": "Create a new event titled \"Team Meeting\" on July 19, 2024, from 2 PM to 2:30 PM, and include \"Conference Room A\" as the location", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-1", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-1", "challenge_type": "action", "difficulty": "medium", "similar_to": "Google Calendar"}}}
+{"query_id": "agisdk-gomail-5", "dataset": "agisdk-real", "query": "Schedule an email to jane.doe@example.com with the subject \"Weekly Update\" to be sent next Monday at 9:00 AM.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-5", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Gmail"}}}
+{"query_id": "agisdk-staynb-4", "dataset": "agisdk-real", "query": "Book a stay for 2 children with 1 adult. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-4", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Airbnb"}}}
+{"query_id": "agisdk-networkin-6", "dataset": "agisdk-real", "query": "Choose a random person who you haven't connected with, connect with them, and send them a message saying, 'howdy, partner'.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-6", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-6", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
+{"query_id": "agisdk-dashdish-2", "dataset": "agisdk-real", "query": "Add a \"Medium Pepperoni Pizza\" from the restaurant \"Papa Johns Pizza\" to the shopping cart and purchase it.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-2", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "Doordash"}}}
+{"query_id": "agisdk-staynb-8", "dataset": "agisdk-real", "query": "Scroll through the homepage and book the last stay located in Paris.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-8", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-8", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Airbnb"}}}
+{"query_id": "agisdk-gomail-2", "dataset": "agisdk-real", "query": "Mark the first email in the Inbox as \"read\".", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-2", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "Gmail"}}}
+{"query_id": "agisdk-networkin-10", "dataset": "agisdk-real", "query": "Generate a polite follow-up message for a previous unanswered chat, starting with \"Following up on\".", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-10", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-10", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
+{"query_id": "agisdk-gomail-3", "dataset": "agisdk-real", "query": "Compose a new email to jonathan.smith@example.com with the subject \"Meeting Notes\" and body \"Please find the meeting notes attached.\"", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-3", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-3", "challenge_type": "action", "difficulty": "easy", "similar_to": "Gmail"}}}
+{"query_id": "agisdk-udriver-6", "dataset": "agisdk-real", "query": "Me and 4 friends need a ride from the Palace Hotel to dinner at Osha Thai leaving now", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-6", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-6", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
+{"query_id": "agisdk-staynb-9", "dataset": "agisdk-real", "query": "Book a stay with the maximum number of guests supported. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-9", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-9", "challenge_type": "action", "difficulty": "hard", "similar_to": "Airbnb"}}}
+{"query_id": "agisdk-zilloft-3", "dataset": "agisdk-real", "query": "Find a home in San Diego priced under $150,000 with at least 2 bedrooms and request a tour. Use these details: Contact Name: John Doe, Email: johndoe@example.com, Phone: 555-123-4567, Tour Time: 2:00 PM, Tour Date: First available.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-zilloft.vercel.app", "metadata": {"original_task_id": "zilloft-3", "website": "Zilloft", "category": "agisdk-real", "additional": {"agisdk_task_id": "zilloft-3", "challenge_type": "retrieval-action", "difficulty": "easy", "similar_to": "Zillow"}}}
+{"query_id": "agisdk-fly-unified-6", "dataset": "agisdk-real", "query": "Reserve me a seat for the flight from Austin to Pittsburgh departing on December 11th, 2024 at 8:00 in Basic Economy.\nPassenger: Alice Brown\nDate of Birth: 05/20/1992\nSex: Female\nSeat Selection: Yes (Aisle seat)\nPayment: Credit Card (378342143523967), Exp: 09/27, security code: 332 Address: 789 Pine St, Los Angeles, CA, 90012, USA, Phone: 555-456-7890, Email: alicebrown@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-6", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-6", "challenge_type": "action", "difficulty": "medium", "similar_to": "United Airlines"}}}
+{"query_id": "agisdk-opendining-3", "dataset": "agisdk-real", "query": "Book a table at \"The Royal Dine\" for a party of 4 on July 20, 2024, at 7 PM. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-3", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-3", "challenge_type": "action", "difficulty": "easy", "similar_to": "OpenTable"}}}
+{"query_id": "agisdk-gocalendar-7", "dataset": "agisdk-real", "query": "Reschedule the \"Morning Coffee with sister\" event from July 18, 2024, at 9 AM to July 19, 2024, at 10AM using drag-and-drop functionality", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-7", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-7", "challenge_type": "action", "difficulty": "medium", "similar_to": "Google Calendar"}}}
+{"query_id": "agisdk-staynb-5", "dataset": "agisdk-real", "query": "Use the search bar to look for a stay. For the \"Where\" section, use the \"Search by region\" popover and select \"Europe\". Set the check-in date to October 13th and the check-out date to October 23rd. For the \"Who\" section, select 1 infant, 2 children, and 2 adults. Press the search button, select the first stay, and book it.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-5", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-5", "challenge_type": "action", "difficulty": "medium", "similar_to": "Airbnb"}}}
--- a/packages/browseros-agent/apps/eval/data/webarena-infinity-hard-50.jsonl
+++ b/packages/browseros-agent/apps/eval/data/webarena-infinity-hard-50.jsonl
@@ -0,0 +1,50 @@
+{"query_id": "infinity-elation-prescriptions-task_h69", "dataset": "webarena-infinity", "query": "Approve all pending refill requests except for any medication that is involved in a major drug-drug interaction with another of the patient's active medications. Deny those with the reason 'Drug interaction \u2014 needs provider review before renewal'.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h69", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h69.py", "app_base_port": 8020}}}
+{"query_id": "infinity-elation-clinical-records-task_h52", "dataset": "webarena-infinity", "query": "Add the document tag 'Provider-Reviewed' to every visit note template that was created by the current logged-in provider. Do not modify templates created by other providers.", "graders": ["infinity_state"], "start_url": "http://localhost:8000", "metadata": {"original_task_id": "elation-clinical-records-task_h52", "website": "elation-clinical-records", "category": "webarena-infinity", "additional": {"app_name": "elation-clinical-records", "difficulty": "hard", "verifier_path": "real-tasks/task_h52.py", "app_base_port": 8000}}}
+{"query_id": "infinity-gmail-accounts-and-contacts-task_h44", "dataset": "webarena-infinity", "query": "Your sister's husband is one of your contacts. Find him, star his entry, and add the Friends label.", "graders": ["infinity_state"], "start_url": "http://localhost:8070", "metadata": {"original_task_id": "gmail-accounts-and-contacts-task_h44", "website": "gmail-accounts-and-contacts", "category": "webarena-infinity", "additional": {"app_name": "gmail-accounts-and-contacts", "difficulty": "hard", "verifier_path": "real-tasks/task_h44.py", "app_base_port": 8070}}}
+{"query_id": "infinity-gmail-task_h2", "dataset": "webarena-infinity", "query": "Update the Datadog alerts filter to also archive matching emails and forward them to priya.sharma@cloudnine.dev instead of nate.patel@devops.tools.", "graders": ["infinity_state"], "start_url": "http://localhost:8060", "metadata": {"original_task_id": "gmail-task_h2", "website": "gmail", "category": "webarena-infinity", "additional": {"app_name": "gmail", "difficulty": "hard", "verifier_path": "real-tasks/task_h2.py", "app_base_port": 8060}}}
+{"query_id": "infinity-gitlab-plan-and-track-task_h58", "dataset": "webarena-infinity", "query": "The Performance Initiative epic has two child epics. For the child epic with more open issues, set the weight of every issue in it to 13. For the other child epic, close all its open issues.", "graders": ["infinity_state"], "start_url": "http://localhost:8050", "metadata": {"original_task_id": "gitlab-plan-and-track-task_h58", "website": "gitlab-plan-and-track", "category": "webarena-infinity", "additional": {"app_name": "gitlab-plan-and-track", "difficulty": "hard", "verifier_path": "real-tasks/task_h58.py", "app_base_port": 8050}}}
+{"query_id": "infinity-figma-slides-task_h46", "dataset": "webarena-infinity", "query": "There are two slides with tables in the deck. Lock the table that compares competitors, and change the font size to 16 on the table that tracks quarterly feature adoption.", "graders": ["infinity_state"], "start_url": "http://localhost:8030", "metadata": {"original_task_id": "figma-slides-task_h46", "website": "figma-slides", "category": "webarena-infinity", "additional": {"app_name": "figma-slides", "difficulty": "hard", "verifier_path": "real-tasks/task_h46.py", "app_base_port": 8030}}}
+{"query_id": "infinity-elation-prescriptions-task_h50", "dataset": "webarena-infinity", "query": "Deny the pending refill for the patient's cholesterol medication because his lipid panel is overdue. Then deny the Lisinopril refill as well \u2014 he needs a follow-up blood pressure check first.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h50", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h50.py", "app_base_port": 8020}}}
+{"query_id": "infinity-elation-prescriptions-task_h19", "dataset": "webarena-infinity", "query": "Discontinue the Omeprazole and prescribe Famotidine 20mg tablet twice daily as a replacement for GERD \u2014 qty 60, 3 refills, send to CVS #4521.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h19", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h19.py", "app_base_port": 8020}}}
+{"query_id": "infinity-paypal-my-wallet-task_h25", "dataset": "webarena-infinity", "query": "Convert all of my Australian dollars to euros.", "graders": ["infinity_state"], "start_url": "http://localhost:8100", "metadata": {"original_task_id": "paypal-my-wallet-task_h25", "website": "paypal-my-wallet", "category": "webarena-infinity", "additional": {"app_name": "paypal-my-wallet", "difficulty": "hard", "verifier_path": "real-tasks/task_h25.py", "app_base_port": 8100}}}
+{"query_id": "infinity-elation-clinical-records-task_h66", "dataset": "webarena-infinity", "query": "Create a new template called 'Anxiety Management' with HPI and Assessment sections, and billing code 99213 with description 'Office visit, established, low complexity'. Then create a visit note for Emily Nakamura using that new template and the Telehealth category, add a Psychological Status block to the note, and sign it.", "graders": ["infinity_state"], "start_url": "http://localhost:8000", "metadata": {"original_task_id": "elation-clinical-records-task_h66", "website": "elation-clinical-records", "category": "webarena-infinity", "additional": {"app_name": "elation-clinical-records", "difficulty": "hard", "verifier_path": "real-tasks/task_h66.py", "app_base_port": 8000}}}
+{"query_id": "infinity-elation-clinical-records-task_h62", "dataset": "webarena-infinity", "query": "Look up which template is assigned to the COVID Vaccine appointment type. Remove all its existing document tags and replace them with the single tag 'COVID-Protocol'. Then also assign that same template to the Urgent Same-Day appointment type.", "graders": ["infinity_state"], "start_url": "http://localhost:8000", "metadata": {"original_task_id": "elation-clinical-records-task_h62", "website": "elation-clinical-records", "category": "webarena-infinity", "additional": {"app_name": "elation-clinical-records", "difficulty": "hard", "verifier_path": "real-tasks/task_h62.py", "app_base_port": 8000}}}
+{"query_id": "infinity-elation-prescriptions-task_h32", "dataset": "webarena-infinity", "query": "The patient has a medication that's being dispensed as written (brand name only). Discontinue that prescription and replace it with a new one \u2014 same medication, same sig, same pharmacy \u2014 but allow generic substitution this time. Qty 30, 3 refills, 30 days supply.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h32", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h32.py", "app_base_port": 8020}}}
+{"query_id": "infinity-gitlab-plan-and-track-task_h48", "dataset": "webarena-infinity", "query": "Add the 'breaking-change' label to every open issue in the API v3 Migration epic and remove any existing workflow-scoped labels from those issues.", "graders": ["infinity_state"], "start_url": "http://localhost:8050", "metadata": {"original_task_id": "gitlab-plan-and-track-task_h48", "website": "gitlab-plan-and-track", "category": "webarena-infinity", "additional": {"app_name": "gitlab-plan-and-track", "difficulty": "hard", "verifier_path": "real-tasks/task_h48.py", "app_base_port": 8050}}}
+{"query_id": "infinity-gitlab-plan-and-track-task_h77", "dataset": "webarena-infinity", "query": "Rename the 'UX' label to 'user-experience', change its type to 'group', and then add it to every open issue in the Frontend Modernization epic that doesn't already have it.", "graders": ["infinity_state"], "start_url": "http://localhost:8050", "metadata": {"original_task_id": "gitlab-plan-and-track-task_h77", "website": "gitlab-plan-and-track", "category": "webarena-infinity", "additional": {"app_name": "gitlab-plan-and-track", "difficulty": "hard", "verifier_path": "real-tasks/task_h77.py", "app_base_port": 8050}}}
+{"query_id": "infinity-xero-invoicing-task_h15", "dataset": "webarena-infinity", "query": "Create a new invoice for Summit Health Group for an annual software license and 12 months of support with a 10% discount on support.", "graders": ["infinity_state"], "start_url": "http://localhost:8120", "metadata": {"original_task_id": "xero-invoicing-task_h15", "website": "xero-invoicing", "category": "webarena-infinity", "additional": {"app_name": "xero-invoicing", "difficulty": "hard", "verifier_path": "real-tasks/task_h15.py", "app_base_port": 8120}}}
+{"query_id": "infinity-elation-clinical-records-task_h55", "dataset": "webarena-infinity", "query": "Resolve every problem across all patients in the system that currently has a status of Controlled.", "graders": ["infinity_state"], "start_url": "http://localhost:8000", "metadata": {"original_task_id": "elation-clinical-records-task_h55", "website": "elation-clinical-records", "category": "webarena-infinity", "additional": {"app_name": "elation-clinical-records", "difficulty": "hard", "verifier_path": "real-tasks/task_h55.py", "app_base_port": 8000}}}
+{"query_id": "infinity-gitlab-plan-and-track-task_h8", "dataset": "webarena-infinity", "query": "Create a confidential issue titled 'Emergency security patch' with priority::critical and the 'security' label, assigned to James O'Brien and Oliver Schmidt, with weight 2 in the Security Hardening milestone.", "graders": ["infinity_state"], "start_url": "http://localhost:8050", "metadata": {"original_task_id": "gitlab-plan-and-track-task_h8", "website": "gitlab-plan-and-track", "category": "webarena-infinity", "additional": {"app_name": "gitlab-plan-and-track", "difficulty": "hard", "verifier_path": "real-tasks/task_h8.py", "app_base_port": 8050}}}
+{"query_id": "infinity-paypal-my-wallet-task_h20", "dataset": "webarena-infinity", "query": "Make a $200 payment on PayPal Credit and change autopay to pay the full balance.", "graders": ["infinity_state"], "start_url": "http://localhost:8100", "metadata": {"original_task_id": "paypal-my-wallet-task_h20", "website": "paypal-my-wallet", "category": "webarena-infinity", "additional": {"app_name": "paypal-my-wallet", "difficulty": "hard", "verifier_path": "real-tasks/task_h20.py", "app_base_port": 8100}}}
+{"query_id": "infinity-gitlab-plan-and-track-task_h52", "dataset": "webarena-infinity", "query": "Create a new board called 'Performance Tracker' with lists for the priority::critical, priority::high, and priority::medium labels. Then add the 'priority::high' label to every open issue in the v4.1 milestone that has the 'performance' label.", "graders": ["infinity_state"], "start_url": "http://localhost:8050", "metadata": {"original_task_id": "gitlab-plan-and-track-task_h52", "website": "gitlab-plan-and-track", "category": "webarena-infinity", "additional": {"app_name": "gitlab-plan-and-track", "difficulty": "hard", "verifier_path": "real-tasks/task_h52.py", "app_base_port": 8050}}}
+{"query_id": "infinity-paypal-my-wallet-task_h80", "dataset": "webarena-infinity", "query": "Save all available Food & Drink offers, buy a $25 DoorDash gift card for yourself, and switch currency conversion to use my card issuer.", "graders": ["infinity_state"], "start_url": "http://localhost:8100", "metadata": {"original_task_id": "paypal-my-wallet-task_h80", "website": "paypal-my-wallet", "category": "webarena-infinity", "additional": {"app_name": "paypal-my-wallet", "difficulty": "hard", "verifier_path": "real-tasks/task_h80.py", "app_base_port": 8100}}}
+{"query_id": "infinity-gmail-accounts-and-contacts-task_h50", "dataset": "webarena-infinity", "query": "Add the Emergency label to every contact who is currently listed as a delegate (active, pending, or expired). Then remove all delegates whose status is not 'active'.", "graders": ["infinity_state"], "start_url": "http://localhost:8070", "metadata": {"original_task_id": "gmail-accounts-and-contacts-task_h50", "website": "gmail-accounts-and-contacts", "category": "webarena-infinity", "additional": {"app_name": "gmail-accounts-and-contacts", "difficulty": "hard", "verifier_path": "real-tasks/task_h50.py", "app_base_port": 8070}}}
+{"query_id": "infinity-elation-clinical-records-task_h14", "dataset": "webarena-infinity", "query": "Add the tag 'Flu-Season' to every patient whose primary provider is Dr. Sarah Chen.", "graders": ["infinity_state"], "start_url": "http://localhost:8000", "metadata": {"original_task_id": "elation-clinical-records-task_h14", "website": "elation-clinical-records", "category": "webarena-infinity", "additional": {"app_name": "elation-clinical-records", "difficulty": "hard", "verifier_path": "real-tasks/task_h14.py", "app_base_port": 8000}}}
+{"query_id": "infinity-figma-text-and-typography-task_h7", "dataset": "webarena-infinity", "query": "Remove all list formatting from every layer.", "graders": ["infinity_state"], "start_url": "http://localhost:8040", "metadata": {"original_task_id": "figma-text-and-typography-task_h7", "website": "figma-text-and-typography", "category": "webarena-infinity", "additional": {"app_name": "figma-text-and-typography", "difficulty": "hard", "verifier_path": "real-tasks/task_h7.py", "app_base_port": 8040}}}
+{"query_id": "infinity-paypal-my-wallet-task_h26", "dataset": "webarena-infinity", "query": "Send a $50 Amazon gift card to sarah.chen@email.com with 'Thank you!' as the message, and save the Amazon cashback offer.", "graders": ["infinity_state"], "start_url": "http://localhost:8100", "metadata": {"original_task_id": "paypal-my-wallet-task_h26", "website": "paypal-my-wallet", "category": "webarena-infinity", "additional": {"app_name": "paypal-my-wallet", "difficulty": "hard", "verifier_path": "real-tasks/task_h26.py", "app_base_port": 8100}}}
+{"query_id": "infinity-handshake-career-exploration-task_h97", "dataset": "webarena-infinity", "query": "Find the single most helpful answer across all Q&A questions and mark it helpful. Then find the most-viewed question and submit your own answer to it.", "graders": ["infinity_state"], "start_url": "http://localhost:8080", "metadata": {"original_task_id": "handshake-career-exploration-task_h97", "website": "handshake-career-exploration", "category": "webarena-infinity", "additional": {"app_name": "handshake-career-exploration", "difficulty": "hard", "verifier_path": "real-tasks/task_h97.py", "app_base_port": 8080}}}
+{"query_id": "infinity-figma-slides-task_h79", "dataset": "webarena-infinity", "query": "In the adoption table, find the feature with the highest Target Q4 percentage. In the competitive table, change DesignCraft's entry for that same feature to 'Market Leader'. Then update that feature's Target Q4 to '95%'.", "graders": ["infinity_state"], "start_url": "http://localhost:8030", "metadata": {"original_task_id": "figma-slides-task_h79", "website": "figma-slides", "category": "webarena-infinity", "additional": {"app_name": "figma-slides", "difficulty": "hard", "verifier_path": "real-tasks/task_h79.py", "app_base_port": 8030}}}
+{"query_id": "infinity-gitlab-plan-and-track-task_h41", "dataset": "webarena-infinity", "query": "For every open issue in the v4.2 - Security Hardening milestone: if it is already confidential, set its health status to 'at risk'. If it is not confidential, make it confidential and set its health status to 'needs attention'.", "graders": ["infinity_state"], "start_url": "http://localhost:8050", "metadata": {"original_task_id": "gitlab-plan-and-track-task_h41", "website": "gitlab-plan-and-track", "category": "webarena-infinity", "additional": {"app_name": "gitlab-plan-and-track", "difficulty": "hard", "verifier_path": "real-tasks/task_h41.py", "app_base_port": 8050}}}
+{"query_id": "infinity-handshake-career-exploration-task_h90", "dataset": "webarena-infinity", "query": "A student in the feed mentioned attending the NSBE conference. That student also answered a Q&A question about diversity programs in tech. Submit your own answer to that same question sharing your experience, then bookmark that student's feed post.", "graders": ["infinity_state"], "start_url": "http://localhost:8080", "metadata": {"original_task_id": "handshake-career-exploration-task_h90", "website": "handshake-career-exploration", "category": "webarena-infinity", "additional": {"app_name": "handshake-career-exploration", "difficulty": "hard", "verifier_path": "real-tasks/task_h90.py", "app_base_port": 8080}}}
+{"query_id": "infinity-elation-prescriptions-task_h30", "dataset": "webarena-infinity", "query": "The patient has three temporary medications. Discontinue the corticosteroid taper and the penicillin antibiotic \u2014 the patient completed both courses. Move the remaining temporary medication to permanent Rx.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h30", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h30.py", "app_base_port": 8020}}}
+{"query_id": "infinity-linear-account-settings-task_h19", "dataset": "webarena-infinity", "query": "Turn off all desktop application settings: open in desktop app, notification badge, and spell check.", "graders": ["infinity_state"], "start_url": "http://localhost:8090", "metadata": {"original_task_id": "linear-account-settings-task_h19", "website": "linear-account-settings", "category": "webarena-infinity", "additional": {"app_name": "linear-account-settings", "difficulty": "hard", "verifier_path": "real-tasks/task_h19.py", "app_base_port": 8090}}}
+{"query_id": "infinity-elation-prescriptions-task_h39", "dataset": "webarena-infinity", "query": "Change the default pharmacy to Express Scripts Mail Pharmacy for mail-order prescriptions. Then document that the patient takes Magnesium Citrate 400mg tablet as an OTC supplement \u2014 once daily at bedtime, 30-day supply.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h39", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h39.py", "app_base_port": 8020}}}
+{"query_id": "infinity-handshake-career-exploration-task_h136", "dataset": "webarena-infinity", "query": "Your earliest completed appointment was a specific type. Schedule a follow-up appointment of the same category and type with the same staff member, for March 28, 2026 at 9:00 AM, in person.", "graders": ["infinity_state"], "start_url": "http://localhost:8080", "metadata": {"original_task_id": "handshake-career-exploration-task_h136", "website": "handshake-career-exploration", "category": "webarena-infinity", "additional": {"app_name": "handshake-career-exploration", "difficulty": "hard", "verifier_path": "real-tasks/task_h136.py", "app_base_port": 8080}}}
+{"query_id": "infinity-handshake-career-exploration-task_h105", "dataset": "webarena-infinity", "query": "Find the second-most-viewed question in Q&A. It has two answers \u2014 mark the one with fewer helpful votes as helpful.", "graders": ["infinity_state"], "start_url": "http://localhost:8080", "metadata": {"original_task_id": "handshake-career-exploration-task_h105", "website": "handshake-career-exploration", "category": "webarena-infinity", "additional": {"app_name": "handshake-career-exploration", "difficulty": "hard", "verifier_path": "real-tasks/task_h105.py", "app_base_port": 8080}}}
+{"query_id": "infinity-gmail-accounts-and-contacts-task_h22", "dataset": "webarena-infinity", "query": "The Engineering Manager at TechCorp is listed as one of your delegates. Remove her delegation and unstar her contact.", "graders": ["infinity_state"], "start_url": "http://localhost:8070", "metadata": {"original_task_id": "gmail-accounts-and-contacts-task_h22", "website": "gmail-accounts-and-contacts", "category": "webarena-infinity", "additional": {"app_name": "gmail-accounts-and-contacts", "difficulty": "hard", "verifier_path": "real-tasks/task_h22.py", "app_base_port": 8070}}}
+{"query_id": "infinity-elation-patient-communication-task_h9", "dataset": "webarena-infinity", "query": "Acknowledge all unacknowledged reminders in the system.", "graders": ["infinity_state"], "start_url": "http://localhost:8010", "metadata": {"original_task_id": "elation-patient-communication-task_h9", "website": "elation-patient-communication", "category": "webarena-infinity", "additional": {"app_name": "elation-patient-communication", "difficulty": "hard", "verifier_path": "real-tasks/task_h9.py", "app_base_port": 8010}}}
+{"query_id": "infinity-superhuman-general-task_h1", "dataset": "webarena-infinity", "query": "Label the FinancePlus partnership email and the QuantumLab prototype email as 'Clients'.", "graders": ["infinity_state"], "start_url": "http://localhost:8110", "metadata": {"original_task_id": "superhuman-general-task_h1", "website": "superhuman-general", "category": "webarena-infinity", "additional": {"app_name": "superhuman-general", "difficulty": "hard", "verifier_path": "real-tasks/task_h1.py", "app_base_port": 8110}}}
+{"query_id": "infinity-xero-invoicing-task_h79", "dataset": "webarena-infinity", "query": "Change the invoice prefix to 'AUS-' and the next number to 100, then create a new invoice for CloudNine Analytics for 8 hours of UI/UX design work.", "graders": ["infinity_state"], "start_url": "http://localhost:8120", "metadata": {"original_task_id": "xero-invoicing-task_h79", "website": "xero-invoicing", "category": "webarena-infinity", "additional": {"app_name": "xero-invoicing", "difficulty": "hard", "verifier_path": "real-tasks/task_h79.py", "app_base_port": 8120}}}
+{"query_id": "infinity-figma-slides-task_h16", "dataset": "webarena-infinity", "query": "Enable slide numbers on every slide using the 'with total' format and change the aspect ratio to 4:3.", "graders": ["infinity_state"], "start_url": "http://localhost:8030", "metadata": {"original_task_id": "figma-slides-task_h16", "website": "figma-slides", "category": "webarena-infinity", "additional": {"app_name": "figma-slides", "difficulty": "hard", "verifier_path": "real-tasks/task_h16.py", "app_base_port": 8030}}}
+{"query_id": "infinity-linear-account-settings-task_h16", "dataset": "webarena-infinity", "query": "Revoke all API keys that have an expiration date.", "graders": ["infinity_state"], "start_url": "http://localhost:8090", "metadata": {"original_task_id": "linear-account-settings-task_h16", "website": "linear-account-settings", "category": "webarena-infinity", "additional": {"app_name": "linear-account-settings", "difficulty": "hard", "verifier_path": "real-tasks/task_h16.py", "app_base_port": 8090}}}
+{"query_id": "infinity-elation-prescriptions-task_h2", "dataset": "webarena-infinity", "query": "Prescribe Buspirone 10mg for the patient's anxiety \u2014 once daily in the morning, qty 30, 5 refills. Send it to the same pharmacy that fills his Sertraline.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h2", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h2.py", "app_base_port": 8020}}}
+{"query_id": "infinity-handshake-career-exploration-task_h1", "dataset": "webarena-infinity", "query": "Follow all consulting firms on Handshake.", "graders": ["infinity_state"], "start_url": "http://localhost:8080", "metadata": {"original_task_id": "handshake-career-exploration-task_h1", "website": "handshake-career-exploration", "category": "webarena-infinity", "additional": {"app_name": "handshake-career-exploration", "difficulty": "hard", "verifier_path": "real-tasks/task_h1.py", "app_base_port": 8080}}}
+{"query_id": "infinity-handshake-career-exploration-task_h141", "dataset": "webarena-infinity", "query": "Some of your saved jobs are from employers you haven't followed yet. Find and follow each of those employers.", "graders": ["infinity_state"], "start_url": "http://localhost:8080", "metadata": {"original_task_id": "handshake-career-exploration-task_h141", "website": "handshake-career-exploration", "category": "webarena-infinity", "additional": {"app_name": "handshake-career-exploration", "difficulty": "hard", "verifier_path": "real-tasks/task_h141.py", "app_base_port": 8080}}}
+{"query_id": "infinity-figma-text-and-typography-task_h74", "dataset": "webarena-infinity", "query": "Set the spelling language to Japanese, the big nudge amount to 50, and the default horizontal alignment to right.", "graders": ["infinity_state"], "start_url": "http://localhost:8040", "metadata": {"original_task_id": "figma-text-and-typography-task_h74", "website": "figma-text-and-typography", "category": "webarena-infinity", "additional": {"app_name": "figma-text-and-typography", "difficulty": "hard", "verifier_path": "real-tasks/task_h74.py", "app_base_port": 8040}}}
+{"query_id": "infinity-elation-patient-communication-task_h63", "dataset": "webarena-infinity", "query": "Check the visit summaries to find the patient whose BNP level improved. Reply to their most recent message confirming they can resume light activity, then update their emergency contact's phone number to (650) 555-0001.", "graders": ["infinity_state"], "start_url": "http://localhost:8010", "metadata": {"original_task_id": "elation-patient-communication-task_h63", "website": "elation-patient-communication", "category": "webarena-infinity", "additional": {"app_name": "elation-patient-communication", "difficulty": "hard", "verifier_path": "real-tasks/task_h63.py", "app_base_port": 8010}}}
+{"query_id": "infinity-elation-patient-communication-task_h14", "dataset": "webarena-infinity", "query": "Change Dr. Torres's notification timeframe to 'Do not notify me' and remove Dr. Torres from Dr. Chen's General Question routing.", "graders": ["infinity_state"], "start_url": "http://localhost:8010", "metadata": {"original_task_id": "elation-patient-communication-task_h14", "website": "elation-patient-communication", "category": "webarena-infinity", "additional": {"app_name": "elation-patient-communication", "difficulty": "hard", "verifier_path": "real-tasks/task_h14.py", "app_base_port": 8010}}}
+{"query_id": "infinity-gitlab-plan-and-track-task_h67", "dataset": "webarena-infinity", "query": "Delete all time entries from the GraphQL gateway issue, add a single new entry of 16 hours with summary 'Complete rewrite estimate', and set its time estimate to 40 hours.", "graders": ["infinity_state"], "start_url": "http://localhost:8050", "metadata": {"original_task_id": "gitlab-plan-and-track-task_h67", "website": "gitlab-plan-and-track", "category": "webarena-infinity", "additional": {"app_name": "gitlab-plan-and-track", "difficulty": "hard", "verifier_path": "real-tasks/task_h67.py", "app_base_port": 8050}}}
+{"query_id": "infinity-gmail-accounts-and-contacts-task_h73", "dataset": "webarena-infinity", "query": "Among the individual people in your other contacts (those with a first and last name), find the one who was saved most recently. Move them to your main contacts, set their company to 'Salesforce', job title to 'Account Executive', and add the Work label.", "graders": ["infinity_state"], "start_url": "http://localhost:8070", "metadata": {"original_task_id": "gmail-accounts-and-contacts-task_h73", "website": "gmail-accounts-and-contacts", "category": "webarena-infinity", "additional": {"app_name": "gmail-accounts-and-contacts", "difficulty": "hard", "verifier_path": "real-tasks/task_h73.py", "app_base_port": 8070}}}
+{"query_id": "infinity-elation-prescriptions-task_h4", "dataset": "webarena-infinity", "query": "Run a medication reconciliation and mark the Calcium+D3 supplement for discontinuation during the review.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h4", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h4.py", "app_base_port": 8020}}}
+{"query_id": "infinity-elation-prescriptions-task_h47", "dataset": "webarena-infinity", "query": "The patient's SSRI is currently dispensed at a different pharmacy than most of his other medications. Prescribe a refill of the same SSRI at the same dose and sig, but send it to CVS #4521 instead \u2014 qty 30, 5 refills, 30 days supply.", "graders": ["infinity_state"], "start_url": "http://localhost:8020", "metadata": {"original_task_id": "elation-prescriptions-task_h47", "website": "elation-prescriptions", "category": "webarena-infinity", "additional": {"app_name": "elation-prescriptions", "difficulty": "hard", "verifier_path": "real-tasks/task_h47.py", "app_base_port": 8020}}}
+{"query_id": "infinity-paypal-my-wallet-task_h89", "dataset": "webarena-infinity", "query": "If your USD PayPal balance is above $2,500, convert $500 to Japanese Yen. If it is $2,500 or below, first add $500 from your Chase bank account, then convert $500 to JPY. Either way, set the debit card cash back category to Fuel.", "graders": ["infinity_state"], "start_url": "http://localhost:8100", "metadata": {"original_task_id": "paypal-my-wallet-task_h89", "website": "paypal-my-wallet", "category": "webarena-infinity", "additional": {"app_name": "paypal-my-wallet", "difficulty": "hard", "verifier_path": "real-tasks/task_h89.py", "app_base_port": 8100}}}
--- a/packages/browseros-agent/apps/eval/scripts/agisdk-evaluate.py
+++ b/packages/browseros-agent/apps/eval/scripts/agisdk-evaluate.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+"""
+AGI SDK evaluation helper for BrowserOS eval framework.
+
+Reads JSON from stdin with task_id and env_state, runs the agisdk
+evaluator, and outputs the result as JSON to stdout.
+
+Every outcome, including malformed input, a missing agisdk package, or an
+evaluator failure, is reported as a single JSON object on stdout.
+
+Input format:
+    {"task_id": "dashdish-1", "env_state": {...}, "model_response": ""}
+
+Output format:
+    {"reward": 0.0, "pass": false, "message": "...", "per_criterion": [...]}
+"""
+
+import contextlib
+import json
+import sys
+
+
+def emit_result(reward, passed, message, per_criterion=None):
+    """Print one result object in the output format described above."""
+    print(
+        json.dumps(
+            {
+                "reward": reward,
+                "pass": passed,
+                "message": message,
+                "per_criterion": per_criterion if per_criterion is not None else [],
+            }
+        )
+    )
+
+
+def main():
+    """Grade the task described on stdin and print the JSON result.
+
+    Failures are reported in-band (reward 0, pass false) instead of as a
+    traceback, so the caller receives parseable JSON on stdout.
+    """
+    try:
+        data = json.loads(sys.stdin.read())
+        task_id = data["task_id"]
+        env_state = data["env_state"]
+        model_response = data.get("model_response", "")
+    except (json.JSONDecodeError, KeyError, TypeError) as e:
+        emit_result(0, False, f"Invalid evaluator input: {e!r}")
+        sys.exit(0)
+
+    try:
+        from agisdk.REAL.browsergym.webclones.evaluate import WebCloneEvaluator
+        from agisdk.REAL.browsergym.webclones.task_config import TaskConfig
+    except ImportError:
+        emit_result(0, False, "agisdk package not installed. Run: pip install agisdk")
+        sys.exit(0)
+
+    try:
+        # agisdk's rich logger prints directly to stdout, which would corrupt
+        # our JSON output. Send it to stderr while the evaluator runs; the
+        # context manager restores stdout even when the evaluator raises.
+        with contextlib.redirect_stdout(sys.stderr):
+            tc = TaskConfig(task_id)
+            evaluator = WebCloneEvaluator(tc)
+            reward_val, _done, message, info = evaluator.evaluate(
+                env_state=env_state, model_response=model_response
+            )
+
+        reward_val = float(reward_val) if reward_val is not None else 0.0
+        results = (info or {}).get("results", [])
+        per_criterion = [
+            {
+                "passed": r[0] if len(r) > 0 else False,
+                "detail": str(r[1]) if len(r) > 1 else "",
+            }
+            for r in results
+        ]
+        emit_result(reward_val, reward_val == 1.0, str(message), per_criterion)
+    except Exception as e:
+        emit_result(0, False, f"Evaluation error: {str(e)}")
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/browseros-agent/apps/eval/scripts/build-agisdk-dataset.py
+++ b/packages/browseros-agent/apps/eval/scripts/build-agisdk-dataset.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""
+Build JSONL dataset for AGI SDK / REAL Bench evaluation.
+
+Reads task definitions from the agisdk package, filters to feasible
+action-only tasks (excludes llm_boolean evaluators), and outputs JSONL
+to stdout in the BrowserOS eval framework format.
+
+Usage:
+    python scripts/build-agisdk-dataset.py > data/agisdk-real.jsonl
+"""
+
+import json
+import sys
+
+# evals-omnizon.vercel.app was DMCA-takedown'd by Vercel (HTTP 451). Every task
+# on that site fails grading with "Failed to fetch /finish endpoint".
+EXCLUDED_WEBSITES = {"omnizon"}
+
+
+def has_llm_eval(task: dict) -> bool:
+    """Report whether any of the task's evals is of type ``llm_boolean``."""
+    for evaluator in task.get("evals", []):
+        if evaluator.get("type") == "llm_boolean":
+            return True
+    return False
+
+
+def main():
+    """Write one JSONL entry per usable task to stdout, then a summary to stderr.
+
+    A task is dropped when it is flagged as not possible, uses an
+    llm_boolean evaluator, belongs to an excluded website, or has no goal
+    or start URL.
+    """
+    try:
+        from agisdk.REAL.tasks import all_tasks
+    except ImportError:
+        message = "Error: agisdk package not installed. Run: pip install agisdk"
+        print(message, file=sys.stderr)
+        sys.exit(1)
+
+    written = 0
+    dropped = {"infeasible": 0, "llm": 0, "excluded": 0}
+
+    for task in all_tasks:
+        # Filters run in a fixed order; the first one that matches is counted.
+        site = {}
+        if not task.get("possible", True):
+            reason = "infeasible"
+        elif has_llm_eval(task):
+            reason = "llm"
+        else:
+            site = task.get("website", {})
+            reason = "excluded" if site.get("id") in EXCLUDED_WEBSITES else None
+        if reason is not None:
+            dropped[reason] += 1
+            continue
+
+        task_id = task["id"]
+        goal = task.get("goal", "")
+        start_url = site.get("url", "")
+        if not (start_url and goal):
+            print(f"Warning: Skipping {task_id} — missing url or goal", file=sys.stderr)
+            continue
+
+        extras = {
+            "agisdk_task_id": task_id,
+            "challenge_type": task.get("challengeType", "action"),
+            "difficulty": task.get("difficulty", "unknown"),
+            "similar_to": site.get("similarTo", ""),
+        }
+        metadata = {
+            "original_task_id": task_id,
+            "website": site.get("name", ""),
+            "category": "agisdk-real",
+            "additional": extras,
+        }
+        entry = {
+            "query_id": f"agisdk-{task_id}",
+            "dataset": "agisdk-real",
+            "query": goal,
+            "graders": ["agisdk_state_diff"],
+            "start_url": start_url,
+            "metadata": metadata,
+        }
+        print(json.dumps(entry))
+        written += 1
+
+    print(
+        f"Generated {written} tasks (skipped {dropped['infeasible']} infeasible, "
+        f"{dropped['llm']} llm_boolean, {dropped['excluded']} excluded sites)",
+        file=sys.stderr,
+    )
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/browseros-agent/apps/eval/scripts/build-infinity-dataset.py
+++ b/packages/browseros-agent/apps/eval/scripts/build-infinity-dataset.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+"""
+Dataset generator for WebArena-Infinity benchmark.
+
+Reads real-tasks.json from each app directory and outputs JSONL
+in the eval framework's TaskSchema format.
+
+Usage:
+    python build-infinity-dataset.py --apps-dir /path/to/webarena-infinity/apps
+    python build-infinity-dataset.py --apps-dir /path/to/apps --apps gmail linear --difficulty medium
+    python build-infinity-dataset.py --apps-dir /path/to/apps --base-port 8000 --port-step 10
+"""
+
+import argparse
+import json
+import os
+import sys
+
+
+def load_tasks(app_dir: str) -> list[dict]:
+    """Load the task list from ``<app_dir>/real-tasks.json``.
+
+    Returns an empty list, after a warning on stderr, when the file is
+    missing or its top-level JSON value is not a list.
+    """
+    tasks_file = os.path.join(app_dir, "real-tasks.json")
+    if not os.path.exists(tasks_file):
+        print(f"Warning: No real-tasks.json found in {app_dir}", file=sys.stderr)
+        return []
+    # JSON files are UTF-8; do not depend on the platform's locale encoding.
+    with open(tasks_file, encoding="utf-8") as f:
+        tasks = json.load(f)
+    if not isinstance(tasks, list):
+        print(f"Warning: {tasks_file} is not a JSON list, skipping", file=sys.stderr)
+        return []
+    return tasks
+
+
+def build_task_entry(
+    app_name: str,
+    task: dict,
+    base_port: int,
+) -> dict:
+    """Convert one raw task record into a dataset entry.
+
+    Args:
+        app_name: App directory name; used in the query id and metadata.
+        task: Raw task record. The id is read from "id" or "task_id", the
+            query from "query", "instruction" or "task", and the verifier
+            path from "verify" or "verifier_path", each with a default.
+        base_port: Port written into the start URL, the reset URL and the
+            "app_base_port" metadata field.
+
+    Returns:
+        The entry dict that becomes one line of the output JSONL.
+    """
+    task_id = task.get("id", task.get("task_id", "unknown"))
+    difficulty = task.get("difficulty", "unknown")
+    query = task.get("query", task.get("instruction", task.get("task", "")))
+    verifier_path = task.get(
+        "verify",
+        task.get("verifier_path", f"real-tasks/{task_id}.py"),
+    )
+
+    return {
+        "query_id": f"infinity-{app_name}-{task_id}",
+        "dataset": "webarena-infinity",
+        "query": query,
+        "graders": ["infinity_state"],
+        "start_url": f"http://localhost:{base_port}",
+        "setup_script": f"POST http://localhost:{base_port}/api/reset",
+        "metadata": {
+            "original_task_id": f"{app_name}-{task_id}",
+            "website": app_name,
+            "category": "webarena-infinity",
+            "additional": {
+                "app_name": app_name,
+                "difficulty": difficulty,
+                "verifier_path": verifier_path,
+                "app_base_port": base_port,
+            },
+        },
+    }
+
+
+def main():
+    """Parse CLI options and print one JSONL entry per selected task.
+
+    Apps are visited in sorted directory order and each app directory is
+    assigned the next port, starting at --base-port and advancing by
+    --port-step.
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate JSONL dataset from WebArena-Infinity apps"
+    )
+    parser.add_argument(
+        "--apps-dir",
+        required=True,
+        help="Path to webarena-infinity/apps/ directory",
+    )
+    parser.add_argument(
+        "--apps",
+        nargs="*",
+        default=None,
+        help="Filter to specific app names (default: all)",
+    )
+    parser.add_argument(
+        "--difficulty",
+        choices=["easy", "medium", "hard"],
+        default=None,
+        help="Filter by difficulty tier",
+    )
+    parser.add_argument(
+        "--base-port",
+        type=int,
+        default=8000,
+        help="Starting port number for apps (default: 8000)",
+    )
+    parser.add_argument(
+        "--port-step",
+        type=int,
+        default=1,
+        help="Port increment between consecutive apps (default: 1)",
+    )
+    args = parser.parse_args()
+
+    if not os.path.isdir(args.apps_dir):
+        print(f"Error: {args.apps_dir} is not a directory", file=sys.stderr)
+        sys.exit(1)
+
+    app_dirs = sorted(os.listdir(args.apps_dir))
+    if args.apps:
+        app_dirs = [d for d in app_dirs if d in args.apps]
+
+    port = args.base_port
+    for app_name in app_dirs:
+        app_path = os.path.join(args.apps_dir, app_name)
+        if not os.path.isdir(app_path):
+            continue
+
+        tasks = load_tasks(app_path)
+        for task in tasks:
+            difficulty = task.get("difficulty", "unknown")
+            if args.difficulty and difficulty != args.difficulty:
+                continue
+
+            entry = build_task_entry(app_name, task, port)
+            print(json.dumps(entry))
+
+        # Every app directory consumes a port slot, even when it yields no tasks.
+        port += args.port_step
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/browseros-agent/apps/eval/scripts/infinity-evaluate.py
+++ b/packages/browseros-agent/apps/eval/scripts/infinity-evaluate.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+"""
+Evaluation helper for WebArena-Infinity verifier scripts.
+
+Reads JSON from stdin with app_server_url, verifier_path, and task_id.
+Runs the verifier against the app server and outputs a JSON result.
+
+Verifiers have the signature: verify(server_url: str) -> tuple[bool, str]
+They fetch /api/state internally and return (passed, message).
+
+Usage:
+    echo '{"app_server_url": "http://localhost:8000", "verifier_path": "/path/to/verify.py"}' | python infinity-evaluate.py
+"""
+
+import contextlib
+import importlib.util
+import json
+import sys
+import traceback
+
+
+def load_verifier(verifier_path: str):
+    """Import the Python file at ``verifier_path`` and return the module.
+
+    Raises:
+        ImportError: If no import spec or loader can be created for the path.
+    """
+    spec = importlib.util.spec_from_file_location("verifier", verifier_path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load verifier from {verifier_path}")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def main():
+    """Run one verifier and print a single JSON result on stdout.
+
+    Exits with status 1 when the input is invalid or the verifier cannot be
+    loaded or raises; the JSON result is still printed in those cases.
+    """
+    try:
+        data = json.loads(sys.stdin.read())
+    except json.JSONDecodeError as e:
+        print(json.dumps({"pass": False, "reward": 0.0, "message": f"Invalid JSON input: {e}"}))
+        sys.exit(1)
+
+    server_url = data.get("app_server_url", "")
+    verifier_path = data.get("verifier_path", "")
+
+    if not server_url or not verifier_path:
+        print(json.dumps({
+            "pass": False,
+            "reward": 0.0,
+            "message": "Missing app_server_url or verifier_path",
+        }))
+        sys.exit(1)
+
+    try:
+        # Verifier scripts are external code and may print; route their stdout
+        # to stderr so that stdout carries only the JSON result below.
+        with contextlib.redirect_stdout(sys.stderr):
+            verifier = load_verifier(verifier_path)
+            fn = getattr(verifier, "verify", None)
+            if not callable(fn):
+                raise AttributeError(
+                    f"Verifier has no verify() function. "
+                    f"Available: {[a for a in dir(verifier) if not a.startswith('_')]}"
+                )
+
+            # Verifiers take server_url and fetch state internally
+            result = fn(server_url)
+
+        # Expected return is tuple[bool, str]; coerce so the output is always
+        # a JSON boolean even if the verifier returns a truthy non-bool.
+        if isinstance(result, tuple) and len(result) >= 2:
+            passed, message = bool(result[0]), str(result[1])
+        else:
+            passed, message = bool(result), str(result)
+
+    except Exception as e:
+        print(json.dumps({
+            "pass": False,
+            "reward": 0.0,
+            "message": f"Verifier error: {e}\n{traceback.format_exc()}",
+        }))
+        sys.exit(1)
+
+    print(json.dumps({
+        "pass": passed,
+        "reward": 1.0 if passed else 0.0,
+        "message": message,
+    }))
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/browseros-agent/apps/eval/scripts/weekly-report.ts
+++ b/packages/browseros-agent/apps/eval/scripts/weekly-report.ts
@@ -59,6 +59,8 @@ interface RunSummary {
 }

 const PASS_FAIL_GRADER_ORDER = [
+  'agisdk_state_diff',
+  'infinity_state',
  'performance_grader',
  'webvoyager_grader',
  'fara_combined',
--- a/packages/browseros-agent/apps/eval/src/graders/benchmark/agisdk-state-diff.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/benchmark/agisdk-state-diff.ts
@@ -0,0 +1,263 @@
+import { spawn } from 'node:child_process'
+import { join } from 'node:path'
+import type { GraderResult } from '../../types'
+import { callMcpTool } from '../../utils/mcp-client'
+import type { Grader, GraderInput } from '../types'
+
+const EVAL_SCRIPT = join(
+  import.meta.dirname,
+  '..',
+  '..',
+  '..',
+  'scripts',
+  'agisdk-evaluate.py',
+)
+
+/** Result object printed on stdout by scripts/agisdk-evaluate.py. */
+interface AgisdkEvalOutput {
+  reward: number
+  pass: boolean
+  message: string
+  per_criterion: unknown[]
+}
+
+/** Runtime check that parsed evaluator output has the expected fields. */
+function isAgisdkEvalOutput(value: unknown): value is AgisdkEvalOutput {
+  if (typeof value !== 'object' || value === null) return false
+  const candidate = value as Record<string, unknown>
+  return (
+    typeof candidate.reward === 'number' &&
+    typeof candidate.pass === 'boolean' &&
+    typeof candidate.message === 'string' &&
+    Array.isArray(candidate.per_criterion)
+  )
+}
+
+/**
+ * Deterministic grader for AGI SDK tasks: reads the clone site's /finish
+ * state through the browser and scores it with the Python agisdk evaluator.
+ */
+export class AgisdkStateDiffGrader implements Grader {
+  name = 'agisdk_state_diff'
+
+  /**
+   * Grade one task. Every failure (unknown site, unreadable /finish page,
+   * evaluator error) is returned as a failing result instead of thrown.
+   */
+  async grade(input: GraderInput): Promise<GraderResult> {
+    const taskId = this.extractTaskId(input.task.query_id)
+    const startUrl = this.extractStartUrl(input)
+    const mcpEndpoint =
+      input.mcpUrl ||
+      `${process.env.BROWSEROS_SERVER_URL || 'http://127.0.0.1:9110'}/mcp`
+
+    if (!startUrl) {
+      return {
+        score: 0,
+        pass: false,
+        reasoning: 'Could not determine clone site URL from task',
+      }
+    }
+
+    let origin: string
+    try {
+      origin = new URL(startUrl).origin
+    } catch {
+      return {
+        score: 0,
+        pass: false,
+        reasoning: `Invalid clone site URL: ${startUrl}`,
+      }
+    }
+
+    let envState: Record<string, unknown>
+    try {
+      envState = await this.fetchFinishState(origin, mcpEndpoint)
+    } catch (error) {
+      return {
+        score: 0,
+        pass: false,
+        reasoning: `Failed to fetch /finish endpoint: ${error instanceof Error ? error.message : String(error)}`,
+        details: { origin, error: true },
+      }
+    }
+
+    try {
+      const result = await this.runPythonEvaluator(
+        taskId,
+        envState,
+        input.finalAnswer || '',
+      )
+      return {
+        score: result.reward,
+        pass: result.pass,
+        reasoning:
+          result.message ||
+          (result.pass ? 'All criteria passed' : 'Some criteria failed'),
+        details: {
+          reward: result.reward,
+          per_criterion: result.per_criterion,
+          origin,
+          agisdk_task_id: taskId,
+        },
+      }
+    } catch (error) {
+      return {
+        score: 0,
+        pass: false,
+        reasoning: `Python evaluator error: ${error instanceof Error ? error.message : String(error)}`,
+        details: { error: true },
+      }
+    }
+  }
+
+  /** Strip the dataset's "agisdk-" prefix to recover the agisdk task id. */
+  private extractTaskId(queryId: string): string {
+    return queryId.replace(/^agisdk-/, '')
+  }
+
+  /**
+   * Work out the clone site URL for a task, or null when none can be found.
+   */
+  private extractStartUrl(input: GraderInput): string | null {
+    // Derive from task_id: "dashdish-10" → "https://evals-dashdish.vercel.app"
+    // Task IDs are "{site}-{number}" where site may contain hyphens (e.g. "fly-unified-5")
+    const taskId = this.extractTaskId(input.task.query_id)
+    const siteId = taskId.replace(/-\d+$/, '')
+    if (siteId) return `https://evals-${siteId}.vercel.app`
+
+    // Fallback: search messages for vercel.app URLs
+    for (const msg of input.messages) {
+      const text =
+        msg.type === 'user'
+          ? msg.content
+          : msg.type === 'tool-input-available'
+            ? JSON.stringify(msg.input)
+            : ''
+      const urlMatch = text.match(/https?:\/\/[^\s"']+\.vercel\.app/)
+      if (urlMatch) return urlMatch[0]
+    }
+
+    return null
+  }
+
+  /**
+   * Navigate the browser to `${origin}/finish` and return the JSON object
+   * shown in the page's <pre> element.
+   *
+   * @throws When no text content comes back or the text is not valid JSON;
+   *   errors raised by callMcpTool propagate unchanged.
+   */
+  private async fetchFinishState(
+    origin: string,
+    mcpEndpoint: string,
+  ): Promise<Record<string, unknown>> {
+    const finishUrl = `${origin}/finish`
+
+    // Navigate browser to /finish page (state diff is rendered client-side)
+    await callMcpTool(mcpEndpoint, 'navigate_page', {
+      url: finishUrl,
+      page: 1,
+    })
+
+    // Wait for the page to render, then extract JSON from <pre> element
+    const result = await callMcpTool(mcpEndpoint, 'evaluate_script', {
+      page: 1,
+      expression: `
+        new Promise((resolve, reject) => {
+          let attempts = 0;
+          const check = () => {
+            const pre = document.querySelector('pre');
+            if (pre && pre.textContent.trim().startsWith('{')) {
+              resolve(pre.textContent);
+            } else if (++attempts > 20) {
+              reject(new Error('Timed out waiting for <pre> JSON on /finish'));
+            } else {
+              setTimeout(check, 500);
+            }
+          };
+          check();
+        })
+      `,
+    })
+
+    const textContent = result.content?.find(
+      (c: { type: string }) => c.type === 'text',
+    )
+    if (!textContent?.text) {
+      throw new Error('No text content returned from /finish page')
+    }
+
+    return JSON.parse(textContent.text) as Record<string, unknown>
+  }
+
+  /**
+   * Run scripts/agisdk-evaluate.py with the task id, state and model
+   * response as JSON on stdin, and resolve with its parsed stdout.
+   *
+   * Rejects when the process cannot be spawned, exits non-zero, or prints
+   * something other than the expected result object.
+   */
+  private runPythonEvaluator(
+    taskId: string,
+    envState: Record<string, unknown>,
+    modelResponse: string,
+  ): Promise<AgisdkEvalOutput> {
+    return new Promise((resolve, reject) => {
+      const proc = spawn('python3', [EVAL_SCRIPT], {
+        stdio: ['pipe', 'pipe', 'pipe'],
+      })
+
+      const inputData = JSON.stringify({
+        task_id: taskId,
+        env_state: envState,
+        model_response: modelResponse,
+      })
+
+      let stdout = ''
+      let stderr = ''
+
+      proc.stdout.on('data', (data: Buffer) => {
+        stdout += data.toString()
+      })
+
+      proc.stderr.on('data', (data: Buffer) => {
+        stderr += data.toString()
+      })
+
+      proc.on('close', (code) => {
+        if (code !== 0) {
+          reject(
+            new Error(`Python evaluator exited with code ${code}: ${stderr}`),
+          )
+          return
+        }
+
+        let parsed: unknown
+        try {
+          parsed = JSON.parse(stdout.trim())
+        } catch {
+          reject(new Error(`Failed to parse evaluator output: ${stdout}`))
+          return
+        }
+        if (!isAgisdkEvalOutput(parsed)) {
+          reject(new Error(`Unexpected evaluator output: ${stdout}`))
+          return
+        }
+        resolve(parsed)
+      })
+
+      proc.on('error', (err) => {
+        reject(new Error(`Failed to spawn Python evaluator: ${err.message}`))
+      })
+
+      // If the child exits before reading its input the write fails (EPIPE).
+      // An unhandled stream 'error' would crash the runner; the 'close' and
+      // 'error' handlers above already report why the evaluator failed.
+      proc.stdin.on('error', () => {})
+
+      proc.stdin.write(inputData)
+      proc.stdin.end()
+    })
+  }
+}
--- a/packages/browseros-agent/apps/eval/src/graders/benchmark/infinity-state.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/benchmark/infinity-state.ts
@@ -0,0 +1,164 @@
+import { join, resolve } from 'node:path'
+import type { GraderResult } from '../../types'
+import type { Grader, GraderInput } from '../types'
+
+/** JSON payload written to the stdin of scripts/infinity-evaluate.py. */
+interface InfinityEvalInput {
+  app_server_url: string
+  verifier_path: string
+  task_id: string
+}
+
+/** JSON result printed on stdout by scripts/infinity-evaluate.py. */
+interface InfinityEvalOutput {
+  pass: boolean
+  reward: number
+  message: string
+}
+
+const EVAL_SCRIPT = resolve(
+  import.meta.dir,
+  '../../../scripts/infinity-evaluate.py',
+)
+
+/**
+ * Deterministic grader for WebArena-Infinity tasks: runs the task's Python
+ * verifier against the app server and maps its verdict to a GraderResult.
+ */
+export class InfinityStateGrader implements Grader {
+  name = 'infinity_state'
+
+  /**
+   * Grade one task. Configuration problems and evaluator failures are
+   * returned as a failing result (score 0) rather than thrown.
+   */
+  async grade(input: GraderInput): Promise<GraderResult> {
+    const parsed = this.parseQueryId(input.task.query_id)
+    if (!parsed) {
+      return {
+        score: 0,
+        pass: false,
+        reasoning: `Cannot parse query_id "${input.task.query_id}" — expected format: infinity-{app}-{task_id}`,
+      }
+    }
+
+    const appServerUrl = this.resolveAppServerUrl(input)
+    if (!appServerUrl) {
+      return {
+        score: 0,
+        pass: false,
+        reasoning: 'Cannot determine app server URL',
+      }
+    }
+
+    const infinityDir = process.env.WEBARENA_INFINITY_DIR
+    if (!infinityDir) {
+      return {
+        score: 0,
+        pass: false,
+        reasoning:
+          'WEBARENA_INFINITY_DIR env var not set. Point it to the webarena-infinity repo root.',
+      }
+    }
+
+    const verifierPath = join(
+      infinityDir,
+      'apps',
+      parsed.appName,
+      'real-tasks',
+      `${parsed.taskId}.py`,
+    )
+
+    const evalInput: InfinityEvalInput = {
+      app_server_url: appServerUrl,
+      verifier_path: verifierPath,
+      task_id: input.task.query_id,
+    }
+
+    try {
+      const result = await this.runPythonEvaluator(evalInput)
+      return {
+        score: result.pass ? 1 : 0,
+        pass: result.pass,
+        reasoning: result.message,
+        details: {
+          reward: result.reward,
+          app_name: parsed.appName,
+          app_server_url: appServerUrl,
+        },
+      }
+    } catch (error) {
+      return {
+        score: 0,
+        pass: false,
+        reasoning: `Evaluator process error: ${error instanceof Error ? error.message : String(error)}`,
+      }
+    }
+  }
+
+  /**
+   * Split a query id of the form infinity-{app}-{task_id} into its parts,
+   * or return null when the id does not match.
+   */
+  private parseQueryId(
+    queryId: string,
+  ): { appName: string; taskId: string } | null {
+    // Task IDs start with "task_", app names may contain hyphens
+    // e.g. "infinity-elation-prescriptions-task_h69"
+    const match = queryId.match(/^infinity-(.+)-(task_.+)$/)
+    if (!match) return null
+    return { appName: match[1], taskId: match[2] }
+  }
+
+  /** Pick the app server URL: executor-provided first, then INFINITY_APP_URL. */
+  private resolveAppServerUrl(input: GraderInput): string | null {
+    // Passed directly from task executor (started by InfinityAppManager)
+    if (input.infinityAppUrl) return input.infinityAppUrl
+
+    // Fallback: env var for manual testing
+    if (process.env.INFINITY_APP_URL) return process.env.INFINITY_APP_URL
+
+    return null
+  }
+
+  /**
+   * Run scripts/infinity-evaluate.py with `evalInput` as JSON on stdin and
+   * return its parsed stdout.
+   *
+   * @throws When the process exits non-zero or stdout is not valid JSON.
+   */
+  private async runPythonEvaluator(
+    evalInput: InfinityEvalInput,
+  ): Promise<InfinityEvalOutput> {
+    const proc = Bun.spawn(['python3', EVAL_SCRIPT], {
+      stdin: 'pipe',
+      stdout: 'pipe',
+      stderr: 'pipe',
+    })
+
+    const inputJson = JSON.stringify(evalInput)
+    proc.stdin.write(inputJson)
+    proc.stdin.end()
+
+    // Drain both pipes concurrently. Reading stdout to completion before
+    // touching stderr can stall when the child fills the stderr pipe while
+    // we are still waiting for stdout to close.
+    const [stdout, stderr, exitCode] = await Promise.all([
+      new Response(proc.stdout).text(),
+      new Response(proc.stderr).text(),
+      proc.exited,
+    ])
+
+    if (exitCode !== 0) {
+      throw new Error(
+        `Python evaluator exited with code ${exitCode}: ${stderr || stdout}`,
+      )
+    }
+
+    try {
+      return JSON.parse(stdout.trim()) as InfinityEvalOutput
+    } catch {
+      throw new Error(`Failed to parse evaluator output: ${stdout}`)
+    }
+  }
+}
--- a/packages/browseros-agent/apps/eval/src/graders/registry.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/registry.ts
@@ -1,4 +1,6 @@
 import type { GraderResult } from '../types'
+import { AgisdkStateDiffGrader } from './benchmark/agisdk-state-diff'
+import { InfinityStateGrader } from './benchmark/infinity-state'
 import { Mind2WebJudgeGrader } from './benchmark/mind2web'
 import { WebVoyagerGrader } from './benchmark/webvoyager'
 import { FaraAlignmentGrader } from './fara/alignment'
@@ -19,7 +21,13 @@ export function createGrader(
  options: GraderOptions | null,
 ): Grader | null {
  switch (name) {
-    // Benchmark graders
+    // Deterministic benchmark graders (no LLM judge)
+    case 'agisdk_state_diff':
+      return new AgisdkStateDiffGrader()
+    case 'infinity_state':
+      return new InfinityStateGrader()
+
+    // LLM-based benchmark graders
    case 'webvoyager_grader':
      if (!options?.apiKey) return null
      return new WebVoyagerGrader(
@@ -107,10 +115,12 @@ export async function runGraders(

 // Export grader classes for direct use
 export {
+  AgisdkStateDiffGrader,
  FaraAlignmentGrader,
  FaraCombinedGrader,
  FaraMultimodalGrader,
  FaraRubricGrader,
+  InfinityStateGrader,
  Mind2WebJudgeGrader,
  PerformanceGrader,
  WebVoyagerGrader,
--- a/packages/browseros-agent/apps/eval/src/graders/types.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/types.ts
@@ -11,6 +11,8 @@ export interface GraderInput {
  finalAnswer: string | null
  expectedAnswer?: string | null
  outputDir: string
+  mcpUrl?: string
+  infinityAppUrl?: string
 }

 export interface Grader {
--- a/packages/browseros-agent/apps/eval/src/runner/infinity-app-manager.ts
+++ b/packages/browseros-agent/apps/eval/src/runner/infinity-app-manager.ts
@@ -0,0 +1,146 @@
+/**
+ * Manages WebArena-Infinity app server lifecycle per task.
+ *
+ * Each worker gets a unique port: base_port + worker_index.
+ * Server is started fresh before each task and killed after,
+ * guaranteeing clean state.
+ */
+
+import { type ChildProcess, spawn } from 'node:child_process'
+import { join } from 'node:path'
+
+export class InfinityAppManager {
+  private proc: ChildProcess | null = null
+  private port: number
+  private infinityDir: string
+
+  /**
+   * @param workerIndex Index of the worker; added to `basePort` to get the port.
+   * @param basePort Port used by worker 0 (default 8000).
+   */
+  constructor(
+    private workerIndex: number,
+    private basePort: number = 8000,
+  ) {
+    this.port = basePort + workerIndex
+    this.infinityDir = process.env.WEBARENA_INFINITY_DIR || ''
+  }
+
+  /**
+   * Stop any server this manager is running, start `apps/<appName>/server.py`
+   * on this manager's port, and wait until it answers HTTP requests.
+   *
+   * @returns The base URL of the running app.
+   * @throws When WEBARENA_INFINITY_DIR is unset, python3 cannot be spawned,
+   *   the server exits early, or it is not ready in time. The child process
+   *   is stopped before a readiness failure is rethrown.
+   */
+  async startApp(appName: string): Promise<string> {
+    await this.stop()
+
+    if (!this.infinityDir) {
+      throw new Error('WEBARENA_INFINITY_DIR env var not set')
+    }
+
+    const appDir = join(this.infinityDir, 'apps', appName)
+    const serverScript = join(appDir, 'server.py')
+    const proc = spawn('python3', [serverScript, '--port', String(this.port)], {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      cwd: appDir,
+    })
+    this.proc = proc
+
+    // Nothing consumes the server's output. Put the pipes in flowing mode so
+    // a server that logs a lot cannot fill them and block on write.
+    proc.stdout?.resume()
+    proc.stderr?.resume()
+
+    // Record why the child is gone so waitForReady can fail fast. The 'error'
+    // listener also keeps a failed spawn from being an uncaught exception.
+    let failure: string | null = null
+    proc.on('error', (err) => {
+      failure = `failed to run python3: ${err.message}`
+    })
+    proc.once('exit', (code, signal) => {
+      failure = failure ?? `server exited early (code ${code}, signal ${signal})`
+    })
+
+    // Wait for server to be ready
+    const url = `http://localhost:${this.port}`
+    try {
+      await this.waitForReady(url, 30, 500, () => failure)
+    } catch (error) {
+      await this.stop()
+      throw error
+    }
+    return url
+  }
+
+  /**
+   * Terminate the server if one is running: SIGTERM first, then SIGKILL if it
+   * has not exited after 3 seconds. Safe to call when nothing is running.
+   */
+  async stop(): Promise<void> {
+    const proc = this.proc
+    if (!proc) return
+    this.proc = null
+
+    // Already exited: 'exit' will not fire again, so there is nothing to wait for.
+    if (proc.exitCode !== null || proc.signalCode !== null) return
+
+    await new Promise<void>((resolve) => {
+      const timeout = setTimeout(() => {
+        proc.kill('SIGKILL')
+        resolve()
+      }, 3000)
+      proc.once('exit', () => {
+        clearTimeout(timeout)
+        resolve()
+      })
+      proc.kill('SIGTERM')
+    })
+  }
+
+  /** Port this manager's server listens on (basePort + workerIndex). */
+  getPort(): number {
+    return this.port
+  }
+
+  /** Base URL of this manager's server on localhost. */
+  getUrl(): string {
+    return `http://localhost:${this.port}`
+  }
+
+  /**
+   * Poll `url` until it returns an OK response.
+   *
+   * @param failure Returns a reason string once the server process is known
+   *   to be gone; polling then stops immediately.
+   * @throws When `failure` reports a reason or all attempts are used up.
+   */
+  private async waitForReady(
+    url: string,
+    maxAttempts = 30,
+    intervalMs = 500,
+    failure: () => string | null = () => null,
+  ): Promise<void> {
+    for (let i = 0; i < maxAttempts; i++) {
+      const reason = failure()
+      if (reason !== null) {
+        throw new Error(`Infinity app server did not start: ${reason}`)
+      }
+      try {
+        const resp = await fetch(url, {
+          signal: AbortSignal.timeout(2000),
+        })
+        if (resp.ok) return
+      } catch {
+        // Server not ready yet
+      }
+      await new Promise((r) => setTimeout(r, intervalMs))
+    }
+    throw new Error(
+      `Infinity app server not ready after ${maxAttempts * intervalMs}ms on port ${this.port}`,
+    )
+  }
+}
--- a/packages/browseros-agent/apps/eval/src/runner/parallel-executor.ts
+++ b/packages/browseros-agent/apps/eval/src/runner/parallel-executor.ts
@@ -160,6 +160,7 @@ export class ParallelExecutor {
    }
    const executor = createTaskExecutor(
      workerConfig,
+      workerIndex,
      this.config.outputDir,
      this.config.graderOptions,
      this.config.onEvent,
--- a/packages/browseros-agent/apps/eval/src/runner/task-executor.ts
+++ b/packages/browseros-agent/apps/eval/src/runner/task-executor.ts
@@ -9,6 +9,7 @@ import {
 import { runGraders } from '../graders/registry'
 import type { ErrorSource, EvalConfig, GraderResult, Task } from '../types'
 import { callMcpTool } from '../utils/mcp-client'
+import { InfinityAppManager } from './infinity-app-manager'
 import type { GraderOptions, TaskResult } from './types'

 // ============================================================================
@@ -46,6 +47,7 @@ export interface TaskExecutorDeps {
 export class TaskExecutor {
  constructor(
    private readonly config: EvalConfig,
+    private readonly workerIndex: number,
    private readonly outputDir: string,
    private readonly deps: TaskExecutorDeps,
  ) {}
@@ -101,6 +103,35 @@ export class TaskExecutor {
    // Resolve page ID once — fresh browser has exactly one page
    const pageId = await this.resolveInitialPageId(mcpUrl)

+    // For Infinity tasks, start a fresh app server per task
+    let infinityManager: InfinityAppManager | null = null
+    let actualStartUrl = task.start_url
+
+    if (task.dataset === 'webarena-infinity') {
+      const appName = (task.metadata?.additional as Record<string, unknown>)
+        ?.app_name as string
+      const appBasePort =
+        ((task.metadata?.additional as Record<string, unknown>)
+          ?.app_base_port as number) || 8000
+
+      if (appName && process.env.WEBARENA_INFINITY_DIR) {
+        infinityManager = new InfinityAppManager(this.workerIndex, appBasePort)
+        try {
+          actualStartUrl = await infinityManager.startApp(appName)
+          console.log(
+            `  Infinity app "${appName}" started on port ${infinityManager.getPort()}`,
+          )
+        } catch (error) {
+          throw new TaskExecutionError(
+            `Failed to start Infinity app: ${error instanceof Error ? error.message : String(error)}`,
+            task,
+            'navigation',
+            error instanceof Error ? error : undefined,
+          )
+        }
+      }
+    }
+
    try {
      // Phase 1: Set viewport + navigate to start URL
      try {
@@ -114,10 +145,10 @@ export class TaskExecutor {
        )
      }

-      if (task.start_url && task.start_url !== 'about:blank') {
+      if (actualStartUrl && actualStartUrl !== 'about:blank') {
        try {
          await callMcpTool(mcpUrl, 'navigate_page', {
-            url: task.start_url,
+            url: actualStartUrl,
            page: pageId,
          })
        } catch (error) {
@@ -134,7 +165,11 @@ export class TaskExecutor {
      const agentResult = await this.executeAgent(task, pageId)

      // Phase 3: Run graders
-      const graderResults = await this.runGraders(task, agentResult)
+      const graderResults = await this.runGraders(
+        task,
+        agentResult,
+        infinityManager?.getUrl(),
+      )

      const status =
        agentResult.metadata.termination_reason === 'timeout'
@@ -169,6 +204,11 @@ export class TaskExecutor {
      } catch {
        // Ignore cleanup errors
      }
+
+      // Stop Infinity app server if running
+      if (infinityManager) {
+        await infinityManager.stop().catch(() => {})
+      }
    }
  }

@@ -209,6 +249,7 @@ export class TaskExecutor {
  private async runGraders(
    task: Task,
    agentResult: AgentResult,
+    infinityAppUrl?: string,
  ): Promise<Record<string, GraderResult>> {
    const configGraders = this.config.graders ?? []
    const taskGraders = task.graders ?? []
@@ -234,6 +275,8 @@ export class TaskExecutor {
          expectedAnswer: (task.metadata?.additional as Record<string, unknown>)
            ?.answer as string | undefined,
          outputDir: join(this.outputDir, task.query_id),
+          mcpUrl: `${this.config.browseros.server_url}/mcp`,
+          infinityAppUrl,
        },
        this.deps.graderOptions,
      )
@@ -269,11 +312,12 @@ export class TaskExecutor {

 export function createTaskExecutor(
  config: EvalConfig,
+  workerIndex: number,
  outputDir: string,
  graderOptions: GraderOptions | null,
  onEvent?: (taskId: string, event: Record<string, unknown>) => void,
 ): TaskExecutor {
-  return new TaskExecutor(config, outputDir, {
+  return new TaskExecutor(config, workerIndex, outputDir, {
    graderOptions,
    onEvent,
  })
--- a/packages/browseros-agent/apps/eval/src/runner/types.ts
+++ b/packages/browseros-agent/apps/eval/src/runner/types.ts
@@ -100,6 +100,8 @@ export interface TaskResultSummary {
 // ============================================================================

 export const PASS_FAIL_GRADER_ORDER = [
+  'agisdk_state_diff',
+  'infinity_state',
  'performance_grader',
  'webvoyager_grader',
  'fara_combined',
--- a/packages/browseros-agent/apps/server/.env.example
+++ b/packages/browseros-agent/apps/server/.env.example
@@ -7,6 +7,11 @@ BROWSEROS_EXTENSION_PORT=9300
 # BROWSEROS_RESOURCES_DIR=./resources
 # BROWSEROS_EXECUTION_DIR=./out

+# VM cache (optional - runtime downloads published agent cache in background)
+# Set prefetch=false to skip startup warmup; VM/OpenClaw startup still syncs on demand.
+BROWSEROS_VM_CACHE_PREFETCH=true
+BROWSEROS_VM_CACHE_MANIFEST_URL=https://cdn.browseros.com/vm/manifest.json
+
 # BrowserOS config
 BROWSEROS_CONFIG_URL=https://llm.browseros.com/api/browseros-server/config
 BROWSEROS_VERSION=
--- a/packages/browseros-agent/apps/server/.env.production.example
+++ b/packages/browseros-agent/apps/server/.env.production.example
@@ -5,6 +5,9 @@ CODEGEN_SERVICE_URL=
 POSTHOG_API_KEY=
 SENTRY_DSN=

+BROWSEROS_VM_CACHE_PREFETCH=true
+BROWSEROS_VM_CACHE_MANIFEST_URL=https://cdn.browseros.com/vm/manifest.json
+
 R2_ACCOUNT_ID=
 R2_ACCESS_KEY_ID=
 R2_SECRET_ACCESS_KEY=
--- a/packages/browseros-agent/apps/server/README.md
+++ b/packages/browseros-agent/apps/server/README.md
@@ -142,7 +142,7 @@ cp .env.example .env.development
 bun run start
 ```

-See the [agent monorepo README](../../README.md) for full environment variable reference and `process-compose` setup.
+See the [agent monorepo README](../../README.md) for full environment variable reference and `dev:watch` setup.

 ### Testing

--- a/packages/browseros-agent/apps/server/package.json
+++ b/packages/browseros-agent/apps/server/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@browseros/server",
-  "version": "0.0.85",
+  "version": "0.0.92",
  "description": "BrowserOS server",
  "type": "module",
  "main": "./src/index.ts",
@@ -10,9 +10,21 @@
  "scripts": {
    "start": "bun --watch --env-file=.env.development src/index.ts",
    "build": "bun ../../scripts/build/server.ts --target=all",
-    "test:tools": "bun run test:cleanup && bun --env-file=.env.development test tests/tools",
-    "test:integration": "bun run test:cleanup && bun --env-file=.env.development test tests/server.integration.test.ts",
-    "test:sdk": "echo 'SDK tests disabled: test environment does not provide the extract/verify LLM service'",
+    "test": "bun run test:all",
+    "test:all": "bun run ./tests/__helpers__/run-test-group.ts all",
+    "test:agent": "bun run ./tests/__helpers__/run-test-group.ts agent",
+    "test:api": "bun run ./tests/__helpers__/run-test-group.ts api",
+    "test:browser": "bun run ./tests/__helpers__/run-test-group.ts browser",
+    "test:cdp": "bun run test:browser",
+    "test:core": "bun run ./tests/__helpers__/run-test-group.ts core",
+    "test:integration": "bun run ./tests/__helpers__/run-test-group.ts integration",
+    "test:root": "bun run ./tests/__helpers__/run-test-group.ts root",
+    "test:sdk": "bun run ./tests/__helpers__/run-test-group.ts sdk",
+    "test:skills": "bun run ./tests/__helpers__/run-test-group.ts skills",
+    "test:tools": "bun run ./tests/__helpers__/run-test-group.ts tools",
+    "test:tools:acl": "bun run test:cleanup && bun --env-file=.env.development test ./tests/tools/acl-scorer.test.ts",
+    "test:tools:filesystem": "bun run test:cleanup && bun --env-file=.env.development test ./tests/tools/filesystem",
+    "test:tools:input": "bun run test:cleanup && bun --env-file=.env.development test ./tests/tools/input.test.ts",
    "test:cleanup": "./tests/__helpers__/cleanup.sh",
    "typecheck": "tsc --noEmit",
    "devtools": "bunx @ai-sdk/devtools"
--- a/packages/browseros-agent/apps/server/resources/openclaw-compose.yml
+++ b/packages/browseros-agent/apps/server/resources/openclaw-compose.yml
@@ -1,37 +0,0 @@
-services:
-  openclaw-gateway:
-    image: ${OPENCLAW_IMAGE:-ghcr.io/openclaw/openclaw:latest}
-    ports:
-      - "127.0.0.1:${OPENCLAW_GATEWAY_PORT:-18789}:18789"
-    environment:
-      - HOME=/home/node
-      - NODE_ENV=production
-      - OPENCLAW_GATEWAY_TOKEN=${OPENCLAW_GATEWAY_TOKEN}
-      - OPENCLAW_GATEWAY_BIND=lan
-      - TZ=${TZ}
-      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
-      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
-      - GEMINI_API_KEY=${GEMINI_API_KEY:-}
-      - OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
-      - GROQ_API_KEY=${GROQ_API_KEY:-}
-      - MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
-      - MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
-    volumes:
-      - ${OPENCLAW_CONFIG_DIR}:/home/node/.openclaw
-    extra_hosts:
-      - "host.containers.internal:host-gateway"
-    command:
-      - node
-      - dist/index.js
-      - gateway
-      - --bind
-      - lan
-      - --port
-      - "18789"
-      - --allow-unconfigured
-    healthcheck:
-      test: ["CMD", "curl", "-sf", "http://127.0.0.1:18789/healthz"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-    restart: unless-stopped
--- a/packages/browseros-agent/apps/server/src/api/routes/mcp.ts
+++ b/packages/browseros-agent/apps/server/src/api/routes/mcp.ts
@@ -10,6 +10,7 @@ import type { Browser } from '../../browser/browser'
 import { logger } from '../../lib/logger'
 import { metrics } from '../../lib/metrics'
 import { Sentry } from '../../lib/sentry'
+import { getMonitoringService } from '../../monitoring/service'
 import type { ToolRegistry } from '../../tools/tool-registry'
 import type { GlobalAclPolicyService } from '../services/acl/global-acl-policy'
 import { resolveAclPolicyForMcpRequest } from '../services/acl/resolve-acl-policy'
@@ -39,16 +40,30 @@ export function createMcpRoutes(deps: McpRouteDeps) {

  app.post('/', async (c) => {
    const scopeId = c.req.header('X-BrowserOS-Scope-Id') || 'ephemeral'
+    const monitoringService = getMonitoringService()
+    const explicitAgentId =
+      c.req.query('agentId') ??
+      c.req.header('X-BrowserOS-Agent-Id') ??
+      undefined
+    const activeSession =
+      monitoringService.resolveSessionForMcpRequest(explicitAgentId)
+    const agentId = activeSession?.agentId
    metrics.log('mcp.request', { scopeId })
    const aclRules = await resolveAclPolicyForMcpRequest({
      policyService: deps.policyService,
    })
+    const monitoringSessionId = activeSession?.monitoringSessionId
+    const observer =
+      monitoringSessionId && agentId
+        ? monitoringService.createObserver(monitoringSessionId, agentId)
+        : undefined

    // Per-request server + transport: no shared state, no race conditions,
    // no ID collisions. Required by MCP SDK 1.26.0+ security fix (GHSA-345p-7cg4-v4c7).
    const mcpServer = createMcpServer({
      ...deps,
      aclRules,
+      observer,
    })
    const transport = new StreamableHTTPTransport({
      sessionIdGenerator: undefined,
@@ -62,6 +77,9 @@ export function createMcpRoutes(deps: McpRouteDeps) {
      Sentry.withScope((scope) => {
        scope.setTag('route', 'mcp')
        scope.setTag('scopeId', scopeId)
+        if (agentId) {
+          scope.setTag('agentId', agentId)
+        }
        Sentry.captureException(error)
      })
      logger.error('Error handling MCP request', {
--- a/packages/browseros-agent/apps/server/src/api/routes/monitoring.ts
+++ b/packages/browseros-agent/apps/server/src/api/routes/monitoring.ts
@@ -0,0 +1,113 @@
+import { Hono } from 'hono'
+import { getMonitoringService } from '../../monitoring/service'
+import { isValidMonitoringRunId } from '../../monitoring/storage'
+
+const MONITORING_FINAL_STATUSES = [
+  'completed',
+  'failed',
+  'aborted',
+  'incomplete',
+] as const
+type MonitoringFinalStatus = (typeof MONITORING_FINAL_STATUSES)[number]
+
+/** Type guard for the status values the finalize route accepts. */
+function isMonitoringFinalStatus(
+  value: unknown,
+): value is MonitoringFinalStatus {
+  return MONITORING_FINAL_STATUSES.includes(value as MonitoringFinalStatus)
+}
+
+/** Returns `value` trimmed, or an empty string when it is not a string. */
+function trimmedString(value: unknown): string {
+  return typeof value === 'string' ? value.trim() : ''
+}
+
+/**
+ * Reads a request body as a JSON object. Resolves to null when the body is
+ * not valid JSON or is not a plain object, so callers can answer 400 instead
+ * of letting the parse error surface as a server error.
+ */
+async function readJsonObject(request: {
+  json: () => Promise<unknown>
+}): Promise<Record<string, unknown> | null> {
+  try {
+    const parsed = await request.json()
+    return parsed !== null &&
+      typeof parsed === 'object' &&
+      !Array.isArray(parsed)
+      ? (parsed as Record<string, unknown>)
+      : null
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Builds the monitoring routes:
+ *
+ * - `GET /runs?limit=N` lists runs (default and fallback limit: 50).
+ * - `GET /runs/:id` returns one run envelope, 400 for an invalid id, 404
+ *   when the run is unknown.
+ * - `POST /debug/runs` starts a session with `source: 'debug'`.
+ * - `POST /debug/runs/:id/finalize` finalizes a session.
+ *
+ * Request bodies are treated as untrusted: every field is type-checked
+ * before use and invalid input is answered with 400.
+ */
+export function createMonitoringRoutes() {
+  return new Hono()
+    .get('/runs', async (c) => {
+      const limitParam = c.req.query('limit')
+      const parsedLimit = limitParam ? Number.parseInt(limitParam, 10) : 50
+      const limit =
+        Number.isFinite(parsedLimit) && parsedLimit > 0 ? parsedLimit : 50
+
+      const runs = await getMonitoringService().listRuns(limit)
+      return c.json({ runs })
+    })
+    .get('/runs/:id', async (c) => {
+      const runId = c.req.param('id')
+      if (!isValidMonitoringRunId(runId)) {
+        return c.json({ error: 'Invalid monitoring run id' }, 400)
+      }
+      const envelope = await getMonitoringService().getRunEnvelope(runId)
+
+      if (!envelope) {
+        return c.json({ error: 'Monitoring run not found' }, 404)
+      }
+
+      return c.json({ run: envelope })
+    })
+    .post('/debug/runs', async (c) => {
+      const body = await readJsonObject(c.req)
+      if (!body) {
+        return c.json({ error: 'Request body must be a JSON object' }, 400)
+      }
+
+      const agentId = trimmedString(body.agentId)
+      if (!agentId) {
+        return c.json({ error: 'agentId is required' }, 400)
+      }
+      const sessionKey = trimmedString(body.sessionKey)
+      if (!sessionKey) {
+        return c.json({ error: 'sessionKey is required' }, 400)
+      }
+      const originalPrompt = trimmedString(body.originalPrompt)
+      if (!originalPrompt) {
+        return c.json({ error: 'originalPrompt is required' }, 400)
+      }
+
+      // Keep only well-formed turns; anything else (including null entries)
+      // is dropped rather than rejected.
+      const chatHistory = Array.isArray(body.chatHistory)
+        ? body.chatHistory
+            .filter(
+              (turn): turn is { role: 'user' | 'assistant'; content: string } =>
+                turn !== null &&
+                typeof turn === 'object' &&
+                (turn.role === 'user' || turn.role === 'assistant') &&
+                typeof turn.content === 'string',
+            )
+            .map((turn) => ({
+              role: turn.role,
+              content: turn.content,
+            }))
+        : []
+
+      const session = await getMonitoringService().startSession({
+        agentId,
+        sessionKey,
+        originalPrompt,
+        chatHistory,
+        source: 'debug',
+      })
+
+      return c.json({ session }, 201)
+    })
+    .post('/debug/runs/:id/finalize', async (c) => {
+      const runId = c.req.param('id')
+      if (!isValidMonitoringRunId(runId)) {
+        return c.json({ error: 'Invalid monitoring run id' }, 400)
+      }
+      const body = await readJsonObject(c.req)
+      if (!body) {
+        return c.json({ error: 'Request body must be a JSON object' }, 400)
+      }
+
+      const agentId = trimmedString(body.agentId)
+      if (!agentId) {
+        return c.json({ error: 'agentId is required' }, 400)
+      }
+      const sessionKey = trimmedString(body.sessionKey)
+      if (!sessionKey) {
+        return c.json({ error: 'sessionKey is required' }, 400)
+      }
+      const status = body.status
+      if (!isMonitoringFinalStatus(status)) {
+        return c.json({ error: 'status is invalid' }, 400)
+      }
+
+      const envelope = await getMonitoringService().finalizeSession({
+        monitoringSessionId: runId,
+        agentId,
+        sessionKey,
+        status,
+        finalAssistantMessage:
+          typeof body.finalAssistantMessage === 'string'
+            ? body.finalAssistantMessage
+            : undefined,
+        error: typeof body.error === 'string' ? body.error : undefined,
+      })
+
+      if (!envelope) {
+        return c.json({ error: 'Monitoring run not found' }, 404)
+      }
+
+      return c.json({ run: envelope })
+    })
+}
--- a/packages/browseros-agent/apps/server/src/api/routes/openclaw.ts
+++ b/packages/browseros-agent/apps/server/src/api/routes/openclaw.ts
@@ -7,38 +7,200 @@
 * Thin layer delegating to OpenClawService.
 */

-import { OPENCLAW_GATEWAY_PORT } from '@browseros/shared/constants/openclaw'
-import { BROWSEROS_ROLE_TEMPLATES } from '@browseros/shared/constants/role-aware-agents'
-import type {
-  BrowserOSAgentRoleId,
-  BrowserOSCustomRoleInput,
-} from '@browseros/shared/types/role-aware-agents'
 import { Hono } from 'hono'
 import { stream } from 'hono/streaming'
 import { logger } from '../../lib/logger'
+import { getMonitoringService } from '../../monitoring/service'
+import type { MonitoringChatTurn } from '../../monitoring/types'
 import {
  OpenClawAgentAlreadyExistsError,
  OpenClawAgentNotFoundError,
  OpenClawInvalidAgentNameError,
  OpenClawProtectedAgentError,
+  OpenClawSessionNotFoundError,
 } from '../services/openclaw/errors'
-import { getOpenClawService } from '../services/openclaw/openclaw-service'
+import { getOpenClawCliProvider } from '../services/openclaw/openclaw-cli-providers/registry'
+import type { OpenClawChatContentPart } from '../services/openclaw/openclaw-http-client'
+import { isUnsupportedOpenClawProviderError } from '../services/openclaw/openclaw-provider-map'
+import {
+  getOpenClawService,
+  normalizeBrowserOSChatSessionKey,
+} from '../services/openclaw/openclaw-service'
+import type { QueuedItemPublic } from '../services/queue'
+import { getOutboundQueueService } from '../services/queue'

-function isValidBoundaryMode(
-  value: unknown,
-): value is BrowserOSCustomRoleInput['boundaries'][number]['defaultMode'] {
-  return value === 'allow' || value === 'ask' || value === 'block'
+/**
+ * Inbound attachment shapes the chat route accepts. Images travel as
+ * data: URLs (the gateway is on 127.0.0.1 so we don't pay public-network
+ * cost for the base64 overhead). Files arrive with their text already
+ * extracted on the client — we just inline them as a fenced text part on
+ * the user message.
+ */
+// Image attachment; validation requires `dataUrl` to start with "data:" and
+// `mediaType` to be one of the allowed image types. `name` is optional.
+type ImageAttachment = {
+  kind: 'image'
+  mediaType: string
+  dataUrl: string
+  name?: string
+}
+// File attachment; `text` is the file's content as text and `name` is
+// required by validation.
+type FileAttachment = {
+  kind: 'file'
+  mediaType: string
+  name: string
+  text: string
+}
+// Discriminated on `kind`.
+type ChatAttachment = ImageAttachment | FileAttachment
+
+// Maximum number of attachments accepted on a single chat message.
+const MAX_ATTACHMENTS = 10
+const MAX_IMAGE_BYTES = 5 * 1024 * 1024 // 5 MB after compression
+// data: URLs encode bytes as base64 (~4/3 inflation) plus a small media-type
+// prefix; cap the encoded string against that, not 2× the byte budget.
+const MAX_IMAGE_DATA_URL_LENGTH = Math.ceil(MAX_IMAGE_BYTES * (4 / 3)) + 100
+const MAX_FILE_TEXT_BYTES = 1 * 1024 * 1024 // 1 MB extracted text
+// Exact media types accepted for image attachments.
+// NOTE(review): 'image/jpg' is not a registered media type; presumably kept
+// as a lenient alias for 'image/jpeg' — confirm against the client.
+const ALLOWED_IMAGE_MEDIA_TYPES = new Set([
+  'image/png',
+  'image/jpeg',
+  'image/jpg',
+  'image/webp',
+  'image/gif',
+])
+// File attachments are accepted when their media type starts with one of
+// these prefixes.
+const ALLOWED_FILE_MEDIA_TYPE_PREFIXES = ['text/', 'application/json']
+
+/**
+ * Validates the untrusted `attachments` value of a chat request.
+ *
+ * Returns `{ attachments: null, error: null }` when no attachments were
+ * sent, `{ attachments, error: null }` when every entry is valid, and
+ * `{ attachments: null, error }` describing the first invalid entry
+ * otherwise. Only known fields are copied into the result.
+ *
+ * Images must declare an allowed media type and carry a `data:` URL within
+ * the encoded-size cap. Files must declare an allowed media type, a
+ * non-empty name, and text whose UTF-8 encoding fits the byte budget.
+ */
+function validateChatAttachments(input: unknown): {
+  attachments: ChatAttachment[] | null
+  error: string | null
+} {
+  if (input === undefined || input === null) {
+    return { attachments: null, error: null }
+  }
+  if (!Array.isArray(input)) {
+    return { attachments: null, error: 'attachments must be an array' }
+  }
+  if (input.length > MAX_ATTACHMENTS) {
+    return {
+      attachments: null,
+      error: `at most ${MAX_ATTACHMENTS} attachments are allowed per message`,
+    }
+  }
+
+  const result: ChatAttachment[] = []
+  for (const raw of input) {
+    if (!raw || typeof raw !== 'object') {
+      return { attachments: null, error: 'invalid attachment entry' }
+    }
+    const entry = raw as Record<string, unknown>
+    if (entry.kind === 'image') {
+      const mediaType =
+        typeof entry.mediaType === 'string' ? entry.mediaType : ''
+      const dataUrl = typeof entry.dataUrl === 'string' ? entry.dataUrl : ''
+      if (!ALLOWED_IMAGE_MEDIA_TYPES.has(mediaType)) {
+        return {
+          attachments: null,
+          error: `unsupported image type: ${mediaType || 'unknown'}`,
+        }
+      }
+      if (!dataUrl.startsWith('data:')) {
+        return {
+          attachments: null,
+          error: 'image attachment must include a data: URL',
+        }
+      }
+      if (dataUrl.length > MAX_IMAGE_DATA_URL_LENGTH) {
+        return {
+          attachments: null,
+          error: `image exceeds ${MAX_IMAGE_BYTES} bytes`,
+        }
+      }
+      result.push({
+        kind: 'image',
+        mediaType,
+        dataUrl,
+        name: typeof entry.name === 'string' ? entry.name : undefined,
+      })
+      continue
+    }
+    if (entry.kind === 'file') {
+      const mediaType =
+        typeof entry.mediaType === 'string' ? entry.mediaType : ''
+      const name = typeof entry.name === 'string' ? entry.name : ''
+      const text = typeof entry.text === 'string' ? entry.text : ''
+      const allowed = ALLOWED_FILE_MEDIA_TYPE_PREFIXES.some((prefix) =>
+        mediaType.startsWith(prefix),
+      )
+      if (!allowed) {
+        return {
+          attachments: null,
+          error: `unsupported file type: ${mediaType || 'unknown'}`,
+        }
+      }
+      if (!name) {
+        return {
+          attachments: null,
+          error: 'file attachment must include a name',
+        }
+      }
+      // `text.length` counts UTF-16 code units, but the budget is in bytes:
+      // non-ASCII text can be several times larger once encoded. The UTF-8
+      // size is never smaller than the length, so the first comparison
+      // rejects oversized input without encoding it.
+      if (
+        text.length > MAX_FILE_TEXT_BYTES ||
+        new TextEncoder().encode(text).byteLength > MAX_FILE_TEXT_BYTES
+      ) {
+        return {
+          attachments: null,
+          error: `file "${name}" exceeds ${MAX_FILE_TEXT_BYTES} bytes`,
+        }
+      }
+      result.push({ kind: 'file', mediaType, name, text })
+      continue
+    }
+    return {
+      attachments: null,
+      error: 'attachment kind must be "image" or "file"',
+    }
+  }
+  return { attachments: result, error: null }
 }

-function isValidCustomRoleBoundary(value: unknown): boolean {
-  if (!value || typeof value !== 'object') return false
-  const boundary = value as Record<string, unknown>
-  return (
-    typeof boundary.key === 'string' &&
-    typeof boundary.label === 'string' &&
-    typeof boundary.description === 'string' &&
-    isValidBoundaryMode(boundary.defaultMode)
+/**
+ * Combines the user's text with validated attachments into what the chat
+ * stream expects.
+ *
+ * File attachments are appended to the text as `<attachment …>` blocks.
+ * Image attachments become `image_url` parts following a single text part.
+ *
+ * @param message - The user's (already trimmed) text; may be empty.
+ * @param attachments - Validated attachments, in the order received.
+ * @returns `text`, the message with file blocks appended, and `parts`, the
+ *   multi-part content when at least one image is present (otherwise
+ *   `undefined`).
+ */
+function buildMessagePartsFromAttachments(
+  message: string,
+  attachments: ChatAttachment[],
+): { text: string; parts: OpenClawChatContentPart[] | undefined } {
+  const images = attachments.filter(
+    (a): a is ImageAttachment => a.kind === 'image',
   )
+  const files = attachments.filter(
+    (a): a is FileAttachment => a.kind === 'file',
+  )
+
+  // `name` and `mediaType` come from the client. Escape them so a quote or
+  // angle bracket cannot break out of the attribute and reshape the wrapper.
+  const escapeAttribute = (value: string): string =>
+    value
+      .replace(/&/g, '&amp;')
+      .replace(/"/g, '&quot;')
+      .replace(/</g, '&lt;')
+      .replace(/>/g, '&gt;')
+
+  const fileBlocks = files
+    .map(
+      (f) =>
+        `<attachment name="${escapeAttribute(f.name)}" mediaType="${escapeAttribute(f.mediaType)}">\n${f.text}\n</attachment>`,
+    )
+    .join('\n\n')
+  const text = fileBlocks ? `${message}\n\n${fileBlocks}`.trim() : message
+
+  if (images.length === 0) {
+    return { text, parts: undefined }
+  }
+
+  const parts: OpenClawChatContentPart[] = [{ type: 'text', text }]
+  for (const image of images) {
+    parts.push({ type: 'image_url', image_url: { url: image.dataUrl } })
+  }
+  return { text, parts }
+}
+
+/**
+ * Checks a create-agent request body.
+ *
+ * @returns The validation message when `name` is missing or blank,
+ *   otherwise `null`.
+ */
+function getCreateAgentValidationError(body: { name?: string }): string | null {
+  const trimmedName = body.name?.trim()
+  return trimmedName ? null : 'Name is required'
+}
+
+/**
+ * Parses a query-string value as a positive integer.
+ *
+ * @param value - Raw query value, or `undefined` when the parameter is absent.
+ * @param fallback - Returned when the value is absent, blank, or not a
+ *   finite number.
+ * @returns The value truncated toward zero and clamped to at least 1, or
+ *   `fallback`.
+ */
+function parsePositiveIntQuery(
+  value: string | undefined,
+  fallback: number,
+): number {
+  // Number('') and Number('   ') are 0, which would otherwise be clamped to
+  // 1; a blank parameter (`?limit=`) means "not provided".
+  if (value === undefined || value.trim() === '') return fallback
+  const parsed = Number(value)
+  if (!Number.isFinite(parsed)) return fallback
+  return Math.max(1, Math.trunc(parsed))
 }

 export function createOpenClawRoutes() {
@@ -48,6 +210,29 @@ export function createOpenClawRoutes() {
      return c.json(status)
    })

+    .get('/providers/:providerId/auth-status', async (c) => {
+      const { providerId } = c.req.param()
+      const provider = getOpenClawCliProvider(providerId)
+      if (!provider) {
+        return c.json({ error: `Unknown CLI provider: ${providerId}` }, 404)
+      }
+      try {
+        const status =
+          await getOpenClawService().getCliProviderAuthStatus(provider)
+        return c.json(status)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err)
+        logger.warn('CLI provider auth-status failed', {
+          providerId,
+          error: message,
+        })
+        return c.json(
+          { installed: false, loggedIn: false, error: message },
+          500,
+        )
+      }
+    })
+
    .post('/setup', async (c) => {
      const body = await c.req.json<{
        providerType?: string
@@ -72,7 +257,7 @@ export function createOpenClawRoutes() {
        return c.json(
          {
            status: 'running',
-            port: OPENCLAW_GATEWAY_PORT,
+            port: getOpenClawService().getPort(),
            agents: agents.map((a) => ({
              agentId: a.agentId,
              name: a.name,
@@ -89,7 +274,10 @@ export function createOpenClawRoutes() {
          providerType: body.providerType,
          providerName: body.providerName,
        })
-        if (message.includes('Podman is not available')) {
+        if (isUnsupportedOpenClawProviderError(err)) {
+          return c.json({ error: err.message }, 400)
+        }
+        if (message.includes('VM runtime is not available')) {
          return c.json({ error: message }, 503)
        }
        return c.json({ error: message }, 500)
@@ -154,97 +342,23 @@ export function createOpenClawRoutes() {
      }
    })

-    .get('/roles', async (c) => {
-      return c.json({
-        roles: BROWSEROS_ROLE_TEMPLATES.map((role) => ({
-          id: role.id,
-          name: role.name,
-          shortDescription: role.shortDescription,
-          longDescription: role.longDescription,
-          recommendedApps: role.recommendedApps,
-          boundaries: role.boundaries,
-          defaultAgentName: role.defaultAgentName,
-        })),
-      })
-    })
-
    .post('/agents', async (c) => {
      const body = await c.req.json<{
        name: string
-        roleId?: BrowserOSAgentRoleId
-        customRole?: BrowserOSCustomRoleInput
        providerType?: string
        providerName?: string
        baseUrl?: string
        apiKey?: string
        modelId?: string
      }>()
-      const name = body.name?.trim()
-
-      if (!name) {
-        return c.json({ error: 'Name is required' }, 400)
-      }
-      if (body.roleId && body.customRole) {
-        return c.json(
-          { error: 'Provide either roleId or customRole, not both' },
-          400,
-        )
-      }
-      if (
-        body.customRole &&
-        (!body.customRole.name?.trim() ||
-          !body.customRole.shortDescription?.trim() ||
-          !body.customRole.longDescription?.trim())
-      ) {
-        return c.json(
-          {
-            error:
-              'Custom roles require name, shortDescription, and longDescription',
-          },
-          400,
-        )
-      }
-      if (
-        body.customRole &&
-        (!Array.isArray(body.customRole.recommendedApps) ||
-          !Array.isArray(body.customRole.boundaries))
-      ) {
-        return c.json(
-          {
-            error: 'Custom roles require recommendedApps and boundaries arrays',
-          },
-          400,
-        )
-      }
-      if (
-        body.customRole &&
-        !body.customRole.recommendedApps.every((app) => typeof app === 'string')
-      ) {
-        return c.json(
-          {
-            error: 'Custom role recommendedApps must be an array of strings',
-          },
-          400,
-        )
-      }
-      if (
-        body.customRole &&
-        !body.customRole.boundaries.every(isValidCustomRoleBoundary)
-      ) {
-        return c.json(
-          {
-            error:
-              'Custom role boundaries must include key, label, description, and a valid defaultMode',
-          },
-          400,
-        )
+      const validationError = getCreateAgentValidationError(body)
+      if (validationError) {
+        return c.json({ error: validationError }, 400)
      }

      try {
        const agent = await getOpenClawService().createAgent({
-          name,
-          roleId: body.roleId,
-          customRole: body.customRole,
+          name: body.name.trim(),
          providerType: body.providerType,
          providerName: body.providerName,
          baseUrl: body.baseUrl,
@@ -259,6 +373,9 @@ export function createOpenClawRoutes() {
        if (err instanceof OpenClawInvalidAgentNameError) {
          return c.json({ error: err.message }, 400)
        }
+        if (isUnsupportedOpenClawProviderError(err)) {
+          return c.json({ error: err.message }, 400)
+        }
        const message = err instanceof Error ? err.message : String(err)
        return c.json({ error: message }, 500)
      }
@@ -282,24 +399,189 @@ export function createOpenClawRoutes() {
      }
    })

+    .get('/agents/:id/sessions', async (c) => {
+      const { id } = c.req.param()
+      const limit = parsePositiveIntQuery(c.req.query('limit'), 20)
+
+      try {
+        const sessions = await getOpenClawService().listSessions(id)
+        return c.json({
+          agentId: id,
+          sessions: sessions.slice(0, Math.min(limit, 100)),
+        })
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err)
+        return c.json({ error: message }, 500)
+      }
+    })
+
+    .get('/agents/:id/session', async (c) => {
+      const { id } = c.req.param()
+
+      try {
+        const session = await getOpenClawService().resolveAgentSession(id)
+        return c.json(session)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err)
+        return c.json({ error: message }, 500)
+      }
+    })
+
+    .get('/agents/:id/history', async (c) => {
+      const { id } = c.req.param()
+      const limit = parsePositiveIntQuery(c.req.query('limit'), 50)
+
+      try {
+        const page = await getOpenClawService().getAgentHistoryPage(id, {
+          sessionKey: c.req.query('sessionKey'),
+          cursor: c.req.query('cursor'),
+          limit,
+        })
+        return c.json(page)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err)
+        return c.json({ error: message }, 500)
+      }
+    })
+
+    .get('/dashboard', (c) => {
+      try {
+        const dashboard = getOpenClawService().getDashboard()
+        return c.json(dashboard)
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err)
+        return c.json({ error: message }, 500)
+      }
+    })
+
+    .get('/dashboard/stream', (c) => {
+      c.header('Content-Type', 'text/event-stream')
+      c.header('Cache-Control', 'no-cache')
+      c.header('Connection', 'keep-alive')
+
+      return stream(c, async (s) => {
+        const encoder = new TextEncoder()
+
+        // Send initial snapshot
+        try {
+          const dashboard = getOpenClawService().getDashboard()
+          await s.write(
+            encoder.encode(
+              `event: snapshot\ndata: ${JSON.stringify(dashboard)}\n\n`,
+            ),
+          )
+        } catch {}
+
+        // Subscribe to live status changes
+        const unsubscribe = getOpenClawService().onAgentStatusChange(
+          (agentId, entry) => {
+            const event = {
+              agentId,
+              status: entry.status,
+              currentTool: entry.currentTool,
+              error: entry.error,
+              timestamp: entry.lastEventAt,
+            }
+            s.write(
+              encoder.encode(
+                `event: status\ndata: ${JSON.stringify(event)}\n\n`,
+              ),
+            ).catch(() => {})
+          },
+        )
+
+        // Heartbeat every 15s to keep connection alive
+        const heartbeat = setInterval(() => {
+          s.write(
+            encoder.encode(
+              `event: heartbeat\ndata: ${JSON.stringify({ ts: Date.now() })}\n\n`,
+            ),
+          ).catch(() => {})
+        }, 15_000)
+
+        // Wait until client disconnects
+        try {
+          await new Promise<void>((resolve) => {
+            s.onAbort(() => resolve())
+          })
+        } finally {
+          unsubscribe()
+          clearInterval(heartbeat)
+        }
+      })
+    })
    .post('/agents/:id/chat', async (c) => {
      const { id } = c.req.param()
      const body = await c.req.json<{
        message: string
        sessionKey?: string
+        history?: MonitoringChatTurn[]
+        attachments?: unknown
      }>()

-      if (!body.message?.trim()) {
+      const trimmedMessage = body.message?.trim() ?? ''
+      const attachmentValidation = validateChatAttachments(body.attachments)
+      if (attachmentValidation.error) {
+        return c.json({ error: attachmentValidation.error }, 400)
+      }
+      const attachments = attachmentValidation.attachments ?? []
+      // Either a non-empty text body or at least one attachment is required.
+      if (!trimmedMessage && attachments.length === 0) {
        return c.json({ error: 'Message is required' }, 400)
      }

-      const sessionKey = body.sessionKey ?? crypto.randomUUID()
+      const sessionKey = normalizeBrowserOSChatSessionKey(
+        id,
+        body.sessionKey ?? crypto.randomUUID(),
+      )
+      const history = Array.isArray(body.history)
+        ? body.history.filter((entry): entry is MonitoringChatTurn =>
+            Boolean(
+              entry &&
+                (entry.role === 'user' || entry.role === 'assistant') &&
+                typeof entry.content === 'string',
+            ),
+          )
+        : []
+
+      // Replace the immediate 409 with a bounded wait so back-to-back user
+      // sends or a cron / hook turn that's still finishing don't reject the
+      // user-chat outright. The client-side outbound queue (Feature 2) keeps
+      // the per-agent send rate at 1, so this only kicks in for cross-source
+      // contention.
+      try {
+        await getMonitoringService().waitForSessionFree(id, {
+          timeoutMs: 30_000,
+        })
+      } catch (err) {
+        return c.json(
+          {
+            error:
+              err instanceof Error
+                ? err.message
+                : 'Agent is busy. Try again shortly.',
+          },
+          503,
+        )
+      }
+
+      const { text: composedMessage, parts: messageParts } =
+        buildMessagePartsFromAttachments(trimmedMessage, attachments)
+
+      const monitoringContext = await getMonitoringService().startSession({
+        agentId: id,
+        sessionKey,
+        originalPrompt: composedMessage,
+        chatHistory: history,
+      })

      try {
        const eventStream = await getOpenClawService().chatStream(
          id,
          sessionKey,
-          body.message,
+          composedMessage,
+          history,
+          { messageParts },
        )

        c.header('Content-Type', 'text/event-stream')
@@ -309,20 +591,227 @@ export function createOpenClawRoutes() {
        return stream(c, async (s) => {
          const reader = eventStream.getReader()
          const encoder = new TextEncoder()
+          // Outcome of the turn, collected while relaying events and reported
+          // to the monitoring service in the `finally` block. `status` stays
+          // 'incomplete' when the stream ends without a non-empty `done` event
+          // or an `error` event.
+          let finalAssistantMessage: string | undefined
+          let status: 'completed' | 'failed' | 'aborted' | 'incomplete' =
+            'incomplete'
+          let finalError: string | undefined
          try {
            while (true) {
              const { done, value } = await reader.read()
              if (done) break
+              // A `done` event carrying non-blank text is the final answer.
+              if (
+                value.type === 'done' &&
+                typeof value.data.text === 'string' &&
+                value.data.text.trim()
+              ) {
+                finalAssistantMessage = value.data.text
+                status = 'completed'
+              }
+              // Prefer `data.message`, then `data.error`, then a fixed fallback.
+              if (value.type === 'error') {
+                finalError =
+                  (typeof value.data.message === 'string'
+                    ? value.data.message
+                    : typeof value.data.error === 'string'
+                      ? value.data.error
+                      : undefined) ?? 'Unknown chat stream error'
+                status = 'failed'
+              }
              await s.write(
                encoder.encode(`data: ${JSON.stringify(value)}\n\n`),
              )
            }
            await s.write(encoder.encode('data: [DONE]\n\n'))
+          } catch (error) {
+            // Distinguish a client disconnect from a genuine stream failure.
+            if (c.req.raw.signal.aborted) {
+              status = 'aborted'
+            } else {
+              status = 'failed'
+              finalError =
+                error instanceof Error ? error.message : String(error)
+            }
+            throw error
+          } finally {
+            // cancel() rejects with the stored error when the stream has
+            // already errored. Contain that rejection so the monitoring
+            // session is always finalized, including on the failure path.
+            try {
+              await reader.cancel()
+            } catch {
+              // The failure (if any) is already captured in status/finalError.
+            }
+            await getMonitoringService().finalizeSession({
+              monitoringSessionId: monitoringContext.monitoringSessionId,
+              agentId: id,
+              sessionKey,
+              status,
+              finalAssistantMessage,
+              error: finalError,
+            })
+          }
+        })
+      } catch (err) {
+        await getMonitoringService().finalizeSession({
+          monitoringSessionId: monitoringContext.monitoringSessionId,
+          agentId: id,
+          sessionKey,
+          status: c.req.raw.signal.aborted ? 'aborted' : 'failed',
+          error: err instanceof Error ? err.message : String(err),
+        })
+        if (isUnsupportedOpenClawProviderError(err)) {
+          return c.json({ error: err.message }, 400)
+        }
+        const message = err instanceof Error ? err.message : String(err)
+        return c.json({ error: message }, 500)
+      }
+    })
+
+    // Enqueue a chat message for an agent on the outbound queue.
+    // Responds 202 with the queue item id. Responds 400 when attachment
+    // validation fails or when there is neither text nor an attachment.
+    .post('/agents/:id/queue', async (c) => {
+      const { id } = c.req.param()
+      const body = await c.req.json<{
+        message: string
+        sessionKey?: string
+        history?: MonitoringChatTurn[]
+        attachments?: unknown
+        // Optional client-provided id — when set, the queue uses it as
+        // the canonical item id so the browser's optimistic row and the
+        // SSE snapshot reconcile on the same key.
+        id?: string
+      }>()
+      // The body is untrusted JSON: a non-string `message` must fall into the
+      // validation path below instead of throwing on `.trim()`.
+      const trimmedMessage =
+        typeof body.message === 'string' ? body.message.trim() : ''
+      const attachmentValidation = validateChatAttachments(body.attachments)
+      if (attachmentValidation.error) {
+        return c.json({ error: attachmentValidation.error }, 400)
+      }
+      const attachments = attachmentValidation.attachments ?? []
+      // Either a non-empty text body or at least one attachment is required.
+      if (!trimmedMessage && attachments.length === 0) {
+        return c.json({ error: 'Message is required' }, 400)
+      }
+
+      // The session key is optional here; it is only normalized when given.
+      const sessionKey = body.sessionKey
+        ? normalizeBrowserOSChatSessionKey(id, body.sessionKey)
+        : undefined
+      // Keep only well-formed user/assistant turns with string content.
+      const history = Array.isArray(body.history)
+        ? body.history.filter((entry): entry is MonitoringChatTurn =>
+            Boolean(
+              entry &&
+                (entry.role === 'user' || entry.role === 'assistant') &&
+                typeof entry.content === 'string',
+            ),
+          )
+        : []
+
+      const { text: composedMessage, parts: messageParts } =
+        buildMessagePartsFromAttachments(trimmedMessage, attachments)
+
+      const item = getOutboundQueueService().enqueue({
+        agentId: id,
+        id: typeof body.id === 'string' && body.id ? body.id : undefined,
+        message: composedMessage,
+        messageParts,
+        sessionKey,
+        history,
+        // Only kind, media type and (when present) name go into the preview.
+        attachmentsPreview: attachments.map((a) => ({
+          kind: a.kind,
+          mediaType: a.mediaType,
+          name: 'name' in a ? a.name : undefined,
+        })),
+      })
+      return c.json({ id: item.id }, 202)
+    })
+
+    // Cancel a queued item. 409 when it is already dispatching, 404 for any
+    // other failure, `{ ok: true }` on success.
+    .delete('/agents/:id/queue/:itemId', (c) => {
+      const { id, itemId } = c.req.param()
+      const outcome = getOutboundQueueService().cancel(id, itemId)
+      if (outcome.ok) {
+        return c.json({ ok: true })
+      }
+      if (outcome.reason === 'dispatching') {
+        return c.json({ error: 'Item is already dispatching' }, 409)
+      }
+      return c.json({ error: 'Item not found' }, 404)
+    })
+
+    // Ask the outbound queue to retry an item; 404 when the queue refuses.
+    .post('/agents/:id/queue/:itemId/retry', (c) => {
+      const { id, itemId } = c.req.param()
+      const { ok } = getOutboundQueueService().retry(id, itemId)
+      return ok
+        ? c.json({ ok: true })
+        : c.json({ error: 'Item not found or not failed' }, 404)
+    })
+
+    // SSE feed of an agent's outbound queue: one `data:` frame per snapshot
+    // delivered by the queue service, plus a comment-only keep-alive frame
+    // every 15 seconds. Runs until the stream is aborted.
+    .get('/agents/:id/queue/stream', (c) => {
+      const { id } = c.req.param()
+      c.header('Content-Type', 'text/event-stream')
+      c.header('Cache-Control', 'no-cache')
+      return stream(c, async (s) => {
+        const encoder = new TextEncoder()
+        // Fire-and-forget write of one pre-formatted SSE frame.
+        const push = (frame: string) => {
+          void s.write(encoder.encode(frame))
+        }
+        const unsubscribe = getOutboundQueueService().subscribe(
+          id,
+          (items: QueuedItemPublic[]) => {
+            push(`data: ${JSON.stringify({ items })}\n\n`)
+          },
+        )
+        const heartbeat = setInterval(() => {
+          push(': keep-alive\n\n')
+        }, 15_000)
+        try {
+          // Park here until the stream reports an abort.
+          await new Promise<void>((release) => {
+            s.onAbort(() => release())
+          })
+        } finally {
+          clearInterval(heartbeat)
+          unsubscribe()
+        }
+      })
+    })
+
+    .get('/session/:key/history', async (c) => {
+      const key = c.req.param('key')
+      const limitRaw = c.req.query('limit')
+      const cursor = c.req.query('cursor')
+      const limitParsed =
+        limitRaw !== undefined ? Number.parseInt(limitRaw, 10) : Number.NaN
+      const limit = Number.isFinite(limitParsed) ? limitParsed : undefined
+      const wantsStream = (c.req.header('accept') ?? '').includes(
+        'text/event-stream',
+      )
+
+      try {
+        if (!wantsStream) {
+          const history = await getOpenClawService().getSessionHistory(key, {
+            limit,
+            cursor,
+          })
+          return c.json(history)
+        }
+
+        const eventStream = await getOpenClawService().streamSessionHistory(
+          key,
+          { limit, cursor, signal: c.req.raw.signal },
+        )
+
+        c.header('Content-Type', 'text/event-stream')
+        c.header('Cache-Control', 'no-cache')
+        c.header('X-Session-Key', key)
+
+        return stream(c, async (s) => {
+          const reader = eventStream.getReader()
+          const encoder = new TextEncoder()
+          try {
+            while (true) {
+              const { done, value } = await reader.read()
+              if (done) break
+              await s.write(
+                encoder.encode(
+                  `event: ${value.type}\ndata: ${JSON.stringify(value.data)}\n\n`,
+                ),
+              )
+            }
          } finally {
            await reader.cancel()
          }
        })
      } catch (err) {
+        if (err instanceof OpenClawSessionNotFoundError) {
+          return c.json({ error: err.message }, 404)
+        }
        const message = err instanceof Error ? err.message : String(err)
        return c.json({ error: message }, 500)
      }
@@ -352,12 +841,17 @@ export function createOpenClawRoutes() {
      }

      try {
-        await getOpenClawService().updateProviderKeys(body)
+        const result = await getOpenClawService().updateProviderKeys(body)
        return c.json({
-          status: 'restarting',
-          message: 'Provider updated, restarting gateway',
+          status: result.restarted ? 'restarting' : 'updated',
+          message: result.restarted
+            ? 'Provider updated, restarting gateway'
+            : 'Provider updated without a restart',
        })
      } catch (err) {
+        if (isUnsupportedOpenClawProviderError(err)) {
+          return c.json({ error: err.message }, 400)
+        }
        const message = err instanceof Error ? err.message : String(err)
        return c.json({ error: message }, 500)
      }
--- a/packages/browseros-agent/apps/server/src/api/routes/terminal.ts
+++ b/packages/browseros-agent/apps/server/src/api/routes/terminal.ts
@@ -16,7 +16,9 @@ export const TERMINAL_WS_PATH = '/terminal/ws'

 interface TerminalRouteDeps {
  containerName: string
-  podmanPath: string
+  limaHome: string
+  limactlPath: string
+  vmName: string
 }

 function safeSend(ws: { send(data: string): void }, data: string): void {
@@ -45,7 +47,9 @@ function createSocketEvents(deps: TerminalRouteDeps) {
      try {
        session = createTerminalSession({
          containerName: deps.containerName,
-          podmanPath: deps.podmanPath,
+          limaHome: deps.limaHome,
+          limactlPath: deps.limactlPath,
+          vmName: deps.vmName,
          workingDir: TERMINAL_HOME_DIR,
          onOutput(data) {
            sendOutput(ws, data)
--- a/packages/browseros-agent/apps/server/src/api/server.ts
+++ b/packages/browseros-agent/apps/server/src/api/server.ts
@@ -22,6 +22,7 @@ import { initializeOAuth } from '../lib/clients/oauth'
 import { getDb } from '../lib/db'
 import { logger } from '../lib/logger'
 import { Sentry } from '../lib/sentry'
+import { getLimaHomeDir, resolveBundledLimactl, VM_NAME } from '../lib/vm'
 import { createAclRoutes } from './routes/acl'
 import { createChatRoutes } from './routes/chat'
 import { createCreditsRoutes } from './routes/credits'
@@ -29,6 +30,7 @@ import { createHealthRoute } from './routes/health'
 import { createKlavisRoutes } from './routes/klavis'
 import { createMcpRoutes } from './routes/mcp'
 import { createMemoryRoutes } from './routes/memory'
+import { createMonitoringRoutes } from './routes/monitoring'
 import { createOAuthRoutes } from './routes/oauth'
 import { createOpenClawRoutes } from './routes/openclaw'
 import { createProviderRoutes } from './routes/provider'
@@ -44,7 +46,6 @@ import {
  connectKlavisInBackground,
  type KlavisProxyRef,
 } from './services/klavis/strata-proxy'
-import { getPodmanRuntime } from './services/openclaw/podman-runtime'
 import type { Env, HttpServerConfig } from './types'
 import { defaultCorsConfig } from './utils/cors'
 import { requireTrustedAppOrigin } from './utils/request-auth'
@@ -113,7 +114,9 @@ export async function createHttpServer(config: HttpServerConfig) {
      '/',
      createTerminalRoutes({
        containerName: OPENCLAW_GATEWAY_CONTAINER_NAME,
-        podmanPath: getPodmanRuntime().getPodmanPath(),
+        limaHome: getLimaHomeDir(),
+        limactlPath: resolveBundledLimactl(resourcesDir),
+        vmName: VM_NAME,
      }),
    )

@@ -121,6 +124,10 @@ export async function createHttpServer(config: HttpServerConfig) {
    .use('/*', requireTrustedAppOrigin())
    .route('/', createAclRoutes({ policyService: aclPolicyService }))

+  const monitoringRoutes = new Hono<Env>()
+    .use('/*', requireTrustedAppOrigin())
+    .route('/', createMonitoringRoutes())
+
  const app = new Hono<Env>()
    .use('/*', cors(defaultCorsConfig))
    .route('/health', createHealthRoute({ browser }))
@@ -143,6 +150,7 @@ export async function createHttpServer(config: HttpServerConfig) {
    .route('/soul', createSoulRoutes())
    .route('/memory', createMemoryRoutes())
    .route('/skills', createSkillsRoutes())
+    .route('/monitoring', monitoringRoutes)
    .route('/acl-rules', aclRoutes)
    .route('/test-provider', createProviderRoutes({ browserosId }))
    .route('/refine-prompt', createRefinePromptRoutes({ browserosId }))
--- a/packages/browseros-agent/apps/server/src/api/services/klavis/strata-proxy.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/klavis/strata-proxy.ts
@@ -20,6 +20,10 @@ import { KlavisClient } from '../../../lib/clients/klavis/klavis-client'
 import { OAUTH_MCP_SERVERS } from '../../../lib/clients/klavis/oauth-mcp-servers'
 import { logger } from '../../../lib/logger'
 import { metrics } from '../../../lib/metrics'
+import {
+  buildMonitoringToolOutput,
+  type ToolExecutionObserver,
+} from '../../../monitoring/observer'
 import { klavisStrataCache } from './strata-cache'

 function withTimeout<T>(promise: Promise<T>, label: string): Promise<T> {
@@ -237,6 +241,7 @@ export function buildKlavisToolSet(handle: KlavisProxyHandle): ToolSet {
 export function registerKlavisTools(
  mcpServer: McpServer,
  handle: KlavisProxyHandle,
+  observer?: ToolExecutionObserver,
 ): void {
  mcpServer.registerTool(
    'connector_mcp_servers',
@@ -247,9 +252,18 @@ export function registerKlavisTools(
    },
    async (args: Record<string, unknown>) => {
      const startTime = performance.now()
+      const toolCallId = crypto.randomUUID()
      const server_name = args.server_name as string

      try {
+        await observer?.onToolStart({
+          toolCallId,
+          toolName: 'connector_mcp_servers',
+          toolDescription:
+            'Check whether an external connector is connected and ready for use.',
+          source: 'klavis-tool',
+          args,
+        })
        const klavisClient = new KlavisClient()
        const integrations = await klavisClient.getUserIntegrations(
          handle.browserosId,
@@ -266,6 +280,14 @@ export function registerKlavisTools(
            success: true,
          })

+          await observer?.onToolEnd({
+            toolCallId,
+            output: {
+              connected: true,
+              server_name,
+            },
+          })
+
          return {
            content: [
              {
@@ -294,6 +316,15 @@ export function registerKlavisTools(
          success: true,
        })

+        await observer?.onToolEnd({
+          toolCallId,
+          output: {
+            connected: false,
+            server_name,
+            authUrl,
+          },
+        })
+
        return {
          content: [
            {
@@ -320,6 +351,11 @@ export function registerKlavisTools(
          error_message: errorText,
        })

+        await observer?.onToolEnd({
+          toolCallId,
+          error: errorText,
+        })
+
        return {
          content: [{ type: 'text' as const, text: errorText }],
          isError: true,
@@ -339,7 +375,15 @@ export function registerKlavisTools(
      },
      async (args: Record<string, unknown>) => {
        const startTime = performance.now()
+        const toolCallId = crypto.randomUUID()
        try {
+          await observer?.onToolStart({
+            toolCallId,
+            toolName: tool.name,
+            toolDescription: tool.description ?? undefined,
+            source: 'klavis-tool',
+            args,
+          })
          const result = await handle.callTool(tool.name, args)

          metrics.log('tool_executed', {
@@ -349,6 +393,12 @@ export function registerKlavisTools(
            success: !result.isError,
          })

+          await observer?.onToolEnd({
+            toolCallId,
+            output: buildMonitoringToolOutput(result),
+            error: result.isError ? 'Tool returned isError=true' : undefined,
+          })
+
          return result
        } catch (error) {
          const errorText =
@@ -362,6 +412,11 @@ export function registerKlavisTools(
            error_message: errorText,
          })

+          await observer?.onToolEnd({
+            toolCallId,
+            error: errorText,
+          })
+
          return {
            content: [{ type: 'text' as const, text: errorText }],
            isError: true,
--- a/packages/browseros-agent/apps/server/src/api/services/mcp/mcp-server.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/mcp/mcp-server.ts
@@ -8,6 +8,7 @@ import type { AclRule } from '@browseros/shared/types/acl'
 import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
 import { SetLevelRequestSchema } from '@modelcontextprotocol/sdk/types.js'
 import type { Browser } from '../../../browser/browser'
+import type { ToolExecutionObserver } from '../../../monitoring/observer'
 import type { ToolRegistry } from '../../../tools/tool-registry'
 import {
  type KlavisProxyRef,
@@ -24,6 +25,7 @@ export interface McpServiceDeps {
  resourcesDir: string
  aclRules?: AclRule[]
  klavisRef?: KlavisProxyRef
+  observer?: ToolExecutionObserver
 }

 export function createMcpServer(deps: McpServiceDeps): McpServer {
@@ -48,11 +50,12 @@ export function createMcpServer(deps: McpServiceDeps): McpServer {
      resourcesDir: deps.resourcesDir,
    },
    aclRules: deps.aclRules,
+    observer: deps.observer,
  })

  // Register Klavis proxy tools (if connected via background init)
  if (deps.klavisRef?.handle) {
-    registerKlavisTools(server, deps.klavisRef.handle)
+    registerKlavisTools(server, deps.klavisRef.handle, deps.observer)
  }

  return server
--- a/packages/browseros-agent/apps/server/src/api/services/mcp/register-mcp.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/mcp/register-mcp.ts
@@ -1,13 +1,17 @@
 import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
 import { logger } from '../../../lib/logger'
 import { metrics } from '../../../lib/metrics'
+import {
+  buildMonitoringToolOutput,
+  type ToolExecutionObserver,
+} from '../../../monitoring/observer'
 import { executeTool, type ToolContext } from '../../../tools/framework'
 import type { ToolRegistry } from '../../../tools/tool-registry'

 export function registerTools(
  mcpServer: McpServer,
  registry: ToolRegistry,
-  ctx: ToolContext,
+  ctx: ToolContext & { observer?: ToolExecutionObserver },
 ): void {
  for (const tool of registry.all()) {
    const handler = async (
@@ -15,9 +19,17 @@ export function registerTools(
      extra: { signal: AbortSignal },
    ) => {
      const startTime = performance.now()
+      const toolCallId = crypto.randomUUID()

      try {
        logger.info(`${tool.name} request: ${JSON.stringify(args, null, '  ')}`)
+        await ctx.observer?.onToolStart({
+          toolCallId,
+          toolName: tool.name,
+          toolDescription: tool.description,
+          source: 'browser-tool',
+          args,
+        })

        const result = await executeTool(tool, args, ctx, extra.signal)

@@ -28,6 +40,17 @@ export function registerTools(
          source: 'mcp',
        })

+        await ctx.observer?.onToolEnd({
+          toolCallId,
+          output: buildMonitoringToolOutput({
+            content: result.content,
+            structuredContent: result.structuredContent,
+            metadata: result.metadata,
+            isError: result.isError,
+          }),
+          error: result.isError ? 'Tool returned isError=true' : undefined,
+        })
+
        return {
          content: result.content,
          isError: result.isError,
@@ -44,6 +67,11 @@ export function registerTools(
          source: 'mcp',
        })

+        await ctx.observer?.onToolEnd({
+          toolCallId,
+          error: errorText,
+        })
+
        return {
          content: [{ type: 'text' as const, text: errorText }],
          isError: true,
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/claw-session.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/claw-session.ts
@@ -0,0 +1,267 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * In-memory state machine tracking the live status of every OpenClaw agent
+ * session. Acts as the single source of truth for "is agent X running?"
+ *
+ * Two data sources feed it:
+ *   1. JSONL files (seed) — on init, reads the latest events for each agent
+ *      to infer whether a session is running or idle. This handles the case
+ *      where an agent was already mid-task when BrowserOS started.
+ *   2. Gateway WS events (live) — the OpenClawObserver pipes chat broadcast
+ *      events into this state machine for real-time transitions.
+ *
+ * Consumers (SSE streams, dashboard endpoint) read from this class and get
+ * correct state from the first call — no "unknown" period while waiting for
+ * the first WS event.
+ */
+
+import { logger } from '../../../lib/logger'
+import type { ClawEvent, OpenClawJsonlReader } from './openclaw-jsonl-reader'
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/**
+ * Coarse live status of an agent. `ClawSession.getState()` reports
+ * 'unknown' for agents that have no recorded state.
+ */
+export type AgentLiveStatus = 'working' | 'idle' | 'error' | 'unknown'
+
+/** Snapshot of one agent's live session state. */
+export interface AgentSessionState {
+  /** Current live status. */
+  status: AgentLiveStatus
+  /** Key of the session this state refers to, or null when none is known. */
+  sessionKey: string | null
+  /**
+   * Time of the most recent event: `Date.now()` for live transitions, the
+   * last JSONL event's `createdAt` when seeded, and 0 when no event was seen.
+   */
+  lastEventAt: number
+  /** Tool currently in use; always null unless status is 'working'. */
+  currentTool: string | null
+  /** Error text; always null unless status is 'error'. */
+  error: string | null
+}
+
+/**
+ * Callback invoked by `ClawSession.transition()` with the agent id and the
+ * state that was just stored for it.
+ */
+export type SessionStateListener = (
+  agentId: string,
+  state: AgentSessionState,
+) => void
+
+// ---------------------------------------------------------------------------
+// State machine
+// ---------------------------------------------------------------------------
+
+/**
+ * In-memory registry holding the latest known live state per agent id.
+ *
+ * State enters through `seedFromJsonl()` (bulk inference from persisted
+ * events) and `transition()` (one agent at a time). Only `transition()`
+ * notifies listeners registered via `onStateChange()`.
+ */
+export class ClawSession {
+  private readonly states = new Map<string, AgentSessionState>()
+  private readonly listeners = new Set<SessionStateListener>()
+  private seeded = false
+
+  /**
+   * Seed the state machine from JSONL files. Call this once when the
+   * gateway becomes ready. For each agent, reads the latest session's
+   * events and infers whether the agent is currently working or idle.
+   *
+   * A session is considered "working" if:
+   * - The last message-type event is a user.message (agent hasn't replied yet)
+   * - The last event is an agent.tool_use without a matching agent.tool_result
+   *
+   * Otherwise it's "idle". Agents without any session are left untracked.
+   * Seeding overwrites any state already stored for an agent and does not
+   * notify listeners.
+   */
+  seedFromJsonl(reader: OpenClawJsonlReader): void {
+    const agents = reader.listAgents()
+
+    for (const agentId of agents) {
+      const sessions = reader.listSessions(agentId)
+      if (sessions.length === 0) continue
+
+      // NOTE(review): assumes listSessions() returns newest first — confirm
+      // against the reader implementation.
+      const latestSession = sessions[0]
+      const events = reader.listBySession(agentId, latestSession.key)
+      const state = inferStateFromEvents(events, latestSession.key)
+
+      this.states.set(agentId, state)
+
+      if (state.status === 'working') {
+        logger.info('ClawSession seed: agent is working', {
+          agentId,
+          currentTool: state.currentTool,
+        })
+      }
+    }
+
+    this.seeded = true
+    logger.info('ClawSession seeded from JSONL', {
+      agentCount: agents.length,
+      working: [...this.states.values()].filter((s) => s.status === 'working')
+        .length,
+    })
+  }
+
+  /** Whether seedFromJsonl() has been called. */
+  isSeeded(): boolean {
+    return this.seeded
+  }
+
+  /**
+   * Get the current state of an agent. Agents with no recorded state yield
+   * a fresh 'unknown' placeholder (not stored in the map).
+   */
+  getState(agentId: string): AgentSessionState {
+    return (
+      this.states.get(agentId) ?? {
+        status: 'unknown',
+        sessionKey: null,
+        lastEventAt: 0,
+        currentTool: null,
+        error: null,
+      }
+    )
+  }
+
+  /**
+   * Get all tracked agent states. This is the live internal map, not a
+   * copy — callers should treat it as read-only.
+   */
+  getAllStates(): Map<string, AgentSessionState> {
+    return this.states
+  }
+
+  /**
+   * Transition an agent's state. Called by the OpenClawObserver when
+   * a chat WS event arrives.
+   *
+   * @param agentId Agent whose state is replaced.
+   * @param status New live status.
+   * @param update Optional fields. A null or omitted `sessionKey` keeps the
+   *   previous session key. `currentTool` is only retained while `status`
+   *   is 'working' (falling back to the previous tool when null or omitted),
+   *   and `error` is only retained while `status` is 'error'.
+   */
+  transition(
+    agentId: string,
+    status: AgentLiveStatus,
+    update: {
+      sessionKey?: string | null
+      currentTool?: string | null
+      error?: string | null
+    } = {},
+  ): void {
+    const prev = this.states.get(agentId)
+    const entry: AgentSessionState = {
+      status,
+      sessionKey: update.sessionKey ?? prev?.sessionKey ?? null,
+      lastEventAt: Date.now(),
+      currentTool:
+        status === 'working'
+          ? (update.currentTool ?? prev?.currentTool ?? null)
+          : null,
+      error: status === 'error' ? (update.error ?? null) : null,
+    }
+
+    this.states.set(agentId, entry)
+
+    for (const listener of this.listeners) {
+      try {
+        listener(agentId, entry)
+      } catch (err) {
+        // A failing listener must not block the remaining listeners, but the
+        // failure should stay visible instead of disappearing silently.
+        logger.warn('ClawSession listener threw', {
+          agentId,
+          error: err instanceof Error ? err.message : String(err),
+        })
+      }
+    }
+  }
+
+  /** Subscribe to state changes. Returns unsubscribe function. */
+  onStateChange(listener: SessionStateListener): () => void {
+    this.listeners.add(listener)
+    return () => this.listeners.delete(listener)
+  }
+}
+
+// ---------------------------------------------------------------------------
+// JSONL state inference
+// ---------------------------------------------------------------------------
+
+/**
+ * Derive an agent's live state from the JSONL events of one session.
+ *
+ * Rules, applied in order:
+ *   1. No events: idle, with `lastEventAt` 0.
+ *   2. Newest event older than five minutes: idle (covers an agent that
+ *      finished or crashed without writing a final event).
+ *   3. Latest user.message sits after the latest agent.message (or there is
+ *      no agent.message at all): working — no reply has been written yet.
+ *   4. Latest agent.tool_use sits after the latest agent.tool_result:
+ *      working, and `currentTool` is that tool's name.
+ *   5. Anything else: idle.
+ *
+ * The returned `error` field is always null.
+ */
+function inferStateFromEvents(
+  events: ClawEvent[],
+  sessionKey: string,
+): AgentSessionState {
+  // Build a result for this session; `error` is never set by inference.
+  const snapshot = (
+    status: AgentLiveStatus,
+    lastEventAt: number,
+    currentTool: string | null = null,
+  ): AgentSessionState => ({
+    status,
+    sessionKey,
+    lastEventAt,
+    currentTool,
+    error: null,
+  })
+
+  if (events.length === 0) {
+    return snapshot('idle', 0)
+  }
+
+  const newest = events[events.length - 1]!
+  const lastEventAt = newest.createdAt
+
+  const STALE_THRESHOLD_MS = 5 * 60 * 1000
+  const ageMs = Date.now() - lastEventAt
+  if (ageMs > STALE_THRESHOLD_MS) {
+    return snapshot('idle', lastEventAt)
+  }
+
+  // Index of the newest event with the given type, or -1 when there is none.
+  const lastIndexOfType = (type: string): number => {
+    for (let idx = events.length - 1; idx >= 0; idx--) {
+      if (events[idx]!.type === type) return idx
+    }
+    return -1
+  }
+
+  const userAt = lastIndexOfType('user.message')
+  const assistantAt = lastIndexOfType('agent.message')
+  // The user spoke last, so the agent still owes a reply.
+  if (userAt !== -1 && userAt > assistantAt) {
+    return snapshot('working', lastEventAt)
+  }
+
+  const toolUseAt = lastIndexOfType('agent.tool_use')
+  const toolResultAt = lastIndexOfType('agent.tool_result')
+  // A tool call was started and no result has been recorded after it.
+  if (toolUseAt !== -1 && toolUseAt > toolResultAt) {
+    const pendingCall = events[toolUseAt]!
+    return snapshot('working', lastEventAt, pendingCall.toolName ?? null)
+  }
+
+  return snapshot('idle', lastEventAt)
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/container-runtime-factory.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/container-runtime-factory.ts
@@ -0,0 +1,229 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { cpSync, existsSync, mkdirSync } from 'node:fs'
+import { dirname, join } from 'node:path'
+import { getBrowserosDir } from '../../../lib/browseros-dir'
+import { ContainerCli, ImageLoader } from '../../../lib/container'
+import { logger } from '../../../lib/logger'
+import {
+  detectArch,
+  getLimaHomeDir,
+  resolveBundledLimactl,
+  resolveBundledLimaTemplate,
+  VM_NAME,
+  VmRuntime,
+} from '../../../lib/vm'
+import {
+  ensureVmCacheAvailable,
+  ensureVmCacheSynced,
+  type VmCacheSyncOptions,
+} from '../../../lib/vm/cache-sync'
+import { readCachedManifest } from '../../../lib/vm/manifest'
+import { VM_TELEMETRY_EVENTS } from '../../../lib/vm/telemetry'
+import { ContainerRuntime } from './container-runtime'
+
+// Error text shared by every "unsupported platform" failure in this module.
+const UNSUPPORTED_PLATFORM_MESSAGE =
+  'browseros-vm currently supports macOS only; see the Linux/Windows tracking issue'
+
+/** Inputs accepted by `buildContainerRuntime()`. */
+export interface ContainerRuntimeFactoryInput {
+  /**
+   * Bundled resources directory. When set, the bundled limactl binary and
+   * Lima template are resolved from it and the legacy OpenClaw directory
+   * migration runs first; when omitted, the plain command name `limactl`
+   * is used and no template path is passed.
+   */
+  resourcesDir?: string
+  /** Forwarded unchanged to the `ContainerRuntime` as its project directory. */
+  projectDir: string
+  /** BrowserOS state root; defaults to `getBrowserosDir()`. */
+  browserosRoot?: string
+  /** Platform override; defaults to `process.platform`. */
+  platform?: NodeJS.Platform
+  /** Optional VM cache configuration and hook overrides. */
+  vmCache?: VmCacheRuntimeConfig
+}
+
+/**
+ * VM cache settings: an optional manifest URL plus optional hooks that
+ * replace the default cache-availability and cache-sync calls.
+ */
+export interface VmCacheRuntimeConfig
+  extends Pick<VmCacheSyncOptions, 'manifestUrl'> {
+  /** Used instead of `ensureVmCacheAvailable()` when provided. */
+  ensureAvailable?: () => Promise<void>
+  /** Used instead of `ensureVmCacheSynced()` when provided. */
+  ensureSynced?: () => Promise<unknown>
+}
+
+/**
+ * Assemble the `ContainerRuntime` (VM, container CLI and image loader).
+ *
+ * Only 'darwin' is supported. On any other platform this throws, except
+ * when `NODE_ENV` is 'test', where an inert stub runtime is returned.
+ *
+ * @param input Factory inputs; see `ContainerRuntimeFactoryInput`.
+ * @returns A runtime wired to the bundled limactl when `resourcesDir` is
+ *   given, otherwise to the plain `limactl` command name.
+ * @throws Error on non-darwin platforms outside of tests.
+ */
+export function buildContainerRuntime(
+  input: ContainerRuntimeFactoryInput,
+): ContainerRuntime {
+  const { resourcesDir, projectDir } = input
+
+  const targetPlatform = input.platform ?? process.platform
+  if (targetPlatform !== 'darwin') {
+    if (process.env.NODE_ENV !== 'test') {
+      throw unsupportedPlatformError()
+    }
+    return new UnsupportedPlatformTestRuntime(projectDir)
+  }
+
+  const browserosRoot = input.browserosRoot ?? getBrowserosDir()
+  // The legacy state migration only runs when bundled resources are present.
+  if (resourcesDir) {
+    migrateLegacyOpenClawDirSync(browserosRoot)
+  }
+
+  const limactlPath = resourcesDir
+    ? resolveBundledLimactl(resourcesDir)
+    : 'limactl'
+  const limaHome = getLimaHomeDir(browserosRoot)
+  const templatePath = resourcesDir
+    ? resolveBundledLimaTemplate(resourcesDir)
+    : undefined
+
+  // A caller-supplied hook wins; otherwise defer to the default cache check.
+  const ensureCacheAvailable =
+    input.vmCache?.ensureAvailable ??
+    (() =>
+      ensureVmCacheAvailable({
+        browserosRoot,
+        manifestUrl: input.vmCache?.manifestUrl,
+      }))
+
+  const vm = new VmRuntime({
+    limactlPath,
+    limaHome,
+    templatePath,
+    browserosRoot,
+    ensureCacheAvailable,
+  })
+  const shell = new ContainerCli({ limactlPath, limaHome, vmName: VM_NAME })
+  const loader = new DeferredImageLoader(shell, browserosRoot, input.vmCache)
+
+  return new ContainerRuntime({ vm, shell, loader, projectDir })
+}
+
+/**
+ * Promise-returning wrapper around the synchronous legacy-directory
+ * migration; any failure surfaces as a rejected promise.
+ *
+ * @param browserosRoot BrowserOS state root; defaults to `getBrowserosDir()`.
+ */
+export async function migrateLegacyOpenClawDir(
+  browserosRoot = getBrowserosDir(),
+): Promise<void> {
+  return migrateLegacyOpenClawDirSync(browserosRoot)
+}
+
+/**
+ * Copy `<root>/openclaw` to `<root>/vm/openclaw` when only the legacy
+ * directory exists. The legacy directory is left in place. Does nothing
+ * when there is no legacy directory, and only warns when both exist.
+ */
+function migrateLegacyOpenClawDirSync(browserosRoot = getBrowserosDir()): void {
+  const from = join(browserosRoot, 'openclaw')
+  if (!existsSync(from)) return
+
+  const to = join(browserosRoot, 'vm', 'openclaw')
+  if (existsSync(to)) {
+    logger.warn('OpenClaw legacy and VM state directories both exist', {
+      legacyDir: from,
+      nextDir: to,
+    })
+    return
+  }
+
+  // Create the parent (`<root>/vm`) first, then copy the whole tree.
+  mkdirSync(dirname(to), { recursive: true })
+  cpSync(from, to, { recursive: true })
+  logger.info(VM_TELEMETRY_EVENTS.migrationOpenClawMoved, { from, to })
+}
+
+/**
+ * Image loader that postpones cache sync and manifest reading until an
+ * image is actually requested, then delegates to a fresh `ImageLoader`.
+ */
+class DeferredImageLoader {
+  private readonly shell: ContainerCli
+  private readonly browserosRoot: string
+  private readonly vmCache?: VmCacheRuntimeConfig
+
+  constructor(
+    shell: ContainerCli,
+    browserosRoot: string,
+    vmCache?: VmCacheRuntimeConfig,
+  ) {
+    this.shell = shell
+    this.browserosRoot = browserosRoot
+    this.vmCache = vmCache
+  }
+
+  /** Sync the VM cache, read the cached manifest, then load `ref`. */
+  async ensureImageLoaded(ref: string, onLog?: (msg: string) => void) {
+    await this.syncCache()
+    const manifest = await readCachedManifest(this.browserosRoot)
+    const delegate = new ImageLoader(
+      this.shell,
+      manifest,
+      detectArch(),
+      this.browserosRoot,
+    )
+    await delegate.ensureImageLoaded(ref, onLog)
+  }
+
+  /** Run the caller-supplied sync hook if present, else the default sync. */
+  private async syncCache(): Promise<void> {
+    if (!this.vmCache?.ensureSynced) {
+      await ensureVmCacheSynced({
+        browserosRoot: this.browserosRoot,
+        manifestUrl: this.vmCache?.manifestUrl,
+      })
+      return
+    }
+    await this.vmCache.ensureSynced()
+  }
+}
+
+/**
+ * Inert `ContainerRuntime` returned on non-darwin platforms when
+ * `NODE_ENV` is 'test'. Status queries report "not available", stop
+ * operations are no-ops, and operations that would need the VM reject
+ * with the unsupported-platform error.
+ */
+class UnsupportedPlatformTestRuntime extends ContainerRuntime {
+  constructor(projectDir: string) {
+    // `vm` and `shell` are empty placeholders; every public method below is
+    // overridden in this class.
+    super({
+      vm: {} as VmRuntime,
+      shell: {} as ContainerCli,
+      loader: { ensureImageLoaded: rejectUnsupportedPlatform },
+      projectDir,
+    })
+  }
+
+  // --- Status queries: always report "not available" ----------------------
+
+  override async isPodmanAvailable(): Promise<boolean> {
+    return false
+  }
+
+  override async getMachineStatus(): Promise<{
+    initialized: boolean
+    running: boolean
+  }> {
+    return { initialized: false, running: false }
+  }
+
+  override async isHealthy(): Promise<boolean> {
+    return false
+  }
+
+  override async isReady(): Promise<boolean> {
+    return false
+  }
+
+  override async waitForReady(): Promise<boolean> {
+    return false
+  }
+
+  override async getGatewayLogs(): Promise<string[]> {
+    return []
+  }
+
+  // --- Teardown and log tailing: succeed without doing anything -----------
+
+  override async stopGateway(): Promise<void> {}
+
+  override async stopVm(): Promise<void> {}
+
+  override tailGatewayLogs(): () => void {
+    return () => {}
+  }
+
+  // --- Operations that need the VM: always reject -------------------------
+
+  override async ensureReady(): Promise<void> {
+    throw unsupportedPlatformError()
+  }
+
+  override async pullImage(): Promise<void> {
+    throw unsupportedPlatformError()
+  }
+
+  override async startGateway(): Promise<void> {
+    throw unsupportedPlatformError()
+  }
+
+  override async restartGateway(): Promise<void> {
+    throw unsupportedPlatformError()
+  }
+
+  override async execInContainer(): Promise<number> {
+    throw unsupportedPlatformError()
+  }
+
+  override async runInContainer(): Promise<never> {
+    throw unsupportedPlatformError()
+  }
+
+  override async runGatewaySetupCommand(): Promise<number> {
+    throw unsupportedPlatformError()
+  }
+}
+
+/** Always resolves to a rejection carrying the unsupported-platform error. */
+function rejectUnsupportedPlatform(): Promise<never> {
+  return Promise.reject(unsupportedPlatformError())
+}
+
+/** Build a new Error carrying the shared unsupported-platform message. */
+function unsupportedPlatformError(): Error {
+  const failure = new Error(UNSUPPORTED_PLATFORM_MESSAGE)
+  return failure
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/container-runtime.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/container-runtime.ts
@@ -2,191 +2,316 @@
 * @license
 * Copyright 2025 BrowserOS
 * SPDX-License-Identifier: AGPL-3.0-or-later
- *
- * Compose-level abstraction over PodmanRuntime.
- * Manages a single compose project for the OpenClaw gateway container.
 */

-import { copyFile, writeFile } from 'node:fs/promises'
-import { join } from 'node:path'
 import {
-  OPENCLAW_COMPOSE_PROJECT_NAME,
  OPENCLAW_GATEWAY_CONTAINER_NAME,
+  OPENCLAW_GATEWAY_CONTAINER_PORT,
 } from '@browseros/shared/constants/openclaw'
+import type {
+  ContainerCli,
+  ContainerCommandResult,
+  ContainerSpec,
+  LogFn,
+} from '../../../lib/container'
 import { logger } from '../../../lib/logger'
-import type { LogFn, PodmanRuntime } from './podman-runtime'
+import {
+  GUEST_VM_STATE,
+  hostPathToGuest,
+  type VmRuntime,
+} from '../../../lib/vm'

-const COMPOSE_FILE_NAME = 'docker-compose.yml'
-const ENV_FILE_NAME = '.env'
+const GATEWAY_CONTAINER_HOME = '/home/node' // mount target and HOME / OPENCLAW_HOME inside the gateway container
+const GATEWAY_STATE_DIR = `${GATEWAY_CONTAINER_HOME}/.openclaw` // exported to the container as OPENCLAW_STATE_DIR
+const GUEST_OPENCLAW_HOME = `${GUEST_VM_STATE}/openclaw` // path under GUEST_VM_STATE used as the mount source for GATEWAY_CONTAINER_HOME
+const GATEWAY_NPM_PREFIX = `${GATEWAY_CONTAINER_HOME}/.npm-global` // exported as NPM_CONFIG_PREFIX; its bin dir leads GATEWAY_PATH
+// Prepend user-installed bin so tools like `claude` / `gemini` CLI that
+// are installed via npm into the mounted home are discoverable by
+// OpenClaw's child-process spawns (no login shell is involved).
+const GATEWAY_PATH = [
+  `${GATEWAY_NPM_PREFIX}/bin`,
+  '/usr/local/sbin',
+  '/usr/local/bin',
+  '/usr/sbin',
+  '/usr/bin',
+  '/sbin',
+  '/bin',
+].join(':') // colon-joined into a single PATH value
+
+export type GatewayContainerSpec = { // Caller-supplied inputs from which ContainerRuntime derives the gateway container.
+  image: string // image reference handed to the loader and used to create the container
+  hostPort: number // published on 127.0.0.1 and mapped to OPENCLAW_GATEWAY_CONTAINER_PORT
+  hostHome: string // host directory whose paths are rewritten under GUEST_OPENCLAW_HOME
+  envFilePath: string // host path of the env file; translated before being passed to the container CLI
+  gatewayToken?: string // when set (non-empty), exported as OPENCLAW_GATEWAY_TOKEN
+  timezone: string // exported to the container as TZ
+}
+
+export interface ContainerRuntimeConfig { // Collaborators injected into ContainerRuntime's constructor.
+  vm: VmRuntime // readiness checks, default-gateway lookup and stopVm are delegated here
+  shell: ContainerCli // container CLI used to create/start/remove containers, exec and read logs
+  loader: { ensureImageLoaded(ref: string, onLog?: LogFn): Promise<void> } // awaited before a container is created from an image
+  projectDir: string // stored on the runtime instance
+}

 export class ContainerRuntime {
-  constructor(
-    private podman: PodmanRuntime,
-    private projectDir: string,
-  ) {}
+  private readonly vm: VmRuntime
+  private readonly shell: ContainerCli
+  private readonly loader: {
+    ensureImageLoaded(ref: string, onLog?: LogFn): Promise<void>
+  }
+  private readonly projectDir: string // NOTE(review): assigned in the constructor but not read by any method in this hunk — confirm it is still needed
+
+  constructor(config: ContainerRuntimeConfig) { // Copies the injected collaborators onto the instance; performs no I/O.
+    this.vm = config.vm
+    this.shell = config.shell
+    this.loader = config.loader
+    this.projectDir = config.projectDir
+  }

  async ensureReady(onLog?: LogFn): Promise<void> {
-    logger.info('Ensuring Podman runtime readiness')
-    return this.podman.ensureReady(onLog)
+    logger.info('Ensuring BrowserOS VM runtime readiness')
+    await this.vm.ensureReady(onLog) // readiness is delegated entirely to the VM runtime
+    await this.vm.getDefaultGateway() // result discarded; presumably called so a gateway lookup failure surfaces here — confirm
  }

  async isPodmanAvailable(): Promise<boolean> {
-    return this.podman.isPodmanAvailable()
+    return true // constant: no Podman probe is performed any more
  }

  async getMachineStatus(): Promise<{
    initialized: boolean
    running: boolean
  }> {
-    return this.podman.getMachineStatus()
+    const running = await this.vm.isReady() // single readiness probe feeds both flags
+    return { initialized: running, running } // NOTE(review): a VM that exists but is not ready is reported as not initialized — confirm intended
  }

-  async composeUp(onLog?: LogFn): Promise<void> {
-    const code = await this.compose(['up', '-d'], onLog)
-    if (code !== 0) throw new Error(`compose up failed with code ${code}`)
+  async pullImage(image: string, onLog?: LogFn): Promise<void> { // Delegates to the injected loader; resolves once it has finished.
+    await this.loader.ensureImageLoaded(image, onLog)
  }

-  async composeDown(onLog?: LogFn): Promise<void> {
-    const code = await this.compose(['down'], onLog)
-    if (code !== 0) throw new Error(`compose down failed with code ${code}`)
+  async startGateway( // Recreates the gateway container from `input` and starts it.
+    input: GatewayContainerSpec,
+    onLog?: LogFn,
+  ): Promise<void> {
+    await this.removeGatewayContainer(onLog) // force-remove whatever currently holds the gateway container name
+    await this.loader.ensureImageLoaded(input.image, onLog) // image must be loaded before create
+    const container = await this.buildGatewayContainerSpec(input)
+    await this.shell.createContainer(container, onLog)
+    await this.shell.startContainer(container.name) // onLog is not forwarded to the start step
  }

-  async composeStop(onLog?: LogFn): Promise<void> {
-    const code = await this.compose(['stop'], onLog)
-    if (code !== 0) throw new Error(`compose stop failed with code ${code}`)
+  async stopGateway(onLog?: LogFn): Promise<void> { // Stopping force-removes the gateway container rather than leaving it stopped.
+    await this.removeGatewayContainer(onLog)
  }

-  async composeRestart(onLog?: LogFn): Promise<void> {
-    const code = await this.compose(['restart'], onLog)
-    if (code !== 0) throw new Error(`compose restart failed with code ${code}`)
+  async restartGateway( // Restart is a full re-create: startGateway removes the existing container first.
+    input: GatewayContainerSpec,
+    onLog?: LogFn,
+  ): Promise<void> {
+    await this.startGateway(input, onLog)
  }

-  async composePull(onLog?: LogFn): Promise<void> {
-    const code = await this.compose(['pull', '--quiet'], onLog)
-    if (code !== 0) throw new Error(`compose pull failed with code ${code}`)
-  }
-
-  async composeLogs(tail = 50): Promise<string[]> {
+  async getGatewayLogs(tail = 50): Promise<string[]> { // Collects the lines emitted by `logs -n <tail>` for the gateway container.
    const lines: string[] = []
-    await this.compose(['logs', '--no-color', '--tail', String(tail)], (line) =>
-      lines.push(line),
+    await this.shell.runCommand( // the command result (exit code) is not inspected
+      ['logs', '-n', String(tail), OPENCLAW_GATEWAY_CONTAINER_NAME],
+      (line) => lines.push(line), // every line reported by the shell is captured in order
    )
    return lines
  }

-  async isHealthy(port: number): Promise<boolean> {
+  async isHealthy(hostPort: number): Promise<boolean> { // True only when GET /healthz on host loopback answers with an ok status; any fetch error yields false.
    try {
-      const res = await fetch(`http://127.0.0.1:${port}/healthz`)
+      const res = await fetch(`http://127.0.0.1:${hostPort}/healthz`) // NOTE(review): no timeout/AbortSignal is passed, so an accepted-but-unanswered connection keeps this pending
      return res.ok
    } catch {
      return false
    }
  }

-  async isReady(port: number): Promise<boolean> {
+  async isReady(hostPort: number): Promise<boolean> { // True only when GET /readyz on host loopback answers with an ok status; any fetch error yields false.
    try {
-      const res = await fetch(`http://127.0.0.1:${port}/readyz`)
+      const res = await fetch(`http://127.0.0.1:${hostPort}/readyz`) // NOTE(review): no timeout/AbortSignal is passed, so waitForReady can overrun timeoutMs if this request hangs
      return res.ok
    } catch {
      return false
    }
  }

-  async waitForReady(port: number, timeoutMs = 30_000): Promise<boolean> {
-    logger.info('Waiting for OpenClaw gateway readiness', { port, timeoutMs })
+  async waitForReady(hostPort: number, timeoutMs = 30_000): Promise<boolean> { // Polls isReady until it succeeds (true) or timeoutMs has elapsed (false, with an error log).
+    logger.info('Waiting for OpenClaw gateway readiness', {
+      hostPort,
+      timeoutMs,
+    })
    const start = Date.now()
    while (Date.now() - start < timeoutMs) {
-      if (await this.isReady(port)) {
-        logger.info('OpenClaw gateway became ready', {
-          port,
-          waitMs: Date.now() - start,
-        })
-        return true
-      }
+      if (await this.isReady(hostPort)) return true // success returns silently; only the timeout path logs
      await Bun.sleep(1000)
    }
    logger.error('Timed out waiting for OpenClaw gateway readiness', {
-      port,
+      hostPort,
      timeoutMs,
    })
    return false
  }

-  async copyComposeFile(sourceTemplatePath: string): Promise<void> {
-    await copyFile(sourceTemplatePath, join(this.projectDir, COMPOSE_FILE_NAME))
-  }
-
-  async writeEnvFile(content: string): Promise<void> {
-    await writeFile(join(this.projectDir, ENV_FILE_NAME), content, {
-      mode: 0o600,
-    })
-  }
-
-  /**
-   * Stops the Podman machine only if no non-BrowserOS containers are running.
-   * Prevents killing the user's own Podman workloads.
-   */
-  async stopMachineIfSafe(): Promise<void> {
-    const status = await this.podman.getMachineStatus()
-    if (!status.running) return
-
-    try {
-      const containers = await this.podman.listRunningContainers()
-      const allOurs = containers.every((name) =>
-        name.startsWith(OPENCLAW_COMPOSE_PROJECT_NAME),
-      )
-
-      if (containers.length === 0 || allOurs) {
-        await this.podman.stopMachine()
-      }
-    } catch {
-      // Best effort — don't stop machine if we can't check
-    }
+  async stopVm(): Promise<void> { // Delegates straight to the VM runtime; no check of running containers is made here.
+    await this.vm.stopVm()
  }

  async execInContainer(command: string[], onLog?: LogFn): Promise<number> {
-    return this.podman.runCommand(
-      ['exec', OPENCLAW_GATEWAY_CONTAINER_NAME, ...command],
-      {
-        onOutput: onLog,
-      },
+    return this.shell.exec(OPENCLAW_GATEWAY_CONTAINER_NAME, command, onLog) // runs `command` in the gateway container; the number returned is presumably its exit code — confirm against ContainerCli.exec
+  }
+
+  // Executes `command` inside the gateway container and returns the
+  // shell's ContainerCommandResult untouched.
+  //
+  // Unlike execInContainer, stdout and stderr stay separate here, so
+  // callers that parse program output (e.g. JSON status commands) do
+  // not have to untangle it from nerdctl's stderr.
+  async runInContainer(command: string[]): Promise<ContainerCommandResult> {
+    const execArgs = ['exec', OPENCLAW_GATEWAY_CONTAINER_NAME, ...command]
+    return this.shell.runCommand(execArgs)
+  }
+
+  async runGatewaySetupCommand( // Runs a one-shot `node ...` command in a throwaway "<gateway>-setup" container and resolves with an exit code.
+    command: string[],
+    spec: GatewayContainerSpec,
+    onLog?: LogFn,
+  ): Promise<number> {
+    const setupContainerName = `${OPENCLAW_GATEWAY_CONTAINER_NAME}-setup`
+    await this.shell.removeContainer(setupContainerName, { force: true }, onLog) // clear any leftover setup container first
+    await this.loader.ensureImageLoaded(spec.image, onLog)
+    const setupArgs = command[0] === 'node' ? command.slice(1) : command // drop a leading 'node'; it is re-added explicitly below
+    const createResult = await this.shell.runCommand(
+      [
+        'create',
+        '--name',
+        setupContainerName,
+        ...(await this.buildGatewayRunArgs(spec)), // env file, home mount, env vars and host alias
+        spec.image,
+        'node',
+        ...setupArgs,
+      ],
+      onLog,
    )
+    if (createResult.exitCode !== 0) { // creation failed: clean up and report the create exit code
+      await this.shell.removeContainer(
+        setupContainerName,
+        { force: true },
+        onLog,
+      )
+      return createResult.exitCode
+    }
+
+    try {
+      const startResult = await this.shell.runCommand(
+        ['start', '-a', setupContainerName], // `-a` (attach): presumably blocks until the command exits — confirm for the CLI in use
+        onLog,
+      )
+      return startResult.exitCode
+    } finally { // the setup container is removed whether start returned or threw
+      await this.shell.removeContainer(
+        setupContainerName,
+        { force: true },
+        onLog,
+      )
+    }
  }

  tailGatewayLogs(onLine: LogFn): () => void {
-    return this.podman.tailContainerLogs(
+    return this.shell.tailLogs(OPENCLAW_GATEWAY_CONTAINER_NAME, onLine) // returns whatever function shell.tailLogs hands back (presumably stops the tail — confirm)
+  }
+
+  private async removeGatewayContainer(onLog?: LogFn): Promise<void> { // Force-removes the gateway container by its fixed name.
+    await this.shell.removeContainer(
      OPENCLAW_GATEWAY_CONTAINER_NAME,
-      onLine,
+      { force: true },
+      onLog,
    )
  }

-  private async compose(args: string[], onLog?: LogFn): Promise<number> {
-    const lines: string[] = []
-    const command = ['podman', 'compose', ...args].join(' ')
-    logger.info('Running OpenClaw compose command', {
-      command,
-    })
-    const code = await this.podman.runCommand(['compose', ...args], {
-      cwd: this.projectDir,
-      env: { COMPOSE_PROJECT_NAME: OPENCLAW_COMPOSE_PROJECT_NAME },
-      onOutput: (line) => {
-        lines.push(line)
-        onLog?.(line)
+  private async buildGatewayContainerSpec( // Builds the ContainerSpec for the long-running gateway container from `input`.
+    input: GatewayContainerSpec,
+  ): Promise<ContainerSpec> {
+    return {
+      name: OPENCLAW_GATEWAY_CONTAINER_NAME,
+      image: input.image,
+      restart: 'unless-stopped',
+      ports: [
+        {
+          hostIp: '127.0.0.1', // published on host loopback only
+          hostPort: input.hostPort,
+          containerPort: OPENCLAW_GATEWAY_CONTAINER_PORT,
+        },
+      ],
+      envFile: this.translateHostPath(input.envFilePath, input.hostHome), // host path rewritten before use
+      env: this.buildGatewayEnv(input),
+      mounts: [{ source: GUEST_OPENCLAW_HOME, target: GATEWAY_CONTAINER_HOME }],
+      addHosts: [await this.hostContainersInternalEntry()], // the only awaited lookup in this spec
+      health: {
+        cmd: `curl -sf http://127.0.0.1:${OPENCLAW_GATEWAY_CONTAINER_PORT}/healthz`, // probes the container-side port, not the host port
+        interval: '30s',
+        timeout: '10s',
+        retries: 3,
      },
-    })
-
-    if (code !== 0) {
-      logger.error('OpenClaw compose command failed', {
-        command,
-        exitCode: code,
-        output: lines,
-      })
-    } else {
-      logger.info('OpenClaw compose command succeeded', {
-        command,
-      })
+      command: [
+        'node',
+        'dist/index.js',
+        'gateway',
+        '--bind',
+        'lan',
+        '--port',
+        String(OPENCLAW_GATEWAY_CONTAINER_PORT), // same port the health check and port mapping target
+        '--allow-unconfigured',
+      ],
    }
+  }

-    return code
+  // Assembles the `create` flags for the setup container, in order:
+  // env file, home bind mount, one `-e` per environment variable, and
+  // finally the host.containers.internal `--add-host` entry.
+  private async buildGatewayRunArgs(
+    input: GatewayContainerSpec,
+  ): Promise<string[]> {
+    const envFile = this.translateHostPath(input.envFilePath, input.hostHome)
+    const homeMount = `${GUEST_OPENCLAW_HOME}:${GATEWAY_CONTAINER_HOME}`
+    const envFlags = Object.entries(this.buildGatewayEnv(input)).flatMap(
+      ([key, value]) => ['-e', `${key}=${value}`],
+    )
+    const hostAlias = await this.hostContainersInternalEntry()
+    const mountFlags = ['--env-file', envFile, '-v', homeMount]
+    return [...mountFlags, ...envFlags, '--add-host', hostAlias]
+  }
+
+  private async hostContainersInternalEntry(): Promise<string> { // "host.containers.internal:<gateway>" entry, resolved from the VM's default gateway on every call.
+    return `host.containers.internal:${await this.vm.getDefaultGateway()}`
+  }
+
+  // Environment for gateway containers; the token is added only when set.
+  private buildGatewayEnv(input: GatewayContainerSpec): Record<string, string> {
+    const env: Record<string, string> = {
+      HOME: GATEWAY_CONTAINER_HOME,
+      OPENCLAW_HOME: GATEWAY_CONTAINER_HOME,
+      OPENCLAW_STATE_DIR: GATEWAY_STATE_DIR,
+      OPENCLAW_NO_RESPAWN: '1',
+      NODE_COMPILE_CACHE: '/var/tmp/openclaw-compile-cache',
+      NODE_ENV: 'production',
+      TZ: input.timezone,
+      PATH: GATEWAY_PATH,
+      NPM_CONFIG_PREFIX: GATEWAY_NPM_PREFIX,
+    }
+    if (input.gatewayToken) env.OPENCLAW_GATEWAY_TOKEN = input.gatewayToken
+    return env
+  }
+
+  // Maps host paths at or below openclawHostDir into GUEST_OPENCLAW_HOME.
+  private translateHostPath(path: string, openclawHostDir: string): string {
+    const insideHome = path.startsWith(`${openclawHostDir}/`)
+    if (path !== openclawHostDir && !insideHome) return hostPathToGuest(path)
+    const relative = path.slice(openclawHostDir.length)
+    return `${GUEST_OPENCLAW_HOME}${relative}`
  }
 }
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/errors.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/errors.ts
@@ -27,3 +27,10 @@ export class OpenClawProtectedAgentError extends Error {
    this.name = 'OpenClawProtectedAgentError'
  }
 }
+
+export class OpenClawSessionNotFoundError extends Error { // Error whose message names the missing session; the key is also exposed as `sessionKey`.
+  constructor(public readonly sessionKey: string) {
+    super(`OpenClaw session not found: ${sessionKey}`)
+    this.name = 'OpenClawSessionNotFoundError' // explicit name, matching the sibling error class above
+  }
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/gateway-client.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/gateway-client.ts
@@ -1,754 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- *
- * WebSocket client for the OpenClaw Gateway protocol.
- * Handles handshake (challenge → connect → hello-ok) with Ed25519 device
- * identity signing, JSON-RPC over WS, and auto-reconnect.
- * Used for agent CRUD and health — chat uses HTTP.
- */
-
-import crypto from 'node:crypto'
-import { mkdirSync, readFileSync, writeFileSync } from 'node:fs'
-import { join } from 'node:path'
-import { OPENCLAW_CONTAINER_HOME } from '@browseros/shared/constants/openclaw'
-import { logger } from '../../../lib/logger'
-
-const RPC_TIMEOUT_MS = 15_000
-const SCOPES = [
-  'operator.read',
-  'operator.write',
-  'operator.admin',
-  'operator.approvals',
-  'operator.pairing',
-]
-
-interface DeviceIdentity {
-  deviceId: string
-  publicKeyPem: string
-  privateKeyPem: string
-}
-
-interface PendingRequest {
-  resolve: (value: unknown) => void
-  reject: (reason: Error) => void
-  timer: ReturnType<typeof setTimeout>
-}
-
-interface WsFrame {
-  type: 'req' | 'res' | 'event'
-  id?: string
-  method?: string
-  params?: Record<string, unknown>
-  ok?: boolean
-  payload?: Record<string, unknown>
-  error?: { message: string; code?: string }
-  event?: string
-}
-
-export type GatewayClientConnectionState =
-  | 'idle'
-  | 'connecting'
-  | 'connected'
-  | 'closed'
-  | 'failed'
-
-export interface GatewayHandshakeError {
-  code?: string
-  message: string
-}
-
-export interface OpenClawStreamEvent {
-  type:
-    | 'text-delta'
-    | 'thinking'
-    | 'tool-start'
-    | 'tool-end'
-    | 'tool-output'
-    | 'lifecycle'
-    | 'done'
-    | 'error'
-  data: Record<string, unknown>
-}
-
-export interface GatewayAgentEntry {
-  agentId: string
-  name: string
-  workspace: string
-  model?: string
-}
-
-// ── Device Identity Helpers ─────────────────────────────────────────
-
-function rawPublicKeyFromPem(pem: string): Buffer {
-  const der = Buffer.from(
-    pem.replace(/-----[^-]+-----/g, '').replace(/\s/g, ''),
-    'base64',
-  )
-  return der.subarray(12)
-}
-
-function signChallenge(
-  device: DeviceIdentity,
-  nonce: string,
-  token: string,
-): { signature: string; signedAt: number; publicKey: string } {
-  const signedAt = Date.now()
-  const payload = `v3|${device.deviceId}|cli|cli|operator|${SCOPES.join(',')}|${signedAt}|${token}|${nonce}|${process.platform}|`
-  const privateKey = crypto.createPrivateKey(device.privateKeyPem)
-  const sig = crypto.sign(null, Buffer.from(payload, 'utf-8'), privateKey)
-
-  return {
-    signature: sig.toString('base64url'),
-    signedAt,
-    publicKey: rawPublicKeyFromPem(device.publicKeyPem).toString('base64url'),
-  }
-}
-
-/**
- * Generates a client Ed25519 identity and pre-seeds it into the gateway's
- * paired devices file so the gateway trusts it on next boot.
- * Must be called before compose up (or requires a restart after).
- */
-export function ensureClientIdentity(openclawDir: string): DeviceIdentity {
-  const identityPath = join(openclawDir, 'client-identity.json')
-
-  try {
-    return JSON.parse(readFileSync(identityPath, 'utf-8'))
-  } catch {
-    // Generate new identity
-  }
-
-  const { publicKey, privateKey } = crypto.generateKeyPairSync('ed25519')
-  const publicKeyPem = publicKey
-    .export({ type: 'spki', format: 'pem' })
-    .toString()
-  const privateKeyPem = privateKey
-    .export({ type: 'pkcs8', format: 'pem' })
-    .toString()
-
-  const rawPub = rawPublicKeyFromPem(publicKeyPem)
-  const deviceId = crypto.createHash('sha256').update(rawPub).digest('hex')
-
-  const identity: DeviceIdentity = { deviceId, publicKeyPem, privateKeyPem }
-  writeFileSync(identityPath, JSON.stringify(identity, null, 2), {
-    mode: 0o600,
-  })
-
-  seedPairedDevice(openclawDir, identity)
-  logger.info('Generated client device identity and pre-seeded pairing')
-
-  return identity
-}
-
-function seedPairedDevice(openclawDir: string, identity: DeviceIdentity): void {
-  const devicesDir = join(openclawDir, 'devices')
-  mkdirSync(devicesDir, { recursive: true })
-
-  const pairedPath = join(devicesDir, 'paired.json')
-  let paired: Record<string, unknown> = {}
-  try {
-    paired = JSON.parse(readFileSync(pairedPath, 'utf-8'))
-  } catch {
-    // First time
-  }
-
-  const rawPub = rawPublicKeyFromPem(identity.publicKeyPem)
-  paired[identity.deviceId] = {
-    deviceId: identity.deviceId,
-    publicKey: rawPub.toString('base64url'),
-    platform: process.platform,
-    clientId: 'cli',
-    clientMode: 'cli',
-    role: 'operator',
-    roles: ['operator'],
-    scopes: SCOPES,
-    pairedAt: Date.now(),
-    label: 'browseros-server',
-  }
-
-  writeFileSync(pairedPath, JSON.stringify(paired, null, 2), { mode: 0o600 })
-}
-
-// ── Gateway Client ──────────────────────────────────────────────────
-
-export class GatewayClient {
-  private ws: WebSocket | null = null
-  private _connected = false
-  private pendingRequests = new Map<string, PendingRequest>()
-  private device: DeviceIdentity | null = null
-  private connectionState: GatewayClientConnectionState = 'idle'
-  private lastHandshakeError: GatewayHandshakeError | null = null
-
-  constructor(
-    private readonly port: number,
-    private readonly token: string,
-    private readonly openclawDir: string,
-    private readonly version = '1.0.0',
-  ) {
-    try {
-      const identityPath = join(this.openclawDir, 'client-identity.json')
-      this.device = JSON.parse(readFileSync(identityPath, 'utf-8'))
-    } catch {
-      logger.warn('Client device identity not found, WS auth may fail')
-    }
-  }
-
-  get isConnected(): boolean {
-    return this._connected
-  }
-
-  get state(): GatewayClientConnectionState {
-    return this.connectionState
-  }
-
-  get lastError(): GatewayHandshakeError | null {
-    return this.lastHandshakeError
-  }
-
-  async connect(): Promise<void> {
-    return new Promise((resolve, reject) => {
-      this.connectionState = 'connecting'
-      this.lastHandshakeError = null
-      logger.info('Connecting to OpenClaw Gateway WS', {
-        port: this.port,
-        hasDeviceIdentity: !!this.device,
-      })
-      this.ws = new WebSocket(`ws://127.0.0.1:${this.port}`, {
-        headers: { Origin: `http://127.0.0.1:${this.port}` },
-      } as unknown as string[])
-
-      let handshakeComplete = false
-      let connectReqId: string | null = null
-
-      this.ws.onmessage = (event) => {
-        const frame = GatewayClient.parseFrame(event.data)
-        if (!frame) return
-
-        if (!handshakeComplete) {
-          if (frame.type === 'event' && frame.event === 'connect.challenge') {
-            const nonce = (frame.payload as Record<string, unknown>)
-              ?.nonce as string
-            logger.info('Received OpenClaw Gateway challenge', {
-              hasNonce: !!nonce,
-              hasDeviceIdentity: !!this.device,
-            })
-            connectReqId = globalThis.crypto.randomUUID()
-
-            const params: Record<string, unknown> = {
-              minProtocol: 3,
-              maxProtocol: 3,
-              client: {
-                id: 'cli',
-                version: this.version,
-                platform: process.platform,
-                mode: 'cli',
-              },
-              role: 'operator',
-              scopes: SCOPES,
-              caps: [],
-              commands: [],
-              permissions: {},
-              auth: { token: this.token },
-              locale: 'en-US',
-              userAgent: `browseros-server/${this.version}`,
-            }
-
-            if (this.device && nonce) {
-              const signed = signChallenge(this.device, nonce, this.token)
-              params.device = {
-                id: this.device.deviceId,
-                publicKey: signed.publicKey,
-                signature: signed.signature,
-                signedAt: signed.signedAt,
-                nonce,
-              }
-            }
-
-            this.ws?.send(
-              JSON.stringify({
-                type: 'req',
-                id: connectReqId,
-                method: 'connect',
-                params,
-              }),
-            )
-            return
-          }
-
-          if (frame.type === 'res' && frame.id === connectReqId) {
-            if (frame.ok) {
-              handshakeComplete = true
-              this._connected = true
-              this.connectionState = 'connected'
-              logger.info('Gateway WS connected')
-              resolve()
-            } else {
-              const msg = frame.error?.message ?? 'Handshake failed'
-              this.connectionState = 'failed'
-              this.lastHandshakeError = {
-                message: msg,
-                code: frame.error?.code,
-              }
-              logger.error('Gateway WS handshake rejected', {
-                error: msg,
-                code: frame.error?.code,
-              })
-              reject(new Error(msg))
-            }
-            return
-          }
-          return
-        }
-
-        this.resolvePendingRequest(frame)
-      }
-
-      this.ws.onerror = (err) => {
-        logger.error('Gateway WS socket error', {
-          error: err instanceof Error ? err.message : 'unknown',
-          handshakeComplete,
-        })
-        if (!handshakeComplete) {
-          this.connectionState = 'failed'
-          reject(
-            new Error(
-              `WS connection error: ${err instanceof Error ? err.message : 'unknown'}`,
-            ),
-          )
-        }
-      }
-
-      this.ws.onclose = () => {
-        this._connected = false
-        this.connectionState = 'closed'
-        this.rejectAllPending('WebSocket closed')
-        if (handshakeComplete) {
-          logger.info('Gateway WS disconnected')
-        }
-        this.ws = null
-      }
-    })
-  }
-
-  disconnect(): void {
-    this._connected = false
-    this.connectionState = 'closed'
-    this.rejectAllPending('Client disconnecting')
-    if (this.ws) {
-      this.ws.onclose = null
-      this.ws.close()
-      this.ws = null
-    }
-  }
-
-  // ── RPC ──────────────────────────────────────────────────────────────
-
-  async rpc<T = Record<string, unknown>>(
-    method: string,
-    params: Record<string, unknown> = {},
-  ): Promise<T> {
-    if (!this._connected || !this.ws) {
-      throw new Error('Gateway WS not connected')
-    }
-    const id = globalThis.crypto.randomUUID()
-
-    return new Promise<T>((resolve, reject) => {
-      const timer = setTimeout(() => {
-        this.pendingRequests.delete(id)
-        reject(new Error(`RPC timeout: ${method}`))
-      }, RPC_TIMEOUT_MS)
-
-      this.pendingRequests.set(id, {
-        resolve: resolve as (value: unknown) => void,
-        reject,
-        timer,
-      })
-
-      this.ws?.send(JSON.stringify({ type: 'req', id, method, params }))
-    })
-  }
-
-  // ── Agent Methods ────────────────────────────────────────────────────
-
-  async listAgents(): Promise<GatewayAgentEntry[]> {
-    const result = await this.rpc<{
-      agents: Array<{
-        id: string
-        name?: string
-        workspace: string
-        model?: string
-      }>
-    }>('agents.list')
-
-    return (result.agents ?? []).map((a) => ({
-      agentId: a.id,
-      name: a.name ?? a.id,
-      workspace: a.workspace,
-      model: a.model,
-    }))
-  }
-
-  async createAgent(input: {
-    name: string
-    workspace: string
-    model?: string
-  }): Promise<GatewayAgentEntry> {
-    const result = await this.rpc<{
-      agentId?: string
-      id?: string
-      name?: string
-      workspace?: string
-      model?: string
-    }>('agents.create', input)
-
-    return {
-      agentId: result.agentId ?? result.id ?? input.name,
-      name: result.name ?? input.name,
-      workspace: result.workspace ?? input.workspace,
-      model: result.model ?? input.model,
-    }
-  }
-
-  async deleteAgent(agentId: string): Promise<void> {
-    await this.rpc('agents.delete', { id: agentId })
-  }
-
-  // ── Health ───────────────────────────────────────────────────────────
-
-  async getHealth(): Promise<Record<string, unknown>> {
-    return this.rpc('health')
-  }
-
-  // ── Chat Stream ─────────────────────────────────────────────────────
-
-  chatStream(
-    agentId: string,
-    sessionKey: string,
-    message: string,
-  ): ReadableStream<OpenClawStreamEvent> {
-    if (!this._connected) {
-      throw new Error('Gateway WS not connected')
-    }
-
-    const fullSessionKey = `agent:${agentId}:browseros-${sessionKey}`
-    const idempotencyKey = globalThis.crypto.randomUUID()
-    const streamClient = new GatewayClient(
-      this.port,
-      this.token,
-      this.openclawDir,
-      this.version,
-    )
-
-    return new ReadableStream<OpenClawStreamEvent>({
-      start: async (controller) => {
-        try {
-          await streamClient.connect()
-        } catch (error) {
-          controller.enqueue({
-            type: 'error',
-            data: {
-              message:
-                error instanceof Error
-                  ? error.message
-                  : 'Gateway WS not connected',
-            },
-          })
-          controller.close()
-          return
-        }
-
-        const ws = streamClient.ws
-        if (!ws) {
-          controller.enqueue({
-            type: 'error',
-            data: { message: 'Gateway WS not connected' },
-          })
-          controller.close()
-          return
-        }
-
-        const subscribeId = globalThis.crypto.randomUUID()
-        const agentReqId = globalThis.crypto.randomUUID()
-        let finished = false
-
-        const finish = (event?: OpenClawStreamEvent) => {
-          if (finished) return
-          finished = true
-          if (event) controller.enqueue(event)
-          controller.close()
-          streamClient.disconnect()
-        }
-
-        ws.onmessage = (event) => {
-          const frame = GatewayClient.parseFrame(event.data)
-          if (!frame) return
-
-          if (
-            this.handleChatStreamControlFrame(
-              frame,
-              subscribeId,
-              agentReqId,
-              finish,
-            )
-          ) {
-            return
-          }
-
-          this.handleChatStreamEventFrame(frame, controller, finish)
-        }
-
-        ws.onclose = () => {
-          if (finished) return
-          finish({
-            type: 'error',
-            data: { message: 'Gateway WS disconnected' },
-          })
-        }
-
-        ws.onerror = () => {
-          if (finished) return
-          finish({
-            type: 'error',
-            data: { message: 'Gateway WS connection error' },
-          })
-        }
-
-        ws.send(
-          JSON.stringify({
-            type: 'req',
-            id: subscribeId,
-            method: 'sessions.subscribe',
-            params: { sessionKey: fullSessionKey },
-          }),
-        )
-
-        ws.send(
-          JSON.stringify({
-            type: 'req',
-            id: agentReqId,
-            method: 'agent',
-            params: {
-              message,
-              sessionKey: fullSessionKey,
-              idempotencyKey,
-            },
-          }),
-        )
-      },
-      cancel: () => {
-        if (streamClient.ws?.readyState === WebSocket.OPEN) {
-          streamClient.ws.send(
-            JSON.stringify({
-              type: 'req',
-              id: globalThis.crypto.randomUUID(),
-              method: 'sessions.abort',
-              params: { sessionKey: fullSessionKey },
-            }),
-          )
-        }
-        streamClient.disconnect()
-      },
-    })
-  }
-
-  // ── Helpers ──────────────────────────────────────────────────────────
-
-  static agentWorkspace(name: string): string {
-    return name === 'main'
-      ? `${OPENCLAW_CONTAINER_HOME}/workspace`
-      : `${OPENCLAW_CONTAINER_HOME}/workspace-${name}`
-  }
-
-  private static parseFrame(data: unknown): WsFrame | null {
-    try {
-      return JSON.parse(
-        typeof data === 'string'
-          ? data
-          : new TextDecoder().decode(data as ArrayBuffer),
-      ) as WsFrame
-    } catch {
-      return null
-    }
-  }
-
-  private rejectAllPending(reason: string): void {
-    for (const [id, pending] of this.pendingRequests) {
-      clearTimeout(pending.timer)
-      pending.reject(new Error(reason))
-      this.pendingRequests.delete(id)
-    }
-  }
-
-  private resolvePendingRequest(frame: WsFrame): void {
-    if (frame.type !== 'res' || !frame.id) return
-
-    const pending = this.pendingRequests.get(frame.id)
-    if (!pending) return
-
-    this.pendingRequests.delete(frame.id)
-    clearTimeout(pending.timer)
-    if (frame.ok) {
-      pending.resolve(frame.payload)
-    } else {
-      pending.reject(new Error(frame.error?.message ?? 'RPC error'))
-    }
-  }
-
-  private handleChatStreamControlFrame(
-    frame: WsFrame,
-    subscribeId: string,
-    agentReqId: string,
-    finish: (event?: OpenClawStreamEvent) => void,
-  ): boolean {
-    if (frame.type !== 'res' || !frame.id) return false
-    if (frame.id !== subscribeId && frame.id !== agentReqId) return false
-
-    if (!frame.ok) {
-      finish({
-        type: 'error',
-        data: {
-          message: frame.error?.message ?? 'RPC error',
-          code: frame.error?.code,
-        },
-      })
-    }
-
-    return true
-  }
-
-  private handleChatStreamEventFrame(
-    frame: WsFrame,
-    controller: ReadableStreamDefaultController<OpenClawStreamEvent>,
-    finish: (event?: OpenClawStreamEvent) => void,
-  ): void {
-    if (frame.type !== 'event' || !frame.event || !frame.payload) return
-
-    switch (frame.event) {
-      case 'agent':
-        this.handleAgentStreamEvent(frame.payload, controller)
-        return
-      case 'session.tool':
-        this.handleSessionToolStreamEvent(frame.payload, controller)
-        return
-      case 'session.message':
-        this.handleSessionMessageStreamEvent(frame.payload, controller)
-        return
-      case 'chat':
-        this.handleChatCompletionEvent(frame.payload, finish)
-        return
-      default:
-        return
-    }
-  }
-
-  private handleAgentStreamEvent(
-    payload: Record<string, unknown>,
-    controller: ReadableStreamDefaultController<OpenClawStreamEvent>,
-  ): void {
-    const streamType = payload.stream as string | undefined
-    const data = payload.data as Record<string, unknown> | undefined
-
-    if (streamType === 'assistant' && data?.delta) {
-      controller.enqueue({
-        type: 'text-delta',
-        data: { text: data.delta },
-      })
-      return
-    }
-
-    if (streamType === 'item' && data) {
-      const phase = data.phase as string | undefined
-      if (phase === 'start') {
-        controller.enqueue({
-          type: 'tool-start',
-          data: {
-            toolCallId: data.toolCallId ?? data.id,
-            toolName: data.name ?? data.title,
-            kind: data.kind,
-          },
-        })
-        return
-      }
-
-      if (phase === 'end') {
-        controller.enqueue({
-          type: 'tool-end',
-          data: {
-            toolCallId: data.toolCallId ?? data.id,
-            status: data.status,
-            durationMs: data.durationMs,
-          },
-        })
-        return
-      }
-    }
-
-    if (streamType === 'lifecycle') {
-      controller.enqueue({
-        type: 'lifecycle',
-        data: { phase: data?.phase ?? payload.phase },
-      })
-    }
-  }
-
-  private handleSessionToolStreamEvent(
-    payload: Record<string, unknown>,
-    controller: ReadableStreamDefaultController<OpenClawStreamEvent>,
-  ): void {
-    const toolData = (payload.data as Record<string, unknown>) ?? payload
-    const phase = (toolData.phase as string) ?? (payload.phase as string)
-    if (phase !== 'result') return
-
-    controller.enqueue({
-      type: 'tool-output',
-      data: {
-        toolCallId: toolData.toolCallId,
-        isError: toolData.isError ?? false,
-        meta: toolData.meta,
-      },
-    })
-  }
-
-  private handleSessionMessageStreamEvent(
-    payload: Record<string, unknown>,
-    controller: ReadableStreamDefaultController<OpenClawStreamEvent>,
-  ): void {
-    const message = payload.message as Record<string, unknown> | undefined
-    if (message?.role !== 'assistant') return
-
-    const content = message.content as
-      | Array<Record<string, unknown>>
-      | undefined
-    if (!content) return
-
-    for (const block of content) {
-      if (block.type !== 'thinking') continue
-
-      const text =
-        (block.thinking as string) ??
-        (block.content as string) ??
-        (block.text as string) ??
-        ''
-      if (!text) continue
-
-      controller.enqueue({
-        type: 'thinking',
-        data: { text },
-      })
-    }
-  }
-
-  private handleChatCompletionEvent(
-    payload: Record<string, unknown>,
-    finish: (event?: OpenClawStreamEvent) => void,
-  ): void {
-    if ((payload.state as string | undefined) !== 'final') return
-
-    finish({
-      type: 'done',
-      data: { text: (payload.text as string) ?? '' },
-    })
-  }
-}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-client.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-client.ts
@@ -0,0 +1,581 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { OPENCLAW_CONTAINER_HOME } from '@browseros/shared/constants/openclaw'
+
+type LogFn = (line: string) => void
+
+interface ContainerExecutor {
+  execInContainer(command: string[], onLog?: LogFn): Promise<number>
+}
+
+export interface OpenClawConfigBatchEntry {
+  path: string
+  value: unknown
+}
+
+interface RawAgentRecord {
+  id: string
+  name?: string
+  workspace: string
+  model?: string
+}
+
+export interface OpenClawAgentRecord {
+  agentId: string
+  name: string
+  workspace: string
+  model?: string
+}
+
+export interface OpenClawSessionEntry {
+  key: string
+  updatedAt: number
+  sessionId: string
+  agentId: string
+  kind: string
+  status?: string
+  totalTokens?: number
+  model?: string
+  modelProvider?: string
+}
+
+export interface OpenClawChatBlock {
+  type: 'text' | 'toolCall' | 'thinking'
+  text?: string
+  name?: string
+  arguments?: unknown
+  thinking?: string
+}
+
+export interface OpenClawChatMessage {
+  role: 'user' | 'assistant' | 'toolResult'
+  content: OpenClawChatBlock[]
+  timestamp?: number
+  usage?: { input: number; output: number }
+  stopReason?: string
+  toolName?: string
+  toolCallId?: string
+  isError?: boolean
+}
+
+export class OpenClawCliClient {
+  constructor(private readonly executor: ContainerExecutor) {}
+
+  async runOnboard(
+    input: {
+      acceptRisk?: boolean
+      authChoice?: string
+      customBaseUrl?: string
+      customCompatibility?: 'anthropic' | 'openai-completions'
+      customModelId?: string
+      customProviderId?: string
+      gatewayAuth?: 'none' | 'password' | 'token'
+      gatewayBind?: 'auto' | 'custom' | 'lan' | 'loopback' | 'tailnet'
+      gatewayPort?: number
+      gatewayToken?: string
+      gatewayTokenRefEnv?: string
+      installDaemon?: boolean
+      mode?: 'local' | 'remote'
+      nonInteractive?: boolean
+      reset?: boolean
+      resetScope?: 'config' | 'config+creds+sessions' | 'full'
+      secretInputMode?: 'plain' | 'ref'
+      skipHealth?: boolean
+      workspace?: string
+    } = {},
+  ): Promise<void> {
+    const args = ['onboard']
+
+    if (input.nonInteractive) {
+      args.push('--non-interactive')
+    }
+    if (input.mode) {
+      args.push('--mode', input.mode)
+    }
+    if (input.workspace) {
+      args.push('--workspace', input.workspace)
+    }
+    if (input.reset) {
+      args.push('--reset')
+    }
+    if (input.resetScope) {
+      args.push('--reset-scope', input.resetScope)
+    }
+    if (input.authChoice) {
+      args.push('--auth-choice', input.authChoice)
+    }
+    if (input.secretInputMode) {
+      args.push('--secret-input-mode', input.secretInputMode)
+    }
+    if (input.customBaseUrl) {
+      args.push('--custom-base-url', input.customBaseUrl)
+    }
+    if (input.customModelId) {
+      args.push('--custom-model-id', input.customModelId)
+    }
+    if (input.customProviderId) {
+      args.push('--custom-provider-id', input.customProviderId)
+    }
+    if (input.customCompatibility) {
+      args.push('--custom-compatibility', input.customCompatibility)
+    }
+    if (input.gatewayAuth) {
+      args.push('--gateway-auth', input.gatewayAuth)
+    }
+    if (input.gatewayToken) {
+      args.push('--gateway-token', input.gatewayToken)
+    }
+    if (input.gatewayTokenRefEnv) {
+      args.push('--gateway-token-ref-env', input.gatewayTokenRefEnv)
+    }
+    if (input.gatewayPort) {
+      args.push('--gateway-port', String(input.gatewayPort))
+    }
+    if (input.gatewayBind) {
+      args.push('--gateway-bind', input.gatewayBind)
+    }
+    if (input.installDaemon === true) {
+      args.push('--install-daemon')
+    } else if (input.installDaemon === false) {
+      args.push('--no-install-daemon')
+    }
+    if (input.skipHealth) {
+      args.push('--skip-health')
+    }
+    if (input.acceptRisk) {
+      args.push('--accept-risk')
+    }
+
+    await this.runCommand(args)
+  }
+
+  async setConfig(path: string, value: unknown): Promise<void> {
+    await this.runCommand(['config', 'set', path, formatConfigValue(value)])
+  }
+
+  async setConfigBatch(entries: OpenClawConfigBatchEntry[]): Promise<void> {
+    await this.runCommand([
+      'config',
+      'set',
+      '--batch-json',
+      JSON.stringify(entries),
+    ])
+  }
+
+  async getConfig(path: string): Promise<unknown> {
+    const output = await this.runCommand(['config', 'get', path])
+    return parseConfigValue(output)
+  }
+
+  async validateConfig(): Promise<unknown> {
+    const output = await this.runCommand(['config', 'validate', '--json'])
+    return parseConfigValue(output)
+  }
+
+  async setDefaultModel(model: string): Promise<void> {
+    await this.runCommand(['models', 'set', model])
+  }
+
+  async listAgents(): Promise<OpenClawAgentRecord[]> {
+    const records = await this.runAgentListCommand()
+    const agents = Array.isArray(records) ? records : (records.agents ?? [])
+    return agents.map((record) => ({
+      agentId: record.id,
+      name: record.name ?? record.id,
+      workspace: record.workspace,
+      model: record.model,
+    }))
+  }
+
+  async createAgent(input: {
+    name: string
+    model?: string
+  }): Promise<OpenClawAgentRecord> {
+    const workspace = this.agentWorkspace(input.name)
+    const args = ['agents', 'add', input.name, '--workspace', workspace]
+
+    if (input.model) {
+      args.push('--model', input.model)
+    }
+
+    args.push('--non-interactive', '--json')
+    await this.runCommand(args)
+
+    const agents = await this.listAgents()
+    const agent = agents.find((entry) => entry.agentId === input.name)
+    if (!agent) {
+      throw new Error(`Created agent ${input.name} was not found in agent list`)
+    }
+
+    return agent
+  }
+
+  async deleteAgent(agentId: string): Promise<void> {
+    await this.runCommand(['agents', 'delete', agentId, '--force', '--json'])
+  }
+
+  async probe(): Promise<void> {
+    await this.listAgents()
+  }
+
+  async listSessions(agentId?: string): Promise<OpenClawSessionEntry[]> {
+    const args = ['sessions', '--json']
+    if (agentId) {
+      args.push('--agent', agentId)
+    } else {
+      args.push('--all-agents')
+    }
+
+    const output = await this.runCommand(args)
+    const parsed = parseFirstMatchingJson<
+      { sessions?: unknown[]; count?: number } | unknown[]
+    >(output, isSessionListPayload)
+
+    if (parsed === null) {
+      throw new Error(
+        `Failed to parse OpenClaw sessions output: ${output.slice(0, 200)}`,
+      )
+    }
+
+    const entries = Array.isArray(parsed) ? parsed : (parsed.sessions ?? [])
+    return entries.map(toSessionEntry)
+  }
+
+  async getChatHistory(sessionKey: string): Promise<OpenClawChatMessage[]> {
+    const output = await this.runCommand([
+      'gateway',
+      'call',
+      'chat.history',
+      '--params',
+      JSON.stringify({ sessionKey }),
+      '--json',
+    ])
+
+    const parsed = parseFirstMatchingJson<{ messages?: unknown[] }>(
+      output,
+      (value) => isPlainObject(value) && 'messages' in value,
+    )
+
+    if (parsed === null) {
+      throw new Error(
+        `Failed to parse OpenClaw chat history output: ${output.slice(0, 200)}`,
+      )
+    }
+
+    return (parsed.messages ?? []).map(toChatMessage)
+  }
+
+  private agentWorkspace(name: string): string {
+    return name === 'main'
+      ? `${OPENCLAW_CONTAINER_HOME}/workspace`
+      : `${OPENCLAW_CONTAINER_HOME}/workspace-${name}`
+  }
+
+  private async runCommand(args: string[]): Promise<string> {
+    const output: string[] = []
+    const command = ['node', 'dist/index.js', ...args]
+    const exitCode = await this.executor.execInContainer(command, (line) => {
+      output.push(line)
+    })
+
+    if (exitCode !== 0) {
+      const detail = output.join('\n').trim()
+      throw new Error(
+        detail || `OpenClaw command failed (${args.slice(0, 2).join(' ')})`,
+      )
+    }
+
+    return output.join('\n').trim()
+  }
+
+  private async runAgentListCommand(): Promise<
+    RawAgentRecord[] | { agents?: RawAgentRecord[] }
+  > {
+    const output = await this.runCommand(['agents', 'list', '--json'])
+    return parseAgentListOutput(output)
+  }
+}
+
+// Strings are passed to the CLI verbatim; everything else is JSON-encoded.
+function formatConfigValue(value: unknown): string {
+  return typeof value === 'string' ? value : JSON.stringify(value)
+}
+
+// Yields the JSON value found in `output`, or the raw text if none parses.
+function parseConfigValue(output: string): unknown {
+  return selectConfigJson<unknown>(output) ?? output
+}
+
+function parseAgentListOutput(
+  output: string,
+): RawAgentRecord[] | { agents?: RawAgentRecord[] } {
+  const parsed = parseFirstMatchingJson<
+    RawAgentRecord[] | { agents?: RawAgentRecord[] }
+  >(output, isAgentListPayload)
+  if (parsed !== null) return parsed
+
+  throw new Error(
+    `Failed to parse OpenClaw JSON output: ${output.slice(0, 200)}`,
+  )
+}
+
+// Walks the JSON candidates in order and returns the first one that parses
+// and, when `predicate` is given, satisfies it. Returns null otherwise.
+function parseFirstMatchingJson<T>(
+  output: string,
+  predicate?: (value: unknown) => boolean,
+): T | null {
+  for (const text of collectJsonCandidates(output)) {
+    const value = tryParseJson<T>(text)
+    if (value === null) continue
+    const accepted = !predicate || predicate(value)
+    if (accepted) return value
+  }
+
+  return null
+}
+
+// Chooses the config payload among the JSON candidates: structured log
+// lines are ignored, and of the rest the one with the longest source text
+// wins (the earliest candidate on a tie). Returns null when none qualifies.
+function selectConfigJson<T>(output: string): T | null {
+  let best: { text: string; value: T } | null = null
+
+  for (const text of collectJsonCandidates(output)) {
+    const value = tryParseJson<T>(text)
+    if (value === null || isStructuredLogPayload(value)) continue
+
+    if (best === null || text.length > best.text.length) {
+      best = { text, value }
+    }
+  }
+
+  return best === null ? null : best.value
+}
+
+// Lists strings that might be JSON, in priority order: the whole trimmed
+// output, each non-blank trimmed line, then every span that
+// `extractJsonSubstring` returns for a `[` or `{` in the raw output.
+function collectJsonCandidates(output: string): string[] {
+  const lines = output
+    .split(/\r?\n/)
+    .map((line) => line.trim())
+    .filter((line) => line !== '')
+  const spans: string[] = []
+  for (let start = 0; start < output.length; start += 1) {
+    const opener = output[start]
+    if (opener === '[' || opener === '{') {
+      const span = extractJsonSubstring(output, start)
+      if (span) spans.push(span)
+    }
+  }
+
+  return [output.trim(), ...lines, ...spans]
+}
+
+function extractJsonSubstring(
+  output: string,
+  startIndex: number,
+): string | null {
+  const opening = output[startIndex]
+  const closing = opening === '{' ? '}' : ']'
+  const stack: string[] = [closing]
+  let inString = false
+  let escaped = false
+
+  for (let index = startIndex + 1; index < output.length; index += 1) {
+    const char = output[index]
+
+    if (inString) {
+      if (escaped) {
+        escaped = false
+        continue
+      }
+      if (char === '\\') {
+        escaped = true
+        continue
+      }
+      if (char === '"') {
+        inString = false
+      }
+      continue
+    }
+
+    if (char === '"') {
+      inString = true
+      continue
+    }
+
+    if (char === '{') {
+      stack.push('}')
+      continue
+    }
+
+    if (char === '[') {
+      stack.push(']')
+      continue
+    }
+
+    const expectedClosing = stack[stack.length - 1]
+    if (char === expectedClosing) {
+      stack.pop()
+      if (stack.length === 0) {
+        return output.slice(startIndex, index + 1)
+      }
+    }
+  }
+
+  return null
+}
+
+// Parses `value` as JSON; blank or malformed input yields null. Note that a
+// literal JSON `null` document is therefore indistinguishable from a failure.
+function tryParseJson<T>(value: string): T | null {
+  const text = value.trim()
+  if (text.length === 0) return null
+  try {
+    return JSON.parse(text) as T
+  } catch {
+    return null
+  }
+}
+
+function isAgentListPayload(
+  value: unknown,
+): value is RawAgentRecord[] | { agents?: RawAgentRecord[] } {
+  if (Array.isArray(value)) {
+    return value.every(isRawAgentRecord)
+  }
+
+  if (!isPlainObject(value)) return false
+
+  if (!('agents' in value)) return false
+
+  const agents = (value as { agents?: unknown }).agents
+  return (
+    agents === undefined ||
+    (Array.isArray(agents) && agents.every(isRawAgentRecord))
+  )
+}
+
+function isRawAgentRecord(value: unknown): value is RawAgentRecord {
+  return (
+    isPlainObject(value) &&
+    typeof value.id === 'string' &&
+    typeof value.workspace === 'string' &&
+    (value.name === undefined || typeof value.name === 'string') &&
+    (value.model === undefined || typeof value.model === 'string')
+  )
+}
+
+function isPlainObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value)
+}
+
+// A structured log line: an object with a string `level` plus a string
+// `message` or `msg`. Such payloads are skipped when picking config JSON.
+function isStructuredLogPayload(value: unknown): boolean {
+  if (!isPlainObject(value)) return false
+  if (typeof value.level !== 'string') return false
+  const { message, msg } = value
+  return typeof message === 'string' || typeof msg === 'string'
+}
+
+// Any array passes; an object passes with a `sessions` or `count` key.
+function isSessionListPayload(value: unknown): boolean {
+  if (Array.isArray(value)) return true
+  return isPlainObject(value) && ('sessions' in value || 'count' in value)
+}
+
+function toSessionEntry(raw: unknown): OpenClawSessionEntry {
+  const record = isPlainObject(raw) ? raw : {}
+  return {
+    key: String(record.key ?? ''),
+    updatedAt: typeof record.updatedAt === 'number' ? record.updatedAt : 0,
+    sessionId: String(record.sessionId ?? ''),
+    agentId: String(record.agentId ?? ''),
+    kind: String(record.kind ?? ''),
+    status: typeof record.status === 'string' ? record.status : undefined,
+    totalTokens:
+      typeof record.totalTokens === 'number' ? record.totalTokens : undefined,
+    model: typeof record.model === 'string' ? record.model : undefined,
+    modelProvider:
+      typeof record.modelProvider === 'string'
+        ? record.modelProvider
+        : undefined,
+  }
+}
+
+function toChatMessage(raw: unknown): OpenClawChatMessage {
+  const record = isPlainObject(raw) ? raw : {}
+  const role = isOpenClawMessageRole(record.role) ? record.role : 'assistant'
+  const message: OpenClawChatMessage = {
+    role,
+    content: toChatBlocks(record.content),
+  }
+
+  if (typeof record.timestamp === 'number') message.timestamp = record.timestamp
+  if (isPlainObject(record.usage)) {
+    const { input, output } = record.usage
+    if (typeof input === 'number' && typeof output === 'number') {
+      message.usage = { input, output }
+    }
+  }
+  if (typeof record.stopReason === 'string') {
+    message.stopReason = record.stopReason
+  }
+  if (typeof record.toolName === 'string') message.toolName = record.toolName
+  if (typeof record.toolCallId === 'string') {
+    message.toolCallId = record.toolCallId
+  }
+  if (typeof record.isError === 'boolean') message.isError = record.isError
+
+  return message
+}
+
+function toChatBlocks(content: unknown): OpenClawChatBlock[] {
+  if (typeof content === 'string') {
+    return [{ type: 'text', text: content }]
+  }
+
+  if (!Array.isArray(content)) return []
+
+  const blocks: OpenClawChatBlock[] = []
+  for (const rawBlock of content) {
+    if (!isPlainObject(rawBlock)) continue
+
+    if (rawBlock.type === 'toolCall') {
+      const block: OpenClawChatBlock = { type: 'toolCall' }
+      if (typeof rawBlock.name === 'string') block.name = rawBlock.name
+      if (rawBlock.arguments !== undefined) {
+        block.arguments = rawBlock.arguments
+      }
+      blocks.push(block)
+      continue
+    }
+
+    if (rawBlock.type === 'thinking') {
+      const block: OpenClawChatBlock = { type: 'thinking' }
+      if (typeof rawBlock.thinking === 'string') {
+        block.thinking = rawBlock.thinking
+      }
+      blocks.push(block)
+      continue
+    }
+
+    const block: OpenClawChatBlock = { type: 'text' }
+    if (typeof rawBlock.text === 'string') block.text = rawBlock.text
+    blocks.push(block)
+  }
+
+  return blocks
+}
+
+function isOpenClawMessageRole(
+  value: unknown,
+): value is OpenClawChatMessage['role'] {
+  return value === 'user' || value === 'assistant' || value === 'toolResult'
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-providers/claude-cli.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-providers/claude-cli.ts
@@ -0,0 +1,72 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import type {
+  OpenClawCliProvider,
+  OpenClawCliProviderAuthStatus,
+} from './types'
+
+const CLAUDE_CLI_MODELS = [
+  'claude-sonnet-4-6',
+  'claude-opus-4-6',
+  'claude-haiku-4-5',
+] as const
+
+// `claude auth status` emits JSON on both the logged-in (exit 0) and
+// not-logged-in (exit 1) paths. The caller passes us stdout alone —
+// the exec layer separates stdout and stderr so no extraction or
+// stripping of nerdctl noise is needed.
+interface ClaudeAuthStatusPayload {
+  loggedIn?: boolean
+  email?: string
+  subscriptionType?: string
+}
+
+// Converts `claude auth status` stdout and exit code into a provider auth
+// status; output that is not valid JSON is reported via the `error` field.
+function parseClaudeAuthStatus(
+  stdout: string,
+  exitCode: number,
+): OpenClawCliProviderAuthStatus {
+  const trimmed = stdout.trim()
+  // Binary missing: claude isn't installed / not on PATH.
+  if (exitCode === 127 || !trimmed) return { installed: false, loggedIn: false }
+
+  let payload: ClaudeAuthStatusPayload
+  try {
+    // `null` is valid JSON; coalesce it so the field reads below cannot throw.
+    payload = (JSON.parse(trimmed) ?? {}) as ClaudeAuthStatusPayload
+  } catch {
+    return {
+      installed: true,
+      loggedIn: false,
+      error: `Unexpected claude auth status output: ${trimmed.slice(0, 200)}`,
+    }
+  }
+
+  return {
+    installed: true,
+    loggedIn: !!payload.loggedIn,
+    accountLabel: payload.email,
+    subscriptionLabel: payload.subscriptionType,
+  }
+}
+
+export const CLAUDE_CLI_PROVIDER: OpenClawCliProvider = {
+  id: 'claude-cli',
+  displayName: 'Anthropic Claude CLI',
+  description: 'Uses your Claude.ai subscription via the Claude Code CLI',
+  npmPackage: '@anthropic-ai/claude-code',
+  npmPackageVersion: '2.1.119',
+  binary: 'claude',
+  authStatusCommand: ['claude', 'auth', 'status'],
+  // `claude auth login` in 2.1.x silently discards stdin. The REPL's
+  // `/login` slash command, launched from a fresh `claude` invocation,
+  // does accept a pasted token.
+  authLoginCommand: 'claude /login',
+  models: CLAUDE_CLI_MODELS,
+  parseAuthStatus: parseClaudeAuthStatus,
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-providers/registry.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-providers/registry.ts
@@ -0,0 +1,32 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * Registry of OpenClaw CLI-backed providers. Add entries here as we
+ * enable more (Gemini CLI, Codex CLI, etc.).
+ */
+
+import { CLAUDE_CLI_PROVIDER } from './claude-cli'
+import type { OpenClawCliProvider } from './types'
+
+export const OPENCLAW_CLI_PROVIDERS: readonly OpenClawCliProvider[] = [
+  CLAUDE_CLI_PROVIDER,
+]
+
+export function getOpenClawCliProvider(
+  id: string,
+): OpenClawCliProvider | undefined {
+  return OPENCLAW_CLI_PROVIDERS.find((provider) => provider.id === id)
+}
+
+export function isOpenClawCliProviderId(id: string): boolean {
+  return OPENCLAW_CLI_PROVIDERS.some((provider) => provider.id === id)
+}
+
+export function buildOpenClawCliProviderModelRef(
+  providerId: string,
+  modelId: string,
+): string {
+  return `${providerId}/${modelId}`
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-providers/types.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-cli-providers/types.ts
@@ -0,0 +1,39 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * OpenClaw CLI-backed provider registry types.
+ *
+ * A "CLI provider" is a tool that runs inside the OpenClaw gateway
+ * container (e.g. Claude Code CLI, Gemini CLI). OpenClaw spawns the
+ * binary as a subprocess when the active model is prefixed with the
+ * provider id — so our job is to install the tool and surface its
+ * auth status to the user. No Anthropic/OpenRouter-style API key.
+ */
+
+export interface OpenClawCliProviderAuthStatus {
+  installed: boolean // false when the provider CLI produced no usable run
+  loggedIn: boolean
+  accountLabel?: string // e.g. the account email for the Claude provider
+  subscriptionLabel?: string // e.g. the Claude subscription type
+  error?: string // set when the status output could not be interpreted
+}
+
+export interface OpenClawCliProvider {
+  id: string // registry lookup key; also the prefix of `id/model` refs
+  displayName: string
+  description: string
+  npmPackage: string
+  // Pinned package version. npm installs go through argv directly
+  // (no shell), so `@latest` drift can't silently ship through.
+  npmPackageVersion: string
+  binary: string
+  authStatusCommand: string[] // argv form, e.g. ['claude', 'auth', 'status']
+  authLoginCommand: string // one command string, e.g. 'claude /login'
+  models: readonly string[] // model ids, without the provider prefix
+  parseAuthStatus: (
+    stdout: string,
+    exitCode: number,
+  ) => OpenClawCliProviderAuthStatus
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-config.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-config.ts
@@ -1,279 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- *
- * Pure functions for building OpenClaw bootstrap configuration.
- * Config is write-once at setup — agent CRUD uses WS RPC, not config edits.
- */
-
-import {
-  OPENCLAW_CONTAINER_HOME,
-  OPENCLAW_GATEWAY_PORT,
-} from '@browseros/shared/constants/openclaw'
-import { DEFAULT_PORTS } from '@browseros/shared/constants/ports'
-
-const OPENCLAW_IMAGE = 'ghcr.io/openclaw/openclaw:latest'
-
-export const PROVIDER_ENV_MAP: Record<string, string> = {
-  anthropic: 'ANTHROPIC_API_KEY',
-  openai: 'OPENAI_API_KEY',
-  google: 'GEMINI_API_KEY',
-  openrouter: 'OPENROUTER_API_KEY',
-  moonshot: 'MOONSHOT_API_KEY',
-  groq: 'GROQ_API_KEY',
-  mistral: 'MISTRAL_API_KEY',
-}
-
-export interface OpenClawProviderInput {
-  providerType?: string
-  providerName?: string
-  baseUrl?: string
-  modelId?: string
-  apiKey?: string
-}
-
-export interface BootstrapConfigInput {
-  gatewayPort: number
-  gatewayToken: string
-  browserosServerPort?: number
-  providerType?: string
-  providerName?: string
-  baseUrl?: string
-  modelId?: string
-}
-
-export interface EnvFileInput {
-  image?: string
-  port?: number
-  token: string
-  configDir: string
-  timezone?: string
-  providerKeys?: Record<string, string>
-}
-
-export interface ResolvedProviderConfig {
-  model?: string
-  providerKeys: Record<string, string>
-  models?: {
-    mode: 'merge'
-    providers: Record<string, Record<string, unknown>>
-  }
-}
-
-function hasBuiltinProvider(providerType?: string): providerType is string {
-  return !!providerType && providerType in PROVIDER_ENV_MAP
-}
-
-/**
- * OpenRouter's public slugs use dots for version numbers
- * (e.g. `anthropic/claude-haiku-4.5`), but openclaw's model registry expects
- * dashes (`claude-haiku-4-5`). Passing the dotted form makes openclaw fail
- * the registry lookup silently and the agent turn completes with zero
- * payloads. Rewrite dots to dashes for openrouter model ids only.
- */
-function normalizeBuiltinModelId(
-  providerType: string,
-  modelId: string,
-): string {
-  if (providerType !== 'openrouter') return modelId
-  return modelId.replace(/\./g, '-')
-}
-
-export function deriveOpenClawProviderId(providerInput: {
-  providerType?: string
-  providerName?: string
-  baseUrl?: string
-}): string {
-  const source =
-    providerInput.providerName?.trim() ||
-    providerInput.baseUrl?.trim() ||
-    providerInput.providerType?.trim() ||
-    'custom-provider'
-
-  const candidate = source
-    .toLowerCase()
-    .replace(/^https?:\/\//, '')
-    .replace(/[^a-z0-9]+/g, '-')
-    .replace(/^-|-$/g, '')
-
-  return candidate || 'custom-provider'
-}
-
-export function deriveOpenClawApiKeyEnvVar(providerId: string): string {
-  return `${providerId.toUpperCase().replace(/-/g, '_')}_API_KEY`
-}
-
-export function resolveProviderConfig(
-  input: OpenClawProviderInput,
-): ResolvedProviderConfig {
-  if (!input.providerType) {
-    return { providerKeys: {} }
-  }
-
-  if (hasBuiltinProvider(input.providerType)) {
-    const providerKeys: Record<string, string> = {}
-    if (input.apiKey) {
-      providerKeys[PROVIDER_ENV_MAP[input.providerType]] = input.apiKey
-    }
-
-    const normalizedModelId = input.modelId
-      ? normalizeBuiltinModelId(input.providerType, input.modelId)
-      : undefined
-
-    return {
-      providerKeys,
-      model: normalizedModelId
-        ? `${input.providerType}/${normalizedModelId}`
-        : undefined,
-    }
-  }
-
-  if (!input.baseUrl) {
-    return { providerKeys: {} }
-  }
-
-  const providerId = deriveOpenClawProviderId(input)
-  const apiKeyEnvVar = deriveOpenClawApiKeyEnvVar(providerId)
-  const providerKeys: Record<string, string> = {}
-
-  if (input.apiKey) {
-    providerKeys[apiKeyEnvVar] = input.apiKey
-  }
-
-  const providerConfig: Record<string, unknown> = {
-    baseUrl: input.baseUrl,
-    apiKey: `\${${apiKeyEnvVar}}`,
-    api: 'openai-completions',
-  }
-
-  if (input.modelId) {
-    providerConfig.models = [{ id: input.modelId, name: input.modelId }]
-  }
-
-  return {
-    providerKeys,
-    model: input.modelId ? `${providerId}/${input.modelId}` : undefined,
-    models: {
-      mode: 'merge',
-      providers: {
-        [providerId]: providerConfig,
-      },
-    },
-  }
-}
-
-export function buildBootstrapConfig(
-  input: BootstrapConfigInput,
-): Record<string, unknown> {
-  const serverPort = input.browserosServerPort ?? DEFAULT_PORTS.server
-  const provider = resolveProviderConfig(input)
-
-  const defaults: Record<string, unknown> = {
-    workspace: `${OPENCLAW_CONTAINER_HOME}/workspace`,
-    timeoutSeconds: 4200,
-    thinkingDefault: 'adaptive',
-  }
-
-  if (provider.model) {
-    defaults.model = { primary: provider.model }
-  }
-  const config: Record<string, unknown> = {
-    gateway: {
-      mode: 'local',
-      port: input.gatewayPort,
-      bind: 'lan',
-      auth: { mode: 'token', token: input.gatewayToken },
-      reload: { mode: 'restart' },
-      controlUi: {
-        allowInsecureAuth: true,
-        allowedOrigins: [
-          `http://127.0.0.1:${input.gatewayPort}`,
-          `http://localhost:${input.gatewayPort}`,
-        ],
-      },
-      http: {
-        endpoints: {
-          chatCompletions: { enabled: true },
-        },
-      },
-    },
-    agents: { defaults },
-    tools: {
-      profile: 'full',
-      web: {
-        search: { provider: 'duckduckgo', enabled: true },
-      },
-      exec: {
-        host: 'gateway',
-        security: 'full',
-        ask: 'off',
-      },
-    },
-    cron: { enabled: true },
-    hooks: {
-      internal: {
-        enabled: true,
-        entries: {
-          'boot-md': { enabled: true },
-          'bootstrap-extra-files': { enabled: true },
-          'session-memory': { enabled: true },
-        },
-      },
-    },
-    mcp: {
-      servers: {
-        browseros: {
-          url: `http://host.containers.internal:${serverPort}/mcp`,
-          transport: 'streamable-http',
-        },
-      },
-    },
-    approvals: {
-      exec: { enabled: false },
-    },
-    skills: {
-      install: { nodeManager: 'bun' },
-    },
-  }
-
-  if (provider.models) {
-    config.models = provider.models
-  }
-
-  if (process.env.NODE_ENV === 'development') {
-    config.logging = { level: 'debug', consoleLevel: 'debug' }
-  }
-
-  return config
-}
-
-export function buildEnvFile(input: EnvFileInput): string {
-  const lines: string[] = [
-    `OPENCLAW_IMAGE=${input.image ?? OPENCLAW_IMAGE}`,
-    `OPENCLAW_GATEWAY_PORT=${input.port ?? OPENCLAW_GATEWAY_PORT}`,
-    `OPENCLAW_GATEWAY_TOKEN=${input.token}`,
-    `OPENCLAW_CONFIG_DIR=${input.configDir}`,
-    `TZ=${input.timezone ?? Intl.DateTimeFormat().resolvedOptions().timeZone}`,
-  ]
-
-  if (input.providerKeys) {
-    for (const [key, value] of Object.entries(input.providerKeys)) {
-      lines.push(`${key}=${value}`)
-    }
-  }
-
-  return `${lines.join('\n')}\n`
-}
-
-export function resolveProviderKeys(
-  input: OpenClawProviderInput,
-): Record<string, string> {
-  return resolveProviderConfig(input).providerKeys
-}
-
-export function resolveProviderModel(
-  input: OpenClawProviderInput,
-): string | undefined {
-  return resolveProviderConfig(input).model
-}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-env.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-env.ts
@@ -0,0 +1,73 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { join } from 'node:path'
+
+const STATE_DIR_NAME = '.openclaw'
+
+export function getOpenClawStateDir(openclawDir: string): string {
+  return join(openclawDir, STATE_DIR_NAME)
+}
+
+export function getOpenClawStateConfigPath(openclawDir: string): string {
+  return join(getOpenClawStateDir(openclawDir), 'openclaw.json')
+}
+
+export function getOpenClawStateEnvPath(openclawDir: string): string {
+  return join(getOpenClawStateDir(openclawDir), '.env')
+}
+
+export function getHostWorkspaceDir(
+  openclawDir: string,
+  agentName: string,
+): string {
+  return join(
+    getOpenClawStateDir(openclawDir),
+    agentName === 'main' ? 'workspace' : `workspace-${agentName}`,
+  )
+}
+
+/**
+ * Upserts `updates` into `.env`-style text as `KEY=value` lines.
+ *
+ * The first line starting with `KEY=` is overwritten in place; keys with no
+ * such line are appended. `content` is trimmed and newline-terminated, and
+ * `changed` tells whether it differs from the normalized input.
+ */
+export function mergeEnvContent(
+  current: string,
+  updates: Record<string, string>,
+): { changed: boolean; content: string } {
+  const before = normalizeEnvContent(current)
+  const entries = Object.entries(updates)
+  if (entries.length === 0) return { changed: false, content: before }
+
+  // Drop trailing blank lines first: "A=1\n" splits into ['A=1', ''], which
+  // used to leave a stray empty line in front of every appended key.
+  const nextLines = current.split(/\r?\n/)
+  while (nextLines[nextLines.length - 1]?.trim() === '') nextLines.pop()
+  let changed = false
+
+  for (const [key, value] of entries) {
+    const replacement = `${key}=${value}`
+    const index = nextLines.findIndex((line) => line.startsWith(`${key}=`))
+    if (index === -1) {
+      nextLines.push(replacement)
+      changed = true
+    } else if (nextLines[index] !== replacement) {
+      nextLines[index] = replacement
+      changed = true
+    }
+  }
+
+  const content = normalizeEnvContent(nextLines.join('\n'))
+  return { changed: changed || content !== before, content }
+}
+
+function normalizeEnvContent(content: string): string {
+  const trimmed = content.trim()
+  return trimmed ? `${trimmed}\n` : ''
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-http-client.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-http-client.ts
@@ -0,0 +1,529 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { createParser, type EventSourceMessage } from 'eventsource-parser'
+import { OpenClawSessionNotFoundError } from './errors'
+import type { OpenClawStreamEvent } from './openclaw-types'
+
+/**
+ * A prior conversation turn. `fetchChat` places these entries, in order,
+ * ahead of the new user message in the request's `messages` array.
+ */
+export interface OpenClawChatHistoryMessage {
+  role: 'user' | 'assistant'
+  content: string
+}
+
+/**
+ * OpenAI-compatible content parts for multimodal user messages. OpenClaw's
+ * gateway accepts the standard `content: [{ type: 'text', ... }, { type:
+ * 'image_url', image_url: { url } }]` shape on /v1/chat/completions and
+ * routes it to whichever upstream provider the agent's model points at.
+ */
+export type OpenClawChatContentPart =
+  | { type: 'text'; text: string }
+  | {
+      type: 'image_url'
+      image_url: { url: string; detail?: 'auto' | 'low' | 'high' }
+    }
+
+/** Input for one chat turn sent to `/v1/chat/completions`. */
+export interface OpenClawChatRequest {
+  // Selects the `model` value (see resolveAgentModel) and is embedded in
+  // the request's `user` field.
+  agentId: string
+  // Embedded in the request's `user` field as
+  // `browseros:<agentId>:<sessionKey>`.
+  sessionKey: string
+  // Plain-text user message; sent as-is when `messageParts` is absent or
+  // empty.
+  message: string
+  // When present, sent as the user message's `content` array verbatim. The
+  // legacy string `message` is folded into a leading text part if no text
+  // part is present in `messageParts`.
+  messageParts?: OpenClawChatContentPart[]
+  // Earlier turns to send before the new user message.
+  history?: OpenClawChatHistoryMessage[]
+  // Passed to `fetch` and to the stream pump that reads the response.
+  signal?: AbortSignal
+}
+
+/** One message inside a session-history payload. */
+export interface OpenClawSessionHistoryMessage {
+  role: 'user' | 'assistant' | 'system' | 'tool'
+  content: string
+  messageId?: string
+  messageSeq?: number
+  // NOTE(review): unit (seconds vs. milliseconds) is not visible here —
+  // confirm against the gateway before doing date math on it.
+  timestamp?: number
+}
+
+/**
+ * Shape `getSessionHistory` assigns to the JSON body of
+ * `GET /sessions/<sessionKey>/history`. The body is cast, not validated.
+ */
+export interface OpenClawSessionHistory {
+  sessionKey: string
+  messages: OpenClawSessionHistoryMessage[]
+  cursor?: string | null
+  hasMore?: boolean
+  truncated?: boolean
+}
+
+/** Optional paging and cancellation inputs for session-history requests. */
+export interface OpenClawSessionHistoryInput {
+  // Sent as the `limit` query parameter when defined.
+  limit?: number
+  // Sent as the `cursor` query parameter when defined.
+  cursor?: string
+  // Passed to `fetch` and, for streaming, to the stream pump.
+  signal?: AbortSignal
+}
+
+/**
+ * Events emitted by `streamSessionHistory`. `toHistoryEvent` builds them
+ * from SSE messages whose event name is `history`, `message` or `error`;
+ * the pump also emits `error` itself when reading the body fails.
+ */
+export type OpenClawSessionHistoryEvent =
+  | { type: 'history'; data: OpenClawSessionHistory }
+  | {
+      type: 'message'
+      data: {
+        sessionKey: string
+        message: OpenClawSessionHistoryMessage
+        messageId?: string
+        messageSeq: number
+      }
+    }
+  | { type: 'error'; data: { message: string } }
+
+/**
+ * HTTP client for the OpenClaw gateway on the loopback interface
+ * (`127.0.0.1:<hostPort>`). Every request obtains a bearer token from
+ * `getToken` before it is sent.
+ */
+export class OpenClawHttpClient {
+  constructor(
+    private readonly hostPort: number,
+    private readonly getToken: () => Promise<string>,
+  ) {}
+
+  /**
+   * Send a chat turn and expose the streamed reply as events.
+   *
+   * @throws Error when the gateway answers with a non-OK status or the
+   *   response carries no body.
+   */
+  async streamChat(
+    input: OpenClawChatRequest,
+  ): Promise<ReadableStream<OpenClawStreamEvent>> {
+    const { body } = await this.fetchChat(input)
+    if (body) {
+      return createEventStream(body, input.signal)
+    }
+    throw new Error('OpenClaw chat response had no body')
+  }
+
+  /**
+   * Fetch a session's history as a single JSON document.
+   *
+   * @throws OpenClawSessionNotFoundError on HTTP 404.
+   * @throws Error on any other non-OK status.
+   */
+  async getSessionHistory(
+    sessionKey: string,
+    input: OpenClawSessionHistoryInput = {},
+  ): Promise<OpenClawSessionHistory> {
+    const response = await this.fetchSessionHistory(sessionKey, input, {})
+    const history = (await response.json()) as OpenClawSessionHistory
+    return history
+  }
+
+  /**
+   * Fetch a session's history as a server-sent event stream.
+   *
+   * @throws OpenClawSessionNotFoundError on HTTP 404.
+   * @throws Error on any other non-OK status or when there is no body.
+   */
+  async streamSessionHistory(
+    sessionKey: string,
+    input: OpenClawSessionHistoryInput = {},
+  ): Promise<ReadableStream<OpenClawSessionHistoryEvent>> {
+    const sseHeaders = { Accept: 'text/event-stream' }
+    const { body } = await this.fetchSessionHistory(
+      sessionKey,
+      input,
+      sseHeaders,
+    )
+    if (body) {
+      return createHistoryEventStream(body, input.signal)
+    }
+    throw new Error('OpenClaw session history stream had no body')
+  }
+
+  /**
+   * Probe `/v1/models` with the current token.
+   *
+   * @returns true when the gateway answers with an OK status; false on any
+   *   other status or when fetching the token or the request itself fails.
+   */
+  async isAuthenticated(): Promise<boolean> {
+    try {
+      const authorization = `Bearer ${await this.getToken()}`
+      const { ok } = await fetch(this.endpoint('/v1/models'), {
+        method: 'GET',
+        headers: { Authorization: authorization },
+      })
+      return ok
+    } catch {
+      return false
+    }
+  }
+
+  /** Build an absolute gateway URL for the given path (and query). */
+  private endpoint(path: string): string {
+    return `http://127.0.0.1:${this.hostPort}${path}`
+  }
+
+  /** POST the chat-completions request; rejects on a non-OK status. */
+  private async fetchChat(input: OpenClawChatRequest): Promise<Response> {
+    const token = await this.getToken()
+    const messages = [
+      ...(input.history ?? []),
+      { role: 'user', content: buildUserContent(input) },
+    ]
+    const payload = JSON.stringify({
+      model: resolveAgentModel(input.agentId),
+      stream: true,
+      messages,
+      user: `browseros:${input.agentId}:${input.sessionKey}`,
+    })
+
+    const response = await fetch(this.endpoint('/v1/chat/completions'), {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${token}`,
+        'Content-Type': 'application/json',
+      },
+      body: payload,
+      signal: input.signal,
+    })
+
+    if (!response.ok) {
+      // Prefer the gateway's own error text; fall back to the status code.
+      const detail = await response.text()
+      throw new Error(
+        detail || `OpenClaw chat failed with status ${response.status}`,
+      )
+    }
+    return response
+  }
+
+  /** GET the session-history endpoint with optional extra headers. */
+  private async fetchSessionHistory(
+    sessionKey: string,
+    input: OpenClawSessionHistoryInput,
+    extraHeaders: Record<string, string>,
+  ): Promise<Response> {
+    const token = await this.getToken()
+    const url = this.endpoint(buildHistoryPath(sessionKey, input))
+    const response = await fetch(url, {
+      method: 'GET',
+      headers: {
+        Authorization: `Bearer ${token}`,
+        ...extraHeaders,
+      },
+      signal: input.signal,
+    })
+
+    if (response.status === 404) {
+      throw new OpenClawSessionNotFoundError(sessionKey)
+    }
+    if (response.ok) {
+      return response
+    }
+    const detail = await response.text()
+    throw new Error(
+      detail ||
+        `OpenClaw session history failed with status ${response.status}`,
+    )
+  }
+}
+
+/**
+ * Build the request path (with query string) for a session's history.
+ *
+ * @param sessionKey - Session key; URI-encoded into the path.
+ * @param input - `limit` and `cursor` become query parameters when defined.
+ * @returns `/sessions/<key>/history`, followed by `?…` when any parameter
+ *   is set.
+ */
+function buildHistoryPath(
+  sessionKey: string,
+  input: OpenClawSessionHistoryInput,
+): string {
+  const { limit, cursor } = input
+  const params = new URLSearchParams()
+  if (limit !== undefined) {
+    params.set('limit', String(limit))
+  }
+  if (cursor !== undefined) {
+    params.set('cursor', cursor)
+  }
+
+  const basePath = `/sessions/${encodeURIComponent(sessionKey)}/history`
+  const query = params.toString()
+  return query === '' ? basePath : `${basePath}?${query}`
+}
+
+/**
+ * Map an agent ID to the `model` value sent to the gateway: `openclaw`
+ * for the `main` agent, `openclaw/<agentId>` for every other agent.
+ */
+function resolveAgentModel(agentId: string): string {
+  if (agentId === 'main') {
+    return 'openclaw'
+  }
+  return `openclaw/${agentId}`
+}
+
+/**
+ * Produce the `content` value of the trailing user message.
+ *
+ * - No `messageParts` (or an empty array): the plain `message` string, so
+ *   text-only sends keep a string `content`.
+ * - `messageParts` already holds a text part: the parts, unchanged.
+ * - Otherwise: the parts with `message` prepended as a text part, unless
+ *   `message` is blank, in which case the parts are returned unchanged.
+ */
+function buildUserContent(
+  input: OpenClawChatRequest,
+): string | OpenClawChatContentPart[] {
+  const parts = input.messageParts
+  if (!parts?.length) {
+    return input.message
+  }
+
+  for (const part of parts) {
+    if (part.type === 'text') return parts
+  }
+
+  if (input.message.trim() === '') {
+    return parts
+  }
+  const leadingText: OpenClawChatContentPart = {
+    type: 'text',
+    text: input.message,
+  }
+  return [leadingText, ...parts]
+}
+
+/**
+ * Wrap a raw chat response body in a stream of parsed chat events.
+ * The pump starts when the stream is constructed and is not awaited.
+ */
+function createEventStream(
+  body: ReadableStream<Uint8Array>,
+  signal?: AbortSignal,
+): ReadableStream<OpenClawStreamEvent> {
+  const source: UnderlyingDefaultSource<OpenClawStreamEvent> = {
+    start: (controller) => {
+      void pumpChatEvents(body, controller, signal)
+    },
+  }
+  return new ReadableStream<OpenClawStreamEvent>(source)
+}
+
+/**
+ * Drain an SSE chat-completions body into `controller` as stream events.
+ *
+ * Each SSE message goes to `handleMessage`, which enqueues text deltas and
+ * closes the controller on a terminal message. Delta text is accumulated
+ * so the terminal event can carry the whole reply.
+ *
+ * Aborting `signal` cancels the upstream reader right away, even while a
+ * read is pending, and closes the stream without an error event. A
+ * failure while reading or parsing is reported as one `error` event
+ * before the stream closes. Once a terminal event has been emitted the
+ * upstream body is cancelled instead of being read to its end.
+ *
+ * @param body - Raw response body of the chat-completions request.
+ * @param controller - Controller of the stream handed to the caller.
+ * @param signal - Optional abort signal of the originating request.
+ */
+async function pumpChatEvents(
+  body: ReadableStream<Uint8Array>,
+  controller: ReadableStreamDefaultController<OpenClawStreamEvent>,
+  signal?: AbortSignal,
+): Promise<void> {
+  const reader = body.getReader()
+  const decoder = new TextDecoder()
+  let text = ''
+  // `done` also means "controller already closed": handleMessage closes
+  // the controller itself whenever it returns true.
+  let done = false
+  const closeOnce = () => {
+    if (done) return
+    done = true
+    try {
+      controller.close()
+    } catch {
+      // The consumer already cancelled the stream; nothing left to close.
+    }
+  }
+  const parser = createParser({
+    onEvent(message) {
+      if (done) return
+      const nextText = updateAccumulatedText(message, text)
+      done = handleMessage(message, controller, nextText, done)
+      if (!done) {
+        text = nextText
+      }
+    },
+  })
+
+  // React to an abort while a read is pending instead of waiting for the
+  // next chunk to arrive (same approach as pumpHistoryEvents).
+  const onAbort = () => {
+    void reader.cancel().catch(() => {})
+    closeOnce()
+  }
+  signal?.addEventListener('abort', onAbort, { once: true })
+
+  try {
+    while (true) {
+      // Covers a signal that was already aborted before the listener was
+      // attached; such a signal never fires the event.
+      if (signal?.aborted) {
+        await reader.cancel()
+        closeOnce()
+        return
+      }
+
+      const { done: streamDone, value } = await reader.read()
+      if (streamDone) break
+      parser.feed(decoder.decode(value, { stream: true }))
+
+      if (done) {
+        // A terminal event went out; stop holding the upstream connection.
+        await reader.cancel()
+        break
+      }
+    }
+  } catch (error) {
+    if (!done) {
+      try {
+        controller.enqueue({
+          type: 'error',
+          data: {
+            message: error instanceof Error ? error.message : String(error),
+          },
+        })
+      } catch {
+        // The consumer cancelled the stream; there is nobody to notify.
+      }
+      closeOnce()
+    }
+  } finally {
+    signal?.removeEventListener('abort', onAbort)
+    closeOnce()
+    reader.releaseLock()
+  }
+}
+
+/**
+ * Process one SSE message of the chat stream.
+ *
+ * `[DONE]` and chunks carrying a finish reason end the stream through
+ * `finishStream`. A chunk that fails to parse emits an `error` event and
+ * closes the controller. Any other chunk only enqueues its text deltas.
+ *
+ * @param message - Parsed SSE message.
+ * @param controller - Destination for the produced events.
+ * @param text - Reply text accumulated so far, including this message.
+ * @param done - Whether the stream has already been finished.
+ * @returns true once the stream is finished (the controller is closed).
+ */
+function handleMessage(
+  message: EventSourceMessage,
+  controller: ReadableStreamDefaultController<OpenClawStreamEvent>,
+  text: string,
+  done: boolean,
+): boolean {
+  const raw = message.data
+  if (raw === '[DONE]') {
+    return finishStream(controller, text, done)
+  }
+
+  const chunk = parseChunk(raw)
+  if (chunk) {
+    mapChunkToEvents(chunk).forEach((event) => {
+      controller.enqueue(event)
+    })
+    if (hasFinishReason(chunk)) {
+      return finishStream(controller, text, done)
+    }
+    return false
+  }
+
+  controller.enqueue({
+    type: 'error',
+    data: { message: 'Failed to parse OpenClaw chat stream chunk' },
+  })
+  controller.close()
+  return true
+}
+
+/**
+ * Append the delta text carried by an SSE message to the running reply.
+ *
+ * @param message - Parsed SSE message.
+ * @param text - Reply text accumulated before this message.
+ * @returns `text` extended by every choice's delta, or `text` unchanged
+ *   when the message data does not parse.
+ */
+function updateAccumulatedText(
+  message: EventSourceMessage,
+  text: string,
+): string {
+  const chunk = parseChunk(message.data)
+  if (!chunk) return text
+
+  return readChoices(chunk).reduce(
+    (accumulated, choice) => accumulated + readDeltaText(choice),
+    text,
+  )
+}
+
+/**
+ * Emit the terminal event and close the controller, unless the stream was
+ * already finished.
+ *
+ * A blank reply is reported as an `error` event; otherwise a `done` event
+ * carries the full reply text.
+ *
+ * @param controller - Destination for the terminal event.
+ * @param text - Full reply text accumulated over the stream.
+ * @param done - Whether the stream has already been finished.
+ * @returns Always true.
+ */
+function finishStream(
+  controller: ReadableStreamDefaultController<OpenClawStreamEvent>,
+  text: string,
+  done: boolean,
+): boolean {
+  if (done) return true
+
+  const terminal: OpenClawStreamEvent = text.trim()
+    ? { type: 'done', data: { text } }
+    : {
+        type: 'error',
+        data: {
+          message: "Agent couldn't generate a response. Please try again.",
+        },
+      }
+  controller.enqueue(terminal)
+  controller.close()
+  return true
+}
+
+/**
+ * Turn a parsed stream chunk into `text-delta` events, one per choice
+ * that carries non-empty delta text.
+ */
+function mapChunkToEvents(
+  chunk: Record<string, unknown>,
+): OpenClawStreamEvent[] {
+  return readChoices(chunk)
+    .map((choice) => readDeltaText(choice))
+    .filter((delta) => delta !== '')
+    .map(
+      (delta): OpenClawStreamEvent => ({
+        type: 'text-delta',
+        data: { text: delta },
+      }),
+    )
+}
+
+/** Report whether any choice of the chunk carries a finish reason. */
+function hasFinishReason(chunk: Record<string, unknown>): boolean {
+  for (const choice of readChoices(chunk)) {
+    if (readFinishReason(choice) !== null) return true
+  }
+  return false
+}
+
+/**
+ * Read the `choices` array of a chunk, keeping only object entries.
+ *
+ * @returns The non-null object entries of `chunk.choices`, or an empty
+ *   array when `choices` is not an array.
+ */
+function readChoices(
+  chunk: Record<string, unknown>,
+): Array<Record<string, unknown>> {
+  const { choices } = chunk
+  if (!Array.isArray(choices)) return []
+
+  const objects: Array<Record<string, unknown>> = []
+  for (const candidate of choices) {
+    if (candidate !== null && typeof candidate === 'object') {
+      objects.push(candidate as Record<string, unknown>)
+    }
+  }
+  return objects
+}
+
+/**
+ * Read `choice.delta.content` when it is a string.
+ *
+ * @returns The delta text, or `''` when `delta` is not an object or its
+ *   `content` is not a string.
+ */
+function readDeltaText(choice: Record<string, unknown>): string {
+  const { delta } = choice
+  if (delta === null || typeof delta !== 'object') {
+    return ''
+  }
+
+  const { content } = delta as Record<string, unknown>
+  if (typeof content !== 'string') {
+    return ''
+  }
+  return content
+}
+
+/**
+ * Read `choice.finish_reason` when it is a non-empty string.
+ *
+ * @returns The finish reason, or null when it is missing, empty or not a
+ *   string.
+ */
+function readFinishReason(choice: Record<string, unknown>): string | null {
+  const { finish_reason: reason } = choice
+  if (typeof reason !== 'string' || reason === '') {
+    return null
+  }
+  return reason
+}
+
+/**
+ * Parse a JSON payload without throwing.
+ *
+ * The parsed value is cast rather than validated, so JSON that is not an
+ * object (for example a number or an array) is returned as-is.
+ *
+ * @returns The parsed value, or null when `data` is not valid JSON.
+ */
+function parseChunk(data: string): Record<string, unknown> | null {
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(data)
+  } catch {
+    return null
+  }
+  return parsed as Record<string, unknown>
+}
+
+/**
+ * Wrap a raw session-history response body in a stream of parsed history
+ * events. The pump starts when the stream is constructed and is not
+ * awaited.
+ */
+function createHistoryEventStream(
+  body: ReadableStream<Uint8Array>,
+  signal?: AbortSignal,
+): ReadableStream<OpenClawSessionHistoryEvent> {
+  const source: UnderlyingDefaultSource<OpenClawSessionHistoryEvent> = {
+    start: (controller) => {
+      void pumpHistoryEvents(body, controller, signal)
+    },
+  }
+  return new ReadableStream<OpenClawSessionHistoryEvent>(source)
+}
+
+/**
+ * Drain an SSE session-history body into `controller` as history events.
+ *
+ * SSE messages are converted by `toHistoryEvent`; messages it rejects are
+ * dropped. An `error` event, whether received from the gateway or caused
+ * by a read failure, closes the stream. Aborting `signal` cancels the
+ * reader and closes the stream without an error event.
+ *
+ * @param body - Raw response body of the history request.
+ * @param controller - Controller of the stream handed to the caller.
+ * @param signal - Optional abort signal of the originating request.
+ */
+async function pumpHistoryEvents(
+  body: ReadableStream<Uint8Array>,
+  controller: ReadableStreamDefaultController<OpenClawSessionHistoryEvent>,
+  signal?: AbortSignal,
+): Promise<void> {
+  const reader = body.getReader()
+  const decoder = new TextDecoder()
+  let closed = false
+  // Idempotent close: several paths below (abort, error, finally) call it.
+  const close = () => {
+    if (closed) return
+    closed = true
+    controller.close()
+  }
+  const parser = createParser({
+    onEvent(message) {
+      if (closed) return
+      const event = toHistoryEvent(message)
+      if (!event) return
+      controller.enqueue(event)
+      // An error event is terminal for the stream.
+      if (event.type === 'error') close()
+    },
+  })
+
+  // Cancel the reader as soon as the signal fires, even mid-read.
+  const onAbort = () => {
+    void reader.cancel().catch(() => {})
+    close()
+  }
+  signal?.addEventListener('abort', onAbort, { once: true })
+
+  try {
+    while (true) {
+      // Handles a signal that was already aborted before the listener
+      // was attached.
+      if (signal?.aborted) {
+        await reader.cancel()
+        close()
+        return
+      }
+      const { done, value } = await reader.read()
+      if (done) break
+      parser.feed(decoder.decode(value, { stream: true }))
+    }
+  } catch (error) {
+    // Report the failure only if the stream is still open.
+    if (!closed) {
+      controller.enqueue({
+        type: 'error',
+        data: {
+          message: error instanceof Error ? error.message : String(error),
+        },
+      })
+      close()
+    }
+  } finally {
+    signal?.removeEventListener('abort', onAbort)
+    close()
+    reader.releaseLock()
+  }
+}
+
+/**
+ * Convert an SSE message of the history stream into a typed event.
+ *
+ * Only the event names `history`, `message` and `error` are recognised.
+ * Payloads are cast to their event shape, not validated.
+ *
+ * @returns The typed event, or null when the message has no event name,
+ *   its data is not valid JSON, or the event name is not recognised.
+ */
+function toHistoryEvent(
+  message: EventSourceMessage,
+): OpenClawSessionHistoryEvent | null {
+  const { event: kind, data } = message
+  if (!kind) return null
+
+  const payload = parseChunk(data)
+  if (!payload) return null
+
+  switch (kind) {
+    case 'history':
+      return {
+        type: 'history',
+        data: payload as unknown as OpenClawSessionHistory,
+      }
+    case 'message':
+      return {
+        type: 'message',
+        data: payload as unknown as Extract<
+          OpenClawSessionHistoryEvent,
+          { type: 'message' }
+        >['data'],
+      }
+    case 'error': {
+      const detail = payload.message
+      return {
+        type: 'error',
+        data: {
+          message:
+            typeof detail === 'string'
+              ? detail
+              : 'OpenClaw session history stream error',
+        },
+      }
+    }
+    default:
+      return null
+  }
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-jsonl-reader.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-jsonl-reader.ts
@@ -0,0 +1,667 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs'
+import { resolve, sep } from 'node:path'
+
+// ---------------------------------------------------------------------------
+// Types for raw JSONL line parsing (matches OpenClaw's internal format)
+// ---------------------------------------------------------------------------
+
+/**
+ * One entry of a message's `content` array. `type` decides which of the
+ * optional fields are read: `text` blocks use `text`, `thinking` blocks
+ * use `thinking` (falling back to `text`), `toolCall` blocks use `id`,
+ * `name` and `arguments`, and image blocks use the image fields below.
+ */
+interface PiContentBlock {
+  type: string
+  text?: string
+  // OpenClaw stores reasoning blocks as { type: 'thinking', thinking: '...' }
+  // — the prose lives on a `thinking` field, not `text`.
+  thinking?: string
+  id?: string
+  name?: string
+  arguments?: Record<string, unknown>
+  // OpenAI-shaped image blocks: { type: 'image_url', image_url: { url } }.
+  // The data: URL carries mediaType + base64 in one string.
+  image_url?: { url?: string; detail?: string }
+  // Anthropic-shaped image blocks: { type: 'image', source: { type:
+  // 'base64', media_type, data } } and the simpler { type: 'image', data }
+  // variant the gateway emits on tool results.
+  source?: { type?: string; media_type?: string; data?: string }
+  data?: string
+  media_type?: string
+  mediaType?: string
+}
+
+/**
+ * The `message` object of a JSONL line of type `message`. `role` selects
+ * how the line is mapped to events; other roles produce no events.
+ */
+interface PiMessage {
+  role?: 'user' | 'assistant' | 'toolResult'
+  content?: PiContentBlock[]
+  stopReason?: string
+  errorMessage?: string
+  // Copied onto `agent.message` events as tokensIn / tokensOut / costUsd.
+  usage?: {
+    input?: number
+    output?: number
+    cost?: {
+      total?: number
+    }
+  }
+  // `provider` and `model` are combined as `<provider>/<model>`.
+  model?: string
+  provider?: string
+  // Copied onto `agent.tool_result` events for `toolResult` messages.
+  toolCallId?: string
+  toolName?: string
+  isError?: boolean
+}
+
+/**
+ * One parsed line of a session JSONL file. The reader maps the types
+ * `model_change`, `thinking_level_change`, `compaction` and `message`;
+ * lines of any other type produce no events.
+ */
+interface PiLine {
+  type: string
+  id?: string
+  // Parsed with `Date.parse` to obtain the event's `createdAt`.
+  timestamp?: string
+  message?: PiMessage
+  // Read for `model_change` lines.
+  provider?: string
+  modelId?: string
+  // Read for `thinking_level_change` lines.
+  thinkingLevel?: string
+  // Read for `compaction` lines.
+  summary?: string
+  firstKeptEntryId?: string
+  tokensBefore?: number
+}
+
+/** One value of `sessions.json`; only the fields below are read here. */
+interface SessionsJsonEntry {
+  sessionId?: string
+  updatedAt?: number
+  [k: string]: unknown
+}
+
+/** Parsed `sessions.json`: a map from session key to its entry. */
+type SessionsJson = Record<string, SessionsJsonEntry>
+
+// ---------------------------------------------------------------------------
+// Public types
+// ---------------------------------------------------------------------------
+
+/** Kinds of events the reader derives from JSONL lines. */
+export type ClawEventType =
+  | 'user.message'
+  | 'user.attachment'
+  | 'agent.message'
+  | 'agent.thinking'
+  | 'agent.tool_use'
+  | 'agent.tool_result'
+  | 'session.model_change'
+  | 'session.thinking_level_change'
+  | 'session.compaction'
+
+/** Attachment details carried by `user.attachment` events. */
+export interface ClawAttachmentInfo {
+  kind: 'image' | 'file'
+  mediaType: string
+  // For images we always emit a data: URL so downstream consumers don't
+  // have to reconstruct it. `name` is best-effort (JSONL rarely carries
+  // a filename for inline image content blocks).
+  dataUrl?: string
+  name?: string
+}
+
+/** One event derived from a JSONL line. */
+export interface ClawEvent {
+  // The line's `id` (empty string when missing). Events split out of one
+  // line append `:attachment:<n>`, `:thinking:<n>` or `:tool:<id or n>`.
+  eventId: string
+  type: ClawEventType
+  content: string
+  // Milliseconds since the epoch: the parsed line timestamp, or the time
+  // of reading when the line has no timestamp.
+  createdAt: number
+  // Usage figures; set on `agent.message` events only.
+  tokensIn?: number
+  tokensOut?: number
+  costUsd?: number
+  model?: string
+  // Tool details; set on `agent.tool_use` and `agent.tool_result` events.
+  toolName?: string
+  toolCallId?: string
+  toolArguments?: Record<string, unknown>
+  isError?: boolean
+  // Set on `user.attachment` events.
+  attachment?: ClawAttachmentInfo
+}
+
+/** One session listed in `sessions.json`. */
+export interface JsonlSessionEntry {
+  key: string
+  sessionId: string
+  // 0 when `sessions.json` carries no numeric `updatedAt`.
+  updatedAt: number
+}
+
+/**
+ * Aggregates returned by `getSessionStats`. Cost and token totals are
+ * summed over `agent.message` events only.
+ */
+export interface JsonlSessionStats {
+  userTurns: number
+  assistantMessages: number
+  toolCalls: number
+  totalCostUsd: number
+  totalTokensIn: number
+  totalTokensOut: number
+}
+
+// ---------------------------------------------------------------------------
+// Reader
+// ---------------------------------------------------------------------------
+
+/**
+ * Reads OpenClaw's per-session JSONL files directly from the host filesystem.
+ * OpenClaw is the sole writer — this reader never modifies the files.
+ *
+ * Path layout on the host (via Lima virtiofs mount):
+ *   <stateRoot>/agents/<agentId>/sessions/sessions.json
+ *   <stateRoot>/agents/<agentId>/sessions/<piSessionId>.jsonl
+ *
+ * Every path is built through `safePath`, so an agent ID or session ID
+ * that would escape `stateRoot` raises an error instead of being read.
+ */
+export class OpenClawJsonlReader {
+  constructor(private readonly stateRoot: string) {}
+
+  /**
+   * List all sessions for an agent by reading sessions.json.
+   *
+   * Entries without a string `sessionId` (including `null` entries) are
+   * skipped. A missing, unreadable or malformed file yields an empty list.
+   *
+   * @returns Sessions sorted by `updatedAt`, newest first.
+   * @throws Error when `agentId` resolves outside the state root.
+   */
+  listSessions(agentId: string): JsonlSessionEntry[] {
+    const sessionsJson = this.readSessionsJson(agentId)
+    if (!sessionsJson) return []
+
+    const entries: JsonlSessionEntry[] = []
+    for (const [key, entry] of Object.entries(sessionsJson)) {
+      // Optional chaining: a hand-edited or half-written file may hold
+      // `null` where an entry object is expected.
+      if (typeof entry?.sessionId === 'string') {
+        entries.push({
+          key,
+          sessionId: entry.sessionId,
+          updatedAt: typeof entry.updatedAt === 'number' ? entry.updatedAt : 0,
+        })
+      }
+    }
+    return entries.sort((a, b) => b.updatedAt - a.updatedAt)
+  }
+
+  /**
+   * List all agent IDs by scanning the agents directory.
+   *
+   * @returns Names of the sub-directories of `<stateRoot>/agents`, or an
+   *   empty list when the directory cannot be read.
+   */
+  listAgents(): string[] {
+    try {
+      const entries = readdirSync(this.safePath('agents'), {
+        withFileTypes: true,
+      })
+      return entries.filter((e) => e.isDirectory()).map((e) => e.name)
+    } catch {
+      return []
+    }
+  }
+
+  /**
+   * Read and parse all events from a session's JSONL file.
+   *
+   * Uses resolveJsonlPath() which handles a known OpenClaw quirk: the
+   * Pi session ID recorded in sessions.json can drift from the actual
+   * JSONL filename after context compaction or session restart. When the
+   * mapped ID doesn't match a file on disk, we fall back to the most
+   * recently modified JSONL in the agent's sessions directory.
+   *
+   * @returns Events in file order; empty when no file can be resolved or
+   *   read.
+   */
+  listBySession(agentId: string, sessionKey: string): ClawEvent[] {
+    const filePath = this.resolveJsonlPath(agentId, sessionKey)
+    if (!filePath) return []
+
+    let raw: string
+    try {
+      raw = readFileSync(filePath, 'utf8')
+    } catch {
+      return []
+    }
+
+    const events: ClawEvent[] = []
+    for (const line of raw.split('\n')) {
+      if (!line.trim()) continue
+      let parsed: PiLine
+      try {
+        parsed = JSON.parse(line) as PiLine
+      } catch {
+        // Skip malformed lines — a partial line at the tail is possible
+        // if OpenClaw is mid-write.
+        continue
+      }
+      for (const event of mapLineToEvents(parsed)) {
+        events.push(event)
+      }
+    }
+    return events
+  }
+
+  /** Get the latest assistant message from a session. */
+  latestAgentMessage(
+    agentId: string,
+    sessionKey: string,
+  ): ClawEvent | undefined {
+    const events = this.listBySession(agentId, sessionKey)
+    for (let i = events.length - 1; i >= 0; i--) {
+      if (events[i]?.type === 'agent.message') return events[i]
+    }
+    return undefined
+  }
+
+  /** Count user turns (`user.message` events) in a session. */
+  countUserTurns(agentId: string, sessionKey: string): number {
+    const events = this.listBySession(agentId, sessionKey)
+    let n = 0
+    for (const e of events) {
+      if (e.type === 'user.message') n++
+    }
+    return n
+  }
+
+  /**
+   * Aggregate stats for a session. Cost and token totals are summed over
+   * `agent.message` events only.
+   */
+  getSessionStats(agentId: string, sessionKey: string): JsonlSessionStats {
+    const events = this.listBySession(agentId, sessionKey)
+    const stats: JsonlSessionStats = {
+      userTurns: 0,
+      assistantMessages: 0,
+      toolCalls: 0,
+      totalCostUsd: 0,
+      totalTokensIn: 0,
+      totalTokensOut: 0,
+    }
+    for (const e of events) {
+      if (e.type === 'user.message') stats.userTurns++
+      if (e.type === 'agent.message') {
+        stats.assistantMessages++
+        if (e.costUsd) stats.totalCostUsd += e.costUsd
+        if (e.tokensIn) stats.totalTokensIn += e.tokensIn
+        if (e.tokensOut) stats.totalTokensOut += e.tokensOut
+      }
+      if (e.type === 'agent.tool_use') stats.toolCalls++
+    }
+    return stats
+  }
+
+  // ── Private helpers ─────────────────────────────────────────────────
+
+  /**
+   * Ensure a resolved path stays within stateRoot to prevent path traversal
+   * via crafted agentId or sessionId values containing ".." segments.
+   *
+   * @throws Error when the resolved path lies outside the state root.
+   */
+  private safePath(...segments: string[]): string {
+    const root = resolve(this.stateRoot)
+    const resolved = resolve(root, ...segments)
+    // `resolve` emits the platform separator, so the containment prefix
+    // must use it too. A root that already ends in a separator (the
+    // filesystem root) must not get a second one.
+    const prefix = root.endsWith(sep) ? root : `${root}${sep}`
+    if (resolved !== root && !resolved.startsWith(prefix)) {
+      throw new Error(`Path traversal blocked: ${segments.join('/')}`)
+    }
+    return resolved
+  }
+
+  /**
+   * Read and parse an agent's sessions.json.
+   *
+   * @returns The parsed map, or null when the file is missing, is not
+   *   valid JSON, or its top-level value is not a plain object.
+   * @throws Error when `agentId` resolves outside the state root.
+   */
+  private readSessionsJson(agentId: string): SessionsJson | null {
+    const filePath = this.safePath(
+      'agents',
+      agentId,
+      'sessions',
+      'sessions.json',
+    )
+    try {
+      const parsed: unknown = JSON.parse(readFileSync(filePath, 'utf8'))
+      // Only a key → entry object is usable; reject null, arrays and
+      // primitives rather than treating them as a session map.
+      if (
+        parsed === null ||
+        typeof parsed !== 'object' ||
+        Array.isArray(parsed)
+      ) {
+        return null
+      }
+      return parsed as SessionsJson
+    } catch {
+      return null
+    }
+  }
+
+  /**
+   * Resolve the path to a session's JSONL file. Tries the sessions.json
+   * mapping first (fast), then falls back to scanning the directory for
+   * the most recently modified JSONL file when the mapped ID doesn't
+   * match an actual file on disk.
+   *
+   * This fallback handles a known OpenClaw behavior where the Pi session
+   * ID in sessions.json can become stale after context compaction or
+   * session restart — the JSONL file on disk has a different UUID than
+   * what sessions.json records.
+   *
+   * NOTE(review): the fallback also runs when `sessionKey` has no entry
+   * in sessions.json at all, so an unknown key resolves to the agent's
+   * most recent session file — confirm that callers expect this.
+   */
+  private resolveJsonlPath(agentId: string, sessionKey: string): string | null {
+    const sessionsJson = this.readSessionsJson(agentId)
+    if (!sessionsJson) return null
+
+    // Try exact key match in sessions.json
+    let resolvedId: string | undefined
+    const entry = sessionsJson[sessionKey]
+    if (entry && typeof entry.sessionId === 'string') {
+      resolvedId = entry.sessionId
+    }
+
+    // Try matching by scanning all keys (handles key format variations)
+    if (!resolvedId) {
+      for (const [key, value] of Object.entries(sessionsJson)) {
+        if (key === sessionKey || key.endsWith(`:${sessionKey}`)) {
+          // Optional chaining guards against `null` entries.
+          if (typeof value?.sessionId === 'string') {
+            resolvedId = value.sessionId
+            break
+          }
+        }
+      }
+    }
+
+    // If we found a sessionId and the file exists, use it
+    if (resolvedId) {
+      const path = this.safePath(
+        'agents',
+        agentId,
+        'sessions',
+        `${resolvedId}.jsonl`,
+      )
+      if (existsSync(path)) return path
+    }
+
+    // Fallback: scan the sessions directory for the most recent JSONL
+    // file. This handles stale sessions.json entries where the Pi
+    // session ID doesn't match the actual file on disk.
+    return this.findMostRecentJsonl(agentId)
+  }
+
+  /**
+   * Scan the sessions directory and return the path to the most recently
+   * modified JSONL file. Used as a fallback when sessions.json points to
+   * a non-existent file.
+   *
+   * @returns The newest `.jsonl` path, or null when the directory cannot
+   *   be read or holds no such file.
+   */
+  private findMostRecentJsonl(agentId: string): string | null {
+    let sessionsDir: string
+    try {
+      sessionsDir = this.safePath('agents', agentId, 'sessions')
+    } catch {
+      return null
+    }
+
+    let names: string[]
+    try {
+      names = readdirSync(sessionsDir).filter(
+        (n): n is string => typeof n === 'string' && n.endsWith('.jsonl'),
+      )
+    } catch {
+      return null
+    }
+
+    let best: { path: string; mtime: number } | null = null
+    for (const name of names) {
+      const fullPath = this.safePath('agents', agentId, 'sessions', name)
+      try {
+        const st = statSync(fullPath)
+        if (!best || st.mtimeMs > best.mtime) {
+          best = { path: fullPath, mtime: st.mtimeMs }
+        }
+      } catch {
+        // The file vanished between readdir and stat; skip it.
+      }
+    }
+
+    return best?.path ?? null
+  }
+}
+
+// ---------------------------------------------------------------------------
+// JSONL line → ClawEvent mapping
+// ---------------------------------------------------------------------------
+
+/**
+ * Map one parsed JSONL line to zero or more events.
+ *
+ * `model_change`, `thinking_level_change` and `compaction` lines each
+ * yield one session event (a `model_change` without a model ID yields
+ * none). `message` lines are delegated to `mapMessageToEvents`. Every
+ * other line type yields nothing.
+ *
+ * `createdAt` is the parsed line timestamp. When the timestamp is missing
+ * or cannot be parsed, the current time is used so that events never
+ * carry `NaN`.
+ *
+ * @param line - Parsed JSONL line.
+ * @returns The events derived from the line, in emission order.
+ */
+function mapLineToEvents(line: PiLine): ClawEvent[] {
+  const eventId = line.id ?? ''
+  const parsedAt = line.timestamp ? Date.parse(line.timestamp) : Number.NaN
+  // Date.parse returns NaN for a malformed timestamp; fall back to "now"
+  // exactly as for a missing one.
+  const createdAt = Number.isNaN(parsedAt) ? Date.now() : parsedAt
+
+  if (line.type === 'model_change') {
+    const model = combineModel(line.provider, line.modelId)
+    if (!model) return []
+    return [
+      {
+        eventId,
+        type: 'session.model_change',
+        content: model,
+        createdAt,
+        model,
+      },
+    ]
+  }
+
+  if (line.type === 'thinking_level_change') {
+    return [
+      {
+        eventId,
+        type: 'session.thinking_level_change',
+        content: line.thinkingLevel ?? 'unknown',
+        createdAt,
+      },
+    ]
+  }
+
+  if (line.type === 'compaction') {
+    return [
+      {
+        eventId,
+        type: 'session.compaction',
+        content: line.summary ?? '(compacted)',
+        createdAt,
+      },
+    ]
+  }
+
+  if (line.type !== 'message' || !line.message) return []
+
+  return mapMessageToEvents(line.message, eventId, createdAt)
+}
+
+/**
+ * Dispatch a JSONL message to the mapper for its role.
+ *
+ * `toolResult` messages become a single `agent.tool_result` event whose
+ * content is the message text, or `(no output)` when there is none.
+ * Messages with any other role produce no events.
+ */
+function mapMessageToEvents(
+  msg: PiMessage,
+  eventId: string,
+  createdAt: number,
+): ClawEvent[] {
+  switch (msg.role) {
+    case 'user':
+      return mapUserMessage(msg, eventId, createdAt)
+    case 'assistant':
+      return mapAssistantMessage(msg, eventId, createdAt)
+    case 'toolResult': {
+      const output = extractText(msg.content)
+      const result: ClawEvent = {
+        eventId,
+        type: 'agent.tool_result',
+        content: output || '(no output)',
+        createdAt,
+        toolName: msg.toolName,
+        toolCallId: msg.toolCallId,
+        isError: msg.isError,
+      }
+      return [result]
+    }
+    default:
+      return []
+  }
+}
+
+/**
+ * Map a user JSONL message to events.
+ *
+ * Every image content block yields a `user.attachment` event. These come
+ * first, followed by one `user.message` event, so that downstream
+ * accumulators (in jsonlEventsToHistoryItems) can attach them to the
+ * message they arrived with. A message with attachments but no text still
+ * gets a `user.message` event, with empty content, to mark the turn. A
+ * message with neither text nor attachments yields nothing.
+ */
+function mapUserMessage(
+  msg: PiMessage,
+  eventId: string,
+  createdAt: number,
+): ClawEvent[] {
+  const text = extractText(msg.content)
+
+  const attachmentEvents: ClawEvent[] = []
+  for (const block of msg.content ?? []) {
+    const attachment = extractImageAttachment(block)
+    if (attachment) {
+      attachmentEvents.push({
+        // The index counts attachments only, not every content block.
+        eventId: `${eventId}:attachment:${attachmentEvents.length}`,
+        type: 'user.attachment',
+        content: attachment.dataUrl ?? '',
+        createdAt,
+        attachment,
+      })
+    }
+  }
+
+  if (!text && attachmentEvents.length === 0) {
+    return attachmentEvents
+  }
+
+  // `text` is either the caption or, for attachment-only turns, ''.
+  return [
+    ...attachmentEvents,
+    { eventId, type: 'user.message', content: text, createdAt },
+  ]
+}
+
+/**
+ * Read the media type of a `data:` URL.
+ *
+ * In `data:[<mediatype>][;base64],<data>` the media type ends at the
+ * first `;` or `,`, whichever comes first.
+ *
+ * @param url - A string starting with `data:`.
+ * @returns The media type, or `application/octet-stream` when it is empty.
+ */
+function readDataUrlMediaType(url: string): string {
+  const rest = url.slice('data:'.length)
+  const end = rest.search(/[;,]/)
+  const mediaType = (end === -1 ? rest : rest.slice(0, end)).trim()
+  return mediaType || 'application/octet-stream'
+}
+
+/**
+ * Extract a normalised image attachment from a single content block.
+ * Handles all three shapes the OpenClaw gateway round-trips:
+ *   - OpenAI: `{ type: 'image_url', image_url: { url } }` (data: URL)
+ *   - Anthropic: `{ type: 'image', source: { type: 'base64', media_type, data } }`
+ *   - Bare: `{ type: 'image', data: '<base64>' }` (used by tool-result outputs)
+ *
+ * For `image_url` blocks only `data:` URLs are accepted. For bare blocks
+ * without a media type, `image/png` is assumed.
+ *
+ * @param block - Content block to inspect.
+ * @returns The attachment, or null when the block is not a supported image.
+ */
+function extractImageAttachment(
+  block: PiContentBlock,
+): ClawAttachmentInfo | null {
+  if (block.type === 'image_url') {
+    const url = block.image_url?.url
+    if (typeof url !== 'string' || !url.startsWith('data:')) return null
+    return { kind: 'image', mediaType: readDataUrlMediaType(url), dataUrl: url }
+  }
+
+  if (block.type === 'image') {
+    const sourceData = block.source?.data
+    const sourceMediaType =
+      block.source?.media_type ?? block.media_type ?? block.mediaType
+    const bareData = block.data
+    if (typeof sourceData === 'string' && typeof sourceMediaType === 'string') {
+      return {
+        kind: 'image',
+        mediaType: sourceMediaType,
+        dataUrl: `data:${sourceMediaType};base64,${sourceData}`,
+      }
+    }
+    if (typeof bareData === 'string') {
+      const mediaType =
+        typeof sourceMediaType === 'string' ? sourceMediaType : 'image/png'
+      return {
+        kind: 'image',
+        mediaType,
+        dataUrl: `data:${mediaType};base64,${bareData}`,
+      }
+    }
+  }
+
+  return null
+}
+
+/**
+ * Map an assistant JSONL message to events.
+ *
+ * Content blocks are walked in order: each non-empty `thinking` block
+ * yields an `agent.thinking` event and each named `toolCall` block an
+ * `agent.tool_use` event. When the message has text, one `agent.message`
+ * event follows, carrying the usage figures and the model.
+ */
+function mapAssistantMessage(
+  msg: PiMessage,
+  eventId: string,
+  createdAt: number,
+): ClawEvent[] {
+  const collected: ClawEvent[] = []
+  const text = extractText(msg.content)
+  let thinkingCount = 0
+  let toolCount = 0
+
+  for (const block of msg.content ?? []) {
+    if (block.type === 'thinking') {
+      // Prefer the `thinking` field; fall back to `text`.
+      const prose =
+        [block.thinking, block.text].find(
+          (candidate): candidate is string =>
+            typeof candidate === 'string' && candidate !== '',
+        ) ?? ''
+      if (prose.length > 0) {
+        collected.push({
+          eventId: `${eventId}:thinking:${thinkingCount}`,
+          type: 'agent.thinking',
+          content: prose,
+          createdAt,
+        })
+        thinkingCount += 1
+      }
+    } else if (block.type === 'toolCall' && block.name) {
+      collected.push({
+        // The block's own ID when present, otherwise its position among
+        // the tool calls of this message.
+        eventId: `${eventId}:tool:${block.id ?? toolCount}`,
+        type: 'agent.tool_use',
+        content: block.name,
+        createdAt,
+        toolName: block.name,
+        toolCallId: block.id,
+        toolArguments: block.arguments,
+      })
+      toolCount += 1
+    }
+  }
+
+  if (!text) {
+    return collected
+  }
+
+  const { usage } = msg
+  collected.push({
+    eventId,
+    type: 'agent.message',
+    content: text,
+    createdAt,
+    tokensIn: usage?.input,
+    tokensOut: usage?.output,
+    costUsd: usage?.cost?.total,
+    model: combineModel(msg.provider, msg.model),
+  })
+  return collected
+}
+
+/**
+ * Concatenate the text of all `text` content blocks, with no separator.
+ *
+ * @returns The joined text, or `''` when there are no blocks or none of
+ *   them is a `text` block with a string `text`.
+ */
+function extractText(blocks: PiContentBlock[] | undefined): string {
+  return (blocks ?? [])
+    .filter((block) => block.type === 'text' && typeof block.text === 'string')
+    .map((block) => block.text)
+    .join('')
+}
+
+/**
+ * Combine a provider and a model name.
+ *
+ * @returns `<provider>/<model>`, just `<model>` when there is no provider,
+ *   or undefined when there is no model.
+ */
+function combineModel(
+  provider: string | undefined,
+  model: string | undefined,
+): string | undefined {
+  if (!model) {
+    return undefined
+  }
+  if (!provider) {
+    return model
+  }
+  return `${provider}/${model}`
+}
+
+// ---------------------------------------------------------------------------
+// Tool activity summary
+// ---------------------------------------------------------------------------
+
+/** Build "<verb> <n> <noun>", adding a plural "s" unless n is 1. */
+const countedPhrase =
+  (verb: string, noun: string) =>
+  (n: number): string =>
+    `${verb} ${n} ${noun}${n !== 1 ? 's' : ''}`
+
+/** Build "<phrase>", or "<phrase> <n> times" when n is above 1. */
+const repeatedPhrase =
+  (phrase: string) =>
+  (n: number): string =>
+    n > 1 ? `${phrase} ${n} times` : phrase
+
+/** Per-tool summary phrases, keyed by tool name. */
+const TOOL_DESCRIPTIONS: Record<string, (count: number) => string> = {
+  browser_navigate: countedPhrase('Browsed', 'page'),
+  browser_take_screenshot: countedPhrase('Took', 'screenshot'),
+  browser_click: countedPhrase('Clicked', 'element'),
+  browser_fill: countedPhrase('Filled', 'field'),
+  browser_type: countedPhrase('Typed in', 'field'),
+  google_calendar_list_events: repeatedPhrase('Checked calendar'),
+  gmail_search: repeatedPhrase('Searched email'),
+  gmail_send: countedPhrase('Sent', 'email'),
+  slack_post_message: countedPhrase('Sent', 'Slack message'),
+  file_write: countedPhrase('Wrote', 'file'),
+  file_read: countedPhrase('Read', 'file'),
+}
+
+/**
+ * Describe a tool that has no dedicated phrase.
+ *
+ * A leading `browser_`, `google_` or `mcp_` is dropped and the remaining
+ * underscores become spaces.
+ *
+ * @returns `Used <name>`, or `Used <name> <count> times` when `count` is
+ *   above 1.
+ */
+function defaultToolDescription(toolName: string, count: number): string {
+  const label = toolName
+    .replace(/^(browser_|google_|mcp_)/, '')
+    .split('_')
+    .join(' ')
+  if (count > 1) {
+    return `Used ${label} ${count} times`
+  }
+  return `Used ${label}`
+}
+
+/**
+ * Summarise the tool-use events of a session in one human-readable line.
+ *
+ * Tools are counted by name and described in order of first use, joined
+ * with ", ". Tools without a dedicated phrase get a generic description.
+ *
+ * Example output: "Browsed 3 pages, Took 2 screenshots"
+ *
+ * @param events - Events to summarise; only `agent.tool_use` events that
+ *   carry a tool name are counted.
+ * @returns The summary, or null when no tool use was found.
+ */
+export function summarizeToolActivity(events: ClawEvent[]): string | null {
+  const usage = new Map<string, number>()
+  events.forEach(({ type, toolName }) => {
+    if (type !== 'agent.tool_use' || !toolName) return
+    usage.set(toolName, (usage.get(toolName) ?? 0) + 1)
+  })
+
+  if (usage.size === 0) {
+    return null
+  }
+
+  return Array.from(usage, ([tool, count]) => {
+    const describe = TOOL_DESCRIPTIONS[tool]
+    return describe ? describe(count) : defaultToolDescription(tool, count)
+  }).join(', ')
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-observer.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-observer.ts
@@ -0,0 +1,276 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * Connects to the OpenClaw gateway's WebSocket control plane and pipes
+ * chat broadcast events into a ClawSession state machine. The observer
+ * is a transport layer only — it handles the WS connection lifecycle
+ * (connect, handshake, reconnect) and delegates all state management
+ * to ClawSession.
+ */
+
+import WebSocket from 'ws'
+import { logger } from '../../../lib/logger'
+import type { ClawSession } from './claw-session'
+
+// ---------------------------------------------------------------------------
+// Protocol types (subset of OpenClaw gateway protocol v3)
+// ---------------------------------------------------------------------------
+
+// Protocol version sent as both minProtocol and maxProtocol in the connect
+// request.
+const PROTOCOL_VERSION = 3
+// Request id of the connect (handshake) request; the handshake response is
+// recognised by carrying the same id.
+const HANDSHAKE_REQUEST_ID = 'connect'
+// Delay before a reconnect attempt after the socket closes.
+const RECONNECT_DELAY_MS = 5_000
+// Time allowed, from socket creation, for the handshake response to arrive
+// before the socket is terminated.
+const CONNECT_TIMEOUT_MS = 10_000
+
+/** Request frame sent to the gateway (used here for the connect request). */
+interface RequestFrame {
+  type: 'req'
+  id: string
+  method: string
+  params: Record<string, unknown>
+}
+
+/**
+ * Frames parsed from incoming messages: a successful response, a failed
+ * response (with an error code and message), or a broadcast event.
+ */
+type IncomingFrame =
+  | { type: 'res'; id: string; ok: true; payload?: unknown }
+  | {
+      type: 'res'
+      id: string
+      ok: false
+      error: { code: string; message: string }
+    }
+  | { type: 'event'; event: string; payload?: unknown }
+
+// ---------------------------------------------------------------------------
+// Observer
+// ---------------------------------------------------------------------------
+
+/**
+ * Observes the OpenClaw gateway over WebSocket and forwards chat broadcast
+ * events to a ClawSession as state transitions.
+ *
+ * Lifecycle: `connect()` opens a socket, waits for the gateway's
+ * `connect.challenge` event, answers with a `connect` request, and starts
+ * processing events once the handshake response is OK. When the socket
+ * closes unexpectedly a reconnect is scheduled after RECONNECT_DELAY_MS.
+ * `disconnect()` stops everything.
+ *
+ * Only one socket is considered "current" at a time (`this.ws`). Handlers of
+ * sockets that have since been replaced ignore their events so they cannot
+ * clobber the state of the newer connection.
+ */
+export class OpenClawObserver {
+  private ws: WebSocket | null = null
+  private reconnectTimer: ReturnType<typeof setTimeout> | null = null
+  private connected = false
+  private closed = false
+  private gatewayUrl: string | null = null
+  private gatewayToken: string | null = null
+
+  constructor(private readonly session: ClawSession) {}
+
+  /**
+   * Start observing the gateway at the given URL with the given token.
+   * Any socket or pending reconnect from an earlier call is dropped first so
+   * that repeated calls never leave two connections running.
+   */
+  connect(gatewayUrl: string, token: string): void {
+    this.clearReconnect()
+    this.closeSocket()
+    this.gatewayUrl = gatewayUrl
+    this.gatewayToken = token
+    this.closed = false
+    this.doConnect()
+  }
+
+  /** Stop observing and close the WebSocket. No reconnect is scheduled. */
+  disconnect(): void {
+    this.closed = true
+    this.clearReconnect()
+    this.closeSocket()
+  }
+
+  /** Whether the observer has an active WS connection (handshake done). */
+  isConnected(): boolean {
+    return this.connected
+  }
+
+  // ── Private ─────────────────────────────────────────────────────────
+
+  /** Detach and close the current socket, if any (best-effort). */
+  private closeSocket(): void {
+    const ws = this.ws
+    this.ws = null
+    this.connected = false
+    if (!ws) return
+    try {
+      ws.close()
+    } catch {
+      // Best-effort: the socket may already be closing or closed.
+    }
+  }
+
+  /** Open a socket to the stored gateway URL and wire up its handlers. */
+  private doConnect(): void {
+    if (this.closed || !this.gatewayUrl || !this.gatewayToken) return
+
+    // The gateway URL is stored as http(s); the WS endpoint uses ws(s).
+    const wsUrl = this.gatewayUrl
+      .replace(/^http:\/\//, 'ws://')
+      .replace(/^https:\/\//, 'wss://')
+
+    logger.debug('OpenClaw observer connecting', { url: wsUrl })
+
+    const ws = new WebSocket(wsUrl)
+    this.ws = ws
+
+    // Give up on this socket if the handshake response never arrives.
+    const connectTimeout = setTimeout(() => {
+      logger.warn('OpenClaw observer handshake timeout')
+      ws.terminate()
+    }, CONNECT_TIMEOUT_MS)
+
+    let handshakeSent = false
+
+    ws.on('message', (raw) => {
+      // Ignore traffic from a socket that has been replaced or detached.
+      if (this.ws !== ws) return
+
+      let frame: IncomingFrame
+      try {
+        frame = JSON.parse(raw.toString('utf8')) as IncomingFrame
+      } catch {
+        // Not JSON — nothing we can act on.
+        return
+      }
+
+      // The gateway sends a connect.challenge event before accepting
+      // the connect request. Send the handshake after receiving it.
+      if (
+        frame.type === 'event' &&
+        frame.event === 'connect.challenge' &&
+        !handshakeSent
+      ) {
+        handshakeSent = true
+        const connectReq: RequestFrame = {
+          type: 'req',
+          id: HANDSHAKE_REQUEST_ID,
+          method: 'connect',
+          params: {
+            minProtocol: PROTOCOL_VERSION,
+            maxProtocol: PROTOCOL_VERSION,
+            client: {
+              id: 'openclaw-tui',
+              displayName: 'browseros-observer',
+              version: '1.0.0',
+              platform: 'node',
+              mode: 'ui',
+            },
+            role: 'operator',
+            scopes: ['operator.read'],
+            auth: { token: this.gatewayToken },
+          },
+        }
+        ws.send(JSON.stringify(connectReq))
+        return
+      }
+
+      // Handshake response
+      if (frame.type === 'res' && frame.id === HANDSHAKE_REQUEST_ID) {
+        clearTimeout(connectTimeout)
+        if (frame.ok) {
+          this.connected = true
+          logger.info('OpenClaw observer connected')
+        } else {
+          logger.warn('OpenClaw observer handshake failed', {
+            error: frame.error,
+          })
+          ws.close()
+        }
+        return
+      }
+
+      // Broadcast events (only process after handshake completes)
+      if (frame.type === 'event' && this.connected) {
+        this.handleEvent(frame.event, frame.payload)
+      }
+    })
+
+    ws.on('close', () => {
+      clearTimeout(connectTimeout)
+
+      // A newer socket has taken over: this close belongs to a replaced
+      // connection and must not touch shared state or schedule a reconnect.
+      if (this.ws !== null && this.ws !== ws) return
+
+      this.connected = false
+      this.ws = null
+      this.resetWorkingAgents()
+
+      if (!this.closed) {
+        logger.debug('OpenClaw observer disconnected, scheduling reconnect')
+        this.scheduleReconnect()
+      }
+    })
+
+    ws.on('error', (err) => {
+      clearTimeout(connectTimeout)
+      logger.debug('OpenClaw observer WS error', {
+        message: err.message,
+      })
+    })
+  }
+
+  /**
+   * Reset any agents stuck in "working" to "unknown" — we missed the
+   * final/end event because the WS closed mid-task. The ClawSession will
+   * re-infer correct state from JSONL when the observer reconnects and
+   * ensureObserverConnected() re-seeds.
+   */
+  private resetWorkingAgents(): void {
+    for (const [agentId, state] of this.session.getAllStates()) {
+      if (state.status === 'working') {
+        this.session.transition(agentId, 'unknown')
+      }
+    }
+  }
+
+  /** Dispatch a broadcast event; only `chat` events are handled. */
+  private handleEvent(eventName: string, payload: unknown): void {
+    if (eventName === 'chat') {
+      this.handleChatEvent(payload)
+    }
+  }
+
+  /**
+   * Parse a gateway chat broadcast event and transition the ClawSession
+   * state machine accordingly: `delta`/`streaming` → working,
+   * `final`/`end` → idle, `error` → error. Payloads without a string
+   * `sessionKey` and `state`, or whose session key yields no agent id,
+   * are ignored.
+   */
+  private handleChatEvent(payload: unknown): void {
+    if (!payload || typeof payload !== 'object') return
+    const p = payload as Record<string, unknown>
+
+    const sessionKey = typeof p.sessionKey === 'string' ? p.sessionKey : null
+    const state = typeof p.state === 'string' ? p.state : null
+
+    if (!sessionKey || !state) return
+
+    const agentId = extractAgentId(sessionKey)
+    if (!agentId) return
+
+    if (state === 'delta' || state === 'streaming') {
+      this.session.transition(agentId, 'working', {
+        sessionKey,
+        currentTool: extractToolName(p),
+      })
+    } else if (state === 'final' || state === 'end') {
+      this.session.transition(agentId, 'idle', { sessionKey })
+    } else if (state === 'error') {
+      const errorMsg =
+        typeof p.errorMessage === 'string'
+          ? p.errorMessage
+          : typeof p.error === 'string'
+            ? p.error
+            : 'Unknown error'
+      this.session.transition(agentId, 'error', { sessionKey, error: errorMsg })
+    }
+  }
+
+  /** Schedule a single reconnect attempt, replacing any pending one. */
+  private scheduleReconnect(): void {
+    this.clearReconnect()
+    this.reconnectTimer = setTimeout(() => {
+      this.reconnectTimer = null
+      this.doConnect()
+    }, RECONNECT_DELAY_MS)
+  }
+
+  /** Cancel the pending reconnect attempt, if any. */
+  private clearReconnect(): void {
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer)
+      this.reconnectTimer = null
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Pull the agent id out of an OpenClaw session key.
+ *
+ * Keys look like "agent:<agentId>:..."; the id is everything between the
+ * "agent:" prefix and the next colon (or the end of the key when there is
+ * no further colon). Keys without the prefix yield `null`.
+ */
+function extractAgentId(sessionKey: string): string | null {
+  const prefix = 'agent:'
+  if (!sessionKey.startsWith(prefix)) {
+    return null
+  }
+  const remainder = sessionKey.slice(prefix.length)
+  const end = remainder.indexOf(':')
+  return end === -1 ? remainder : remainder.slice(0, end)
+}
+
+/**
+ * Best-effort lookup of a tool name in a chat event payload.
+ *
+ * Checks, in order: a string `toolName`, a string `tool`, then a string
+ * `name` on an object-valued `content`. Returns `null` when none match.
+ */
+function extractToolName(payload: Record<string, unknown>): string | null {
+  for (const key of ['toolName', 'tool'] as const) {
+    const direct = payload[key]
+    if (typeof direct === 'string') return direct
+  }
+
+  const { content } = payload
+  if (typeof content !== 'object' || content === null) return null
+  if (!('name' in content)) return null
+
+  const nested = (content as Record<string, unknown>).name
+  return typeof nested === 'string' ? nested : null
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-provider-map.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-provider-map.ts
@@ -0,0 +1,157 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+/** Provider types that have a dedicated API-key env var in PROVIDER_ENV_VARS. */
+export const SUPPORTED_OPENCLAW_PROVIDERS = [
+  'openrouter',
+  'openai',
+  'anthropic',
+  'moonshot',
+] as const
+
+/** Union of the literal provider names in SUPPORTED_OPENCLAW_PROVIDERS. */
+export type SupportedOpenClawProvider =
+  (typeof SUPPORTED_OPENCLAW_PROVIDERS)[number]
+
+/**
+ * Description of a provider that is not in the supported list: its derived
+ * id, the env var that carries its API key, and the provider config object.
+ */
+export interface CustomOpenClawProviderConfig {
+  providerId: string
+  apiKeyEnvVar: string
+  config: Record<string, unknown>
+}
+
+/**
+ * Result of resolving provider input: env values to set, an optional
+ * "<provider>/<model>" reference, and either the supported provider type or
+ * a custom provider definition.
+ */
+export interface ResolvedOpenClawProviderConfig {
+  envValues: Record<string, string>
+  model?: string
+  providerType?: SupportedOpenClawProvider
+  customProvider?: CustomOpenClawProviderConfig
+}
+
+// API-key environment variable name for each supported provider.
+const PROVIDER_ENV_VARS: Record<SupportedOpenClawProvider, string> = {
+  anthropic: 'ANTHROPIC_API_KEY',
+  moonshot: 'MOONSHOT_API_KEY',
+  openai: 'OPENAI_API_KEY',
+  openrouter: 'OPENROUTER_API_KEY',
+}
+
+/** Raised when a provider type is not one of the supported OpenClaw providers. */
+export class UnsupportedOpenClawProviderError extends Error {
+  constructor(providerType: string) {
+    const message = `Unsupported OpenClaw provider: ${providerType}`
+    super(message)
+    // Explicit name so the error can also be recognised by name.
+    this.name = 'UnsupportedOpenClawProviderError'
+  }
+}
+
+/**
+ * Type guard for UnsupportedOpenClawProviderError.
+ *
+ * Accepts real instances, and also any Error whose `name` matches.
+ * NOTE(review): the name fallback presumably covers errors created by a
+ * different copy of this module — confirm.
+ */
+export function isUnsupportedOpenClawProviderError(
+  error: unknown,
+): error is UnsupportedOpenClawProviderError {
+  if (error instanceof UnsupportedOpenClawProviderError) {
+    return true
+  }
+  return (
+    error instanceof Error && error.name === 'UnsupportedOpenClawProviderError'
+  )
+}
+
+/** Type guard: is `providerType` listed in SUPPORTED_OPENCLAW_PROVIDERS? */
+export function isSupportedOpenClawProvider(
+  providerType: string,
+): providerType is SupportedOpenClawProvider {
+  // Widen the tuple to plain strings so `includes` accepts any string.
+  const known: readonly string[] = SUPPORTED_OPENCLAW_PROVIDERS
+  return known.includes(providerType)
+}
+
+/**
+ * Validate an optional provider type.
+ *
+ * @returns `undefined` when no provider type is given, otherwise the same
+ *   value narrowed to a supported provider
+ * @throws UnsupportedOpenClawProviderError when the value is not supported
+ */
+export function assertSupportedOpenClawProvider(
+  providerType?: string,
+): SupportedOpenClawProvider | undefined {
+  if (!providerType) return undefined
+  if (isSupportedOpenClawProvider(providerType)) return providerType
+  throw new UnsupportedOpenClawProviderError(providerType)
+}
+
+/**
+ * Build a "<providerType>/<modelId>" model reference.
+ *
+ * @returns the reference, or `undefined` when no model id is given
+ */
+export function buildOpenClawModelRef(
+  providerType: SupportedOpenClawProvider,
+  modelId?: string,
+): string | undefined {
+  if (!modelId) {
+    return undefined
+  }
+  return [providerType, modelId].join('/')
+}
+
+/**
+ * Derive a slug-style provider id from whichever of provider name, base URL
+ * or provider type is the first to be non-blank (in that order).
+ *
+ * The chosen text is lowercased, a leading http(s):// is dropped, every run
+ * of characters outside a-z/0-9 becomes a single "-", and one leading and
+ * one trailing "-" are stripped. Falls back to "custom-provider" when there
+ * is no usable input or the slug comes out empty.
+ */
+export function deriveOpenClawProviderId(input: {
+  providerType?: string
+  providerName?: string
+  baseUrl?: string
+}): string {
+  const fallback = 'custom-provider'
+
+  let source = fallback
+  for (const raw of [input.providerName, input.baseUrl, input.providerType]) {
+    const trimmed = raw?.trim()
+    if (trimmed) {
+      source = trimmed
+      break
+    }
+  }
+
+  const lowered = source.toLowerCase()
+  const withoutScheme = lowered.replace(/^https?:\/\//, '')
+  const dashed = withoutScheme.replace(/[^a-z0-9]+/g, '-')
+  const slug = dashed.replace(/^-|-$/g, '')
+
+  return slug === '' ? fallback : slug
+}
+
+/**
+ * Derive the API-key environment variable name for a provider id, e.g.
+ * "my-provider" → "MY_PROVIDER_API_KEY".
+ *
+ * The result is always a valid environment variable identifier
+ * ([A-Z_][A-Z0-9_]*): every character outside A-Z/0-9/_ (not only "-")
+ * becomes "_", and a leading digit — which happens for ids derived from an
+ * IP-address base URL such as "127-0-0-1-8080" — is prefixed with "_".
+ * Names starting with a digit are not valid shell identifiers and may not
+ * be recognised by `${VAR}` substitution.
+ */
+export function deriveOpenClawApiKeyEnvVar(providerId: string): string {
+  const upper = providerId.toUpperCase().replace(/[^A-Z0-9_]/g, '_')
+  const safe = /^[0-9]/.test(upper) ? `_${upper}` : upper
+  return `${safe}_API_KEY`
+}
+
+/** Look up the API-key env var name for a supported provider. */
+export function getOpenClawProviderEnvVar(
+  providerType: SupportedOpenClawProvider,
+): string {
+  const envVarName = PROVIDER_ENV_VARS[providerType]
+  return envVarName
+}
+
+/**
+ * Resolve provider input into env values, a model reference and provider
+ * details.
+ *
+ * - No provider type: returns empty env values only.
+ * - Supported provider type: the API key (when given) is stored under that
+ *   provider's env var and the model is "<providerType>/<modelId>".
+ * - Any other provider type with a base URL: a custom provider is built with
+ *   an id and env var derived from the input; its config references the API
+ *   key through a `${ENV_VAR}` placeholder rather than embedding the key.
+ *
+ * @throws UnsupportedOpenClawProviderError for an unsupported provider type
+ *   without a base URL
+ */
+export function resolveSupportedOpenClawProvider(input: {
+  providerType?: string
+  providerName?: string
+  baseUrl?: string
+  apiKey?: string
+  modelId?: string
+}): ResolvedOpenClawProviderConfig {
+  const { providerType, baseUrl, apiKey, modelId } = input
+
+  if (!providerType) return { envValues: {} }
+
+  // Env map holding the API key under `envVar`, or empty when no key is set.
+  const envFor = (envVar: string): Record<string, string> =>
+    apiKey ? { [envVar]: apiKey } : {}
+
+  if (isSupportedOpenClawProvider(providerType)) {
+    return {
+      envValues: envFor(getOpenClawProviderEnvVar(providerType)),
+      model: buildOpenClawModelRef(providerType, modelId),
+      providerType,
+    }
+  }
+
+  if (!baseUrl) throw new UnsupportedOpenClawProviderError(providerType)
+
+  const providerId = deriveOpenClawProviderId(input)
+  const apiKeyEnvVar = deriveOpenClawApiKeyEnvVar(providerId)
+
+  const config: Record<string, unknown> = {
+    api: 'openai-completions',
+    baseUrl,
+    apiKey: `\${${apiKeyEnvVar}}`,
+  }
+  if (modelId) {
+    config.models = [{ id: modelId, name: modelId }]
+  }
+
+  return {
+    envValues: envFor(apiKeyEnvVar),
+    model: modelId ? `${providerId}/${modelId}` : undefined,
+    customProvider: { providerId, apiKeyEnvVar, config },
+  }
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-service.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-service.ts
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-types.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/openclaw-types.ts
@@ -0,0 +1,18 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+/**
+ * One event of an OpenClaw stream. `type` tells the kind of event; `data`
+ * carries its payload, whose shape is not constrained by this type.
+ */
+export interface OpenClawStreamEvent {
+  type:
+    | 'text-delta'
+    | 'thinking'
+    | 'tool-start'
+    | 'tool-end'
+    | 'tool-output'
+    | 'lifecycle'
+    | 'done'
+    | 'error'
+  data: Record<string, unknown>
+}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/podman-runtime.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/podman-runtime.ts
@@ -1,285 +0,0 @@
-/**
- * @license
- * Copyright 2025 BrowserOS
- * SPDX-License-Identifier: AGPL-3.0-or-later
- *
- * Abstraction over the Podman CLI for container lifecycle management.
- * Handles Podman machine init/start on macOS/Windows (where a Linux VM is required).
- * On Linux, machine operations are no-ops since Podman runs natively.
- */
-
-import { existsSync } from 'node:fs'
-import { join } from 'node:path'
-
-const isLinux = process.platform === 'linux'
-const PODMAN_BUNDLE_PATH = ['bin', 'third_party', 'podman'] as const
-
-export type LogFn = (msg: string) => void
-
-function getPodmanBinaryName(platform: NodeJS.Platform): string {
-  return platform === 'win32' ? 'podman.exe' : 'podman'
-}
-
-export function resolveBundledPodmanPath(
-  resourcesDir?: string,
-  platform: NodeJS.Platform = process.platform,
-): string | null {
-  if (!resourcesDir) return null
-
-  const bundledPath = join(
-    resourcesDir,
-    ...PODMAN_BUNDLE_PATH,
-    getPodmanBinaryName(platform),
-  )
-
-  return existsSync(bundledPath) ? bundledPath : null
-}
-
-export class PodmanRuntime {
-  private podmanPath: string
-  private machineReady = false
-
-  constructor(config?: { podmanPath?: string }) {
-    this.podmanPath = config?.podmanPath ?? 'podman'
-  }
-
-  getPodmanPath(): string {
-    return this.podmanPath
-  }
-
-  async isPodmanAvailable(): Promise<boolean> {
-    try {
-      const proc = Bun.spawn([this.podmanPath, '--version'], {
-        stdout: 'ignore',
-        stderr: 'ignore',
-      })
-      return (await proc.exited) === 0
-    } catch {
-      return false
-    }
-  }
-
-  async getMachineStatus(): Promise<{
-    initialized: boolean
-    running: boolean
-  }> {
-    if (isLinux) return { initialized: true, running: true }
-
-    try {
-      const proc = Bun.spawn(
-        [this.podmanPath, 'machine', 'list', '--format', 'json'],
-        { stdout: 'pipe', stderr: 'ignore' },
-      )
-      const output = await new Response(proc.stdout).text()
-      await proc.exited
-
-      const machines = JSON.parse(output) as Array<{
-        Running?: boolean
-        LastUp?: string
-      }>
-
-      if (!machines.length) return { initialized: false, running: false }
-
-      const machine = machines[0]
-      const running =
-        machine.Running === true || machine.LastUp === 'Currently running'
-
-      return { initialized: true, running }
-    } catch {
-      return { initialized: false, running: false }
-    }
-  }
-
-  async initMachine(onLog?: LogFn): Promise<void> {
-    if (isLinux) return
-
-    const proc = Bun.spawn(
-      [
-        this.podmanPath,
-        'machine',
-        'init',
-        '--cpus',
-        '2',
-        '--memory',
-        '2048',
-        '--disk-size',
-        '10',
-      ],
-      { stdout: 'ignore', stderr: 'pipe' },
-    )
-
-    await this.drainStderr(proc, onLog)
-    const code = await proc.exited
-    if (code !== 0)
-      throw new Error(`podman machine init failed with code ${code}`)
-  }
-
-  async startMachine(onLog?: LogFn): Promise<void> {
-    if (isLinux) return
-
-    const proc = Bun.spawn([this.podmanPath, 'machine', 'start'], {
-      stdout: 'ignore',
-      stderr: 'pipe',
-    })
-
-    await this.drainStderr(proc, onLog)
-    const code = await proc.exited
-    if (code !== 0)
-      throw new Error(`podman machine start failed with code ${code}`)
-  }
-
-  async stopMachine(): Promise<void> {
-    if (isLinux) return
-
-    const proc = Bun.spawn([this.podmanPath, 'machine', 'stop'], {
-      stdout: 'ignore',
-      stderr: 'ignore',
-    })
-    const code = await proc.exited
-    if (code !== 0)
-      throw new Error(`podman machine stop failed with code ${code}`)
-    this.machineReady = false
-  }
-
-  async ensureReady(onLog?: LogFn): Promise<void> {
-    if (this.machineReady) return
-
-    const status = await this.getMachineStatus()
-
-    if (!status.initialized) {
-      onLog?.('Initializing Podman machine...')
-      await this.initMachine(onLog)
-    }
-
-    if (!status.running) {
-      onLog?.('Starting Podman machine...')
-      await this.startMachine(onLog)
-    }
-
-    this.machineReady = true
-  }
-
-  async runCommand(
-    args: string[],
-    options?: {
-      cwd?: string
-      env?: Record<string, string>
-      onOutput?: (line: string) => void
-    },
-  ): Promise<number> {
-    const useStreaming = !!options?.onOutput
-    const proc = Bun.spawn([this.podmanPath, ...args], {
-      cwd: options?.cwd,
-      env: options?.env ? { ...process.env, ...options.env } : undefined,
-      stdout: useStreaming ? 'pipe' : 'ignore',
-      stderr: useStreaming ? 'pipe' : 'ignore',
-    })
-
-    if (options?.onOutput) {
-      await Promise.all([
-        this.drainStream(proc.stdout ?? null, options.onOutput),
-        this.drainStream(proc.stderr ?? null, options.onOutput),
-      ])
-    }
-
-    return proc.exited
-  }
-
-  /**
-   * Follow container logs. Returns a stop function that terminates the
-   * underlying `podman logs -f` process. Each output line is passed to
-   * onLine as-is.
-   */
-  tailContainerLogs(containerName: string, onLine: LogFn): () => void {
-    const proc = Bun.spawn(
-      [this.podmanPath, 'logs', '-f', '--tail', '0', containerName],
-      { stdout: 'pipe', stderr: 'pipe' },
-    )
-
-    void this.drainStream(proc.stdout ?? null, onLine)
-    void this.drainStream(proc.stderr ?? null, onLine)
-
-    let stopped = false
-    return () => {
-      if (stopped) return
-      stopped = true
-      try {
-        proc.kill()
-      } catch {
-        // process may already be gone
-      }
-    }
-  }
-
-  /**
-   * Lists running container names. Used to check whether non-BrowserOS
-   * containers are running before stopping the Podman machine.
-   */
-  async listRunningContainers(): Promise<string[]> {
-    const proc = Bun.spawn([this.podmanPath, 'ps', '--format', '{{.Names}}'], {
-      stdout: 'pipe',
-      stderr: 'ignore',
-    })
-    const output = await new Response(proc.stdout).text()
-    await proc.exited
-
-    return output
-      .trim()
-      .split('\n')
-      .filter((name) => name.trim())
-  }
-
-  private async drainStderr(
-    proc: {
-      stderr: ReadableStream<Uint8Array> | null
-      exited: Promise<number>
-    },
-    onLog?: LogFn,
-  ): Promise<void> {
-    if (!onLog || !proc.stderr) return
-    await this.drainStream(proc.stderr, onLog)
-  }
-
-  private async drainStream(
-    stream: ReadableStream<Uint8Array> | null,
-    onLine: (line: string) => void,
-  ): Promise<void> {
-    if (!stream) return
-    const reader = stream.getReader()
-    const decoder = new TextDecoder()
-    let buffer = ''
-
-    while (true) {
-      const { done, value } = await reader.read()
-      if (done) break
-      buffer += decoder.decode(value, { stream: true })
-      const lines = buffer.split('\n')
-      buffer = lines.pop() ?? ''
-      for (const line of lines) {
-        const trimmed = line.trim()
-        if (trimmed) onLine(trimmed)
-      }
-    }
-    if (buffer.trim()) onLine(buffer.trim())
-  }
-}
-
-let runtime: PodmanRuntime | null = null
-
-export function configurePodmanRuntime(config: {
-  resourcesDir?: string
-  podmanPath?: string
-}): PodmanRuntime {
-  const podmanPath =
-    config.podmanPath ??
-    resolveBundledPodmanPath(config.resourcesDir) ??
-    'podman'
-
-  runtime = new PodmanRuntime({ podmanPath })
-  return runtime
-}
-
-export function getPodmanRuntime(): PodmanRuntime {
-  if (!runtime) runtime = new PodmanRuntime()
-  return runtime
-}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/role-bootstrap.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/role-bootstrap.ts
@@ -1,200 +0,0 @@
-import {
-  type BROWSEROS_ROLE_TEMPLATES,
-  getBrowserOSRoleTemplate,
-} from '@browseros/shared/constants/role-aware-agents'
-import type {
-  BrowserOSAgentRoleId,
-  BrowserOSAgentRoleSummary,
-  BrowserOSCustomRoleInput,
-  BrowserOSRoleTemplate,
-} from '@browseros/shared/types/role-aware-agents'
-
-type RoleTemplate = (typeof BROWSEROS_ROLE_TEMPLATES)[number]
-interface BootstrapRenderableRole {
-  name: string
-  shortDescription: string
-  longDescription: string
-  recommendedApps: string[]
-  boundaries: BrowserOSRoleTemplate['boundaries']
-  bootstrap: BrowserOSRoleTemplate['bootstrap']
-}
-
-export interface RoleBootstrapFiles {
-  'AGENTS.md': string
-  'SOUL.md': string
-  'TOOLS.md': string
-  '.browseros-role.json': string
-}
-
-export function resolveRoleTemplate(
-  roleId: BrowserOSAgentRoleId,
-): RoleTemplate {
-  const role = getBrowserOSRoleTemplate(roleId)
-  if (!role) {
-    throw new Error(`Unknown BrowserOS role: ${roleId}`)
-  }
-  return role
-}
-
-export function buildRoleBootstrapFiles(input: {
-  role: BrowserOSRoleTemplate | BrowserOSCustomRoleInput
-  agentName: string
-}): RoleBootstrapFiles {
-  const normalizedRole = normalizeRoleForBootstrap(input.role)
-  const roleId = 'id' in input.role ? input.role.id : undefined
-  return {
-    'AGENTS.md': normalizedRole.bootstrap.agentsMd,
-    'SOUL.md': normalizedRole.bootstrap.soulMd,
-    'TOOLS.md': normalizedRole.bootstrap.toolsMd,
-    '.browseros-role.json': `${JSON.stringify(
-      {
-        version: 1,
-        roleSource: roleId ? 'builtin' : 'custom',
-        roleId,
-        roleName: normalizedRole.name,
-        shortDescription: normalizedRole.shortDescription,
-        createdBy: 'browseros',
-        agentName: input.agentName,
-      },
-      null,
-      2,
-    )}\n`,
-  }
-}
-
-export function toRoleSummary(
-  role: BrowserOSRoleTemplate | BrowserOSCustomRoleInput,
-): BrowserOSAgentRoleSummary {
-  const normalizedRole = normalizeRoleForBootstrap(role)
-  return {
-    roleSource: 'id' in role ? 'builtin' : 'custom',
-    roleId: 'id' in role ? role.id : undefined,
-    roleName: normalizedRole.name,
-    shortDescription: normalizedRole.shortDescription,
-  }
-}
-
-export function normalizeCustomRole(
-  role: BrowserOSCustomRoleInput,
-): BootstrapRenderableRole {
-  const recommendedApps = Array.isArray(role.recommendedApps)
-    ? role.recommendedApps.filter(
-        (app): app is string => typeof app === 'string',
-      )
-    : []
-  const boundaries = Array.isArray(role.boundaries) ? role.boundaries : []
-
-  return {
-    name: role.name,
-    shortDescription: role.shortDescription,
-    longDescription: role.longDescription,
-    recommendedApps,
-    boundaries,
-    bootstrap: {
-      agentsMd:
-        role.bootstrap?.agentsMd?.trim() ||
-        buildAgentsMd({
-          name: role.name,
-          longDescription: role.longDescription,
-          boundaries,
-        }),
-      soulMd:
-        role.bootstrap?.soulMd?.trim() ||
-        buildSoulMd({
-          name: role.name,
-          shortDescription: role.shortDescription,
-          longDescription: role.longDescription,
-        }),
-      toolsMd:
-        role.bootstrap?.toolsMd?.trim() ||
-        buildToolsMd({
-          boundaries,
-          recommendedApps,
-        }),
-    },
-  }
-}
-
-function normalizeRoleForBootstrap(
-  role: BrowserOSRoleTemplate | BrowserOSCustomRoleInput,
-): BootstrapRenderableRole {
-  return 'id' in role ? role : normalizeCustomRole(role)
-}
-
-function buildAgentsMd(input: {
-  name: string
-  longDescription: string
-  boundaries: BrowserOSRoleTemplate['boundaries']
-}): string {
-  const boundaryLines = input.boundaries
-    .map(
-      (boundary) =>
-        `- ${boundary.label}: ${boundary.description} Default mode: ${boundary.defaultMode}.`,
-    )
-    .join('\n')
-
-  return `# ${input.name}
-
-You are the ${input.name} specialist for this workspace.
-
-## Core Purpose
-${input.longDescription}
-
-## Operating Rules
-${boundaryLines}
-
-## Default Output Style
- concise
- action-oriented
- explicit about blockers and approvals
-`
-}
-
-function buildSoulMd(input: {
-  name: string
-  shortDescription: string
-  longDescription: string
-}): string {
-  return `# Operating Style
-
-You act like a trusted ${input.name}.
-
-## Working Posture
- calm
- structured
- direct
- explicit about tradeoffs
-
-## Role Framing
-${input.shortDescription}
-
-${input.longDescription}
-`
-}
-
-function buildToolsMd(input: {
-  boundaries: BrowserOSRoleTemplate['boundaries']
-  recommendedApps: string[]
-}): string {
-  const boundaryLines = input.boundaries
-    .map((boundary) => `- ${boundary.label}: ${boundary.defaultMode}`)
-    .join('\n')
-
-  const appsLine =
-    input.recommendedApps.length > 0
-      ? input.recommendedApps.join(', ')
-      : 'No specific apps configured yet.'
-
-  return `# Tooling Guidelines
-
- Use BrowserOS MCP for browser and connected SaaS tasks.
- Prefer read, summarize, and draft flows.
- Keep outputs in the workspace when possible so work remains inspectable.
-
-## Recommended Apps
-${appsLine}
-
-## Boundary Defaults
-${boundaryLines}
-`
-}
--- a/packages/browseros-agent/apps/server/src/api/services/openclaw/runtime-state.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/openclaw/runtime-state.ts
@@ -0,0 +1,114 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * Runtime state for the OpenClaw gateway. Today this is just the host port
+ * we mapped the gateway container to, persisted so that a once-chosen port
+ * is reused across restarts when it's still free.
+ */
+
+import { existsSync } from 'node:fs'
+import { mkdir, readFile, writeFile } from 'node:fs/promises'
+import { createServer } from 'node:net'
+import { join } from 'node:path'
+import { OPENCLAW_GATEWAY_CONTAINER_PORT } from '@browseros/shared/constants/openclaw'
+import { getOpenClawStateDir } from './openclaw-env'
+
+// File name of the persisted runtime state inside the OpenClaw state dir.
+const RUNTIME_STATE_FILE = 'runtime-state.json'
+
+/** Shape of the persisted runtime state file. */
+interface RuntimeState {
+  // Host port chosen for the gateway container.
+  gatewayPort: number
+}
+
+/**
+ * Read a forced gateway port from BROWSEROS_TEST_OPENCLAW_GATEWAY_PORT.
+ *
+ * The value (after trimming) must consist solely of decimal digits and lie
+ * in 1..65535. Anything else — unset, blank, "8080abc", "80.5", "+80",
+ * out of range — yields `null` so the override is ignored rather than
+ * silently misread.
+ */
+function readForcedGatewayPort(): number | null {
+  const raw = process.env.BROWSEROS_TEST_OPENCLAW_GATEWAY_PORT?.trim()
+  if (!raw) return null
+
+  // parseInt would accept a numeric prefix followed by garbage, so validate
+  // the whole string before converting.
+  if (!/^\d+$/.test(raw)) return null
+
+  const parsed = Number(raw)
+  if (!Number.isInteger(parsed) || parsed <= 0 || parsed > 65535) {
+    return null
+  }
+  return parsed
+}
+
+/** Path of the runtime state file inside the OpenClaw state directory. */
+function getRuntimeStatePath(openclawDir: string): string {
+  const stateDir = getOpenClawStateDir(openclawDir)
+  return join(stateDir, RUNTIME_STATE_FILE)
+}
+
+/**
+ * Read the gateway port stored in the runtime state file.
+ *
+ * @returns the stored port when the file exists, parses as JSON and holds an
+ *   integer in 1..65535; `null` in every other case (missing file, read or
+ *   parse failure, invalid value)
+ */
+export async function readPersistedGatewayPort(
+  openclawDir: string,
+): Promise<number | null> {
+  const statePath = getRuntimeStatePath(openclawDir)
+  if (!existsSync(statePath)) return null
+
+  try {
+    const contents = await readFile(statePath, 'utf-8')
+    const { gatewayPort } = JSON.parse(contents) as Partial<RuntimeState>
+
+    if (typeof gatewayPort !== 'number') return null
+    if (!Number.isInteger(gatewayPort)) return null
+    if (gatewayPort <= 0 || gatewayPort > 65535) return null
+    return gatewayPort
+  } catch {
+    // Unreadable or malformed state is treated as "nothing persisted".
+    return null
+  }
+}
+
+/**
+ * Persist the chosen gateway port to the runtime state file, creating the
+ * state directory first when needed. The file is pretty-printed JSON with a
+ * trailing newline.
+ */
+async function writePersistedGatewayPort(
+  openclawDir: string,
+  port: number,
+): Promise<void> {
+  const stateDir = getOpenClawStateDir(openclawDir)
+  await mkdir(stateDir, { recursive: true })
+
+  const payload: RuntimeState = { gatewayPort: port }
+  const serialized = `${JSON.stringify(payload, null, 2)}\n`
+  await writeFile(getRuntimeStatePath(openclawDir), serialized)
+}
+
+/**
+ * Check whether `port` can be bound on 127.0.0.1 by briefly listening on it.
+ * Resolves `false` when the probe server reports an error, `true` once it
+ * has listened and been closed again.
+ */
+function isPortAvailable(port: number): Promise<boolean> {
+  return new Promise<boolean>((resolve) => {
+    const probe = createServer()
+    const onError = (): void => resolve(false)
+    const onListening = (): void => {
+      // Release the port before reporting it as free.
+      probe.close(() => resolve(true))
+    }
+    probe.once('error', onError)
+    probe.once('listening', onListening)
+    probe.listen(port, '127.0.0.1')
+  })
+}
+
+/** Scan upward from `startPort` and return the first port that is free. */
+async function findAvailablePort(startPort: number): Promise<number> {
+  for (let candidate = startPort; ; candidate++) {
+    if (await isPortAvailable(candidate)) {
+      return candidate
+    }
+  }
+}
+
+/**
+ * Choose the host port for the gateway container.
+ *
+ * Order of preference:
+ * 1. a port forced through the test environment variable (persisted and
+ *    returned without an availability check);
+ * 2. the previously persisted port, when it can still be bound;
+ * 3. the first free port at or above OPENCLAW_GATEWAY_CONTAINER_PORT, which
+ *    is then persisted.
+ */
+export async function allocateGatewayPort(
+  openclawDir: string,
+): Promise<number> {
+  const forced = readForcedGatewayPort()
+  if (forced !== null) {
+    await writePersistedGatewayPort(openclawDir, forced)
+    return forced
+  }
+
+  const previous = await readPersistedGatewayPort(openclawDir)
+  if (previous !== null) {
+    const stillFree = await isPortAvailable(previous)
+    if (stillFree) return previous
+  }
+
+  const fresh = await findAvailablePort(OPENCLAW_GATEWAY_CONTAINER_PORT)
+  await writePersistedGatewayPort(openclawDir, fresh)
+  return fresh
+}
--- a/packages/browseros-agent/apps/server/src/api/services/queue/index.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/queue/index.ts
@@ -0,0 +1,61 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { getOpenClawService } from '../openclaw/openclaw-service'
+import { OutboundQueueService } from './outbound-queue-service'
+
+let service: OutboundQueueService | null = null
+
+/**
+ * Return the shared OutboundQueueService, creating it on first use.
+ *
+ * Construction is deferred so the OpenClaw service is already available.
+ * The queue is wired to OpenClawService for agent status changes, agent
+ * state lookup, session resolution and chat dispatch, so no extra wiring is
+ * needed on the openclaw side.
+ */
+export function getOutboundQueueService(): OutboundQueueService {
+  if (service) return service
+
+  const claw = getOpenClawService()
+  service = new OutboundQueueService({
+    onAgentStatusChange: (listener) => claw.onAgentStatusChange(listener),
+    getAgentState: (agentId) => claw.getAgentState(agentId),
+    // Resolve the agent's existing user-chat session for queued sends
+    // so we don't accidentally orphan the conversation by spawning a
+    // fresh session per queued message. Only the very first message
+    // for an agent (no prior session at all) falls back to a new key,
+    // which mirrors what the existing /chat route does.
+    resolveExistingSessionKey: (agentId) => {
+      const { sessionKey } = claw.resolveAgentSession(agentId)
+      return sessionKey ?? null
+    },
+    chatStream: (request) => {
+      const { agentId, sessionKey, message, history, messageParts, signal } =
+        request
+      return claw.chatStream(agentId, sessionKey, message, history, {
+        messageParts,
+        signal,
+      })
+    },
+  })
+  return service
+}
+
+/**
+ * Shut down and forget the shared queue service, if one was created —
+ * wired into server shutdown. A later getOutboundQueueService() call builds
+ * a fresh instance.
+ */
+export function shutdownOutboundQueueService(): void {
+  const current = service
+  if (current === null) return
+  current.shutdown()
+  service = null
+}
+
+export type {
+  QueuedItem,
+  QueuedItemAttachmentPreview,
+  QueuedItemPublic,
+  QueuedItemStatus,
+} from './outbound-queue-service'
--- a/packages/browseros-agent/apps/server/src/api/services/queue/outbound-queue-service.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/queue/outbound-queue-service.ts
@@ -0,0 +1,289 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ *
+ * Per-agent FIFO queue of outbound chat messages. The user submits a
+ * message via /claw/agents/:id/queue, the server holds it, and a worker
+ * dispatches it through the existing chatStream path the moment the
+ * agent's ClawSession status flips to idle.
+ *
+ * The queue lives in memory only — server restart loses pending items.
+ * Persistence is a follow-up; the deliberate v1 trade-off is keeping the
+ * dispatch reactive (single source of truth = ClawSession) and avoiding
+ * a parallel store that could drift from the agent's actual state.
+ */
+
+import { randomUUID } from 'node:crypto'
+import { logger } from '../../../lib/logger'
+import type {
+  AgentSessionState,
+  SessionStateListener,
+} from '../openclaw/claw-session'
+import type { OpenClawChatContentPart } from '../openclaw/openclaw-http-client'
+import type { OpenClawStreamEvent } from '../openclaw/openclaw-types'
+
+/**
+ * Status of an item while it sits in the queue. OutboundQueueService sets
+ * 'queued' on enqueue/retry, 'dispatching' while a send is in flight and
+ * 'failed' when the send threw. Successfully sent items are removed from
+ * the queue rather than given a status.
+ */
+export type QueuedItemStatus = 'queued' | 'dispatching' | 'failed'
+
+/** Lightweight description of one attachment; carries no payload. */
+export interface QueuedItemAttachmentPreview {
+  kind: 'image' | 'file'
+  mediaType: string
+  name?: string
+}
+
+/** Full server-side record of one queued outbound message. */
+export interface QueuedItem {
+  id: string
+  agentId: string
+  /** Plain text body — what we send through chatStream's `message` arg. */
+  message: string
+  /** Multimodal parts when attachments are present. */
+  messageParts?: OpenClawChatContentPart[]
+  /** Compact preview the SSE feed broadcasts; never includes data URLs. */
+  attachmentsPreview: QueuedItemAttachmentPreview[]
+  /** Explicit target session; when absent one is resolved at dispatch time. */
+  sessionKey?: string
+  /** Prior turns, forwarded unchanged as chatStream's `history`. */
+  history: Array<{ role: 'user' | 'assistant'; content: string }>
+  status: QueuedItemStatus
+  /** Message of the error that moved the item to 'failed'. */
+  error?: string
+  /** Date.now() at enqueue time. */
+  createdAt: number
+  /** Date.now() at the moment the item entered 'dispatching'. */
+  startedAt?: number
+}
+
+/** Public projection sent over the SSE feed — strips heavy fields. */
+export interface QueuedItemPublic {
+  id: string
+  status: QueuedItemStatus
+  message: string
+  attachmentsPreview: QueuedItemAttachmentPreview[]
+  error?: string
+  createdAt: number
+  startedAt?: number
+}
+
+/** One subscriber; `send` receives full snapshots of `agentId`'s queue. */
+interface QueueListener {
+  agentId: string
+  send(items: QueuedItemPublic[]): void
+}
+
+/** A "send" delegate — wraps OpenClawService.chatStream to avoid a hard dep. */
+export type ChatStreamFn = (input: {
+  agentId: string
+  sessionKey: string
+  message: string
+  history: QueuedItem['history']
+  messageParts?: OpenClawChatContentPart[]
+  signal?: AbortSignal
+}) => Promise<ReadableStream<OpenClawStreamEvent>>
+
+/** Everything the queue needs from the outside world, injected at construction. */
+interface OutboundQueueServiceDeps {
+  /** Subscribe to per-agent status transitions from the ClawSession SM. */
+  onAgentStatusChange(listener: SessionStateListener): () => void
+  /** Read the current ClawSession state for an agent. */
+  getAgentState(agentId: string): AgentSessionState
+  /**
+   * Look up the agent's existing user-chat sessionKey, if any. The worker
+   * uses this to keep queued sends on the same conversation thread —
+   * generating a fresh UUID per queued message would orphan the prior
+   * conversation by spawning a brand-new session each time.
+   */
+  resolveExistingSessionKey(agentId: string): string | null
+  /** Send a chat — wraps OpenClawService.chatStream. */
+  chatStream: ChatStreamFn
+}
+
+/**
+ * In-memory, per-agent FIFO of outbound chat messages.
+ *
+ * Items are dispatched one at a time per agent through `deps.chatStream`.
+ * A dispatch attempt is made on enqueue, on retry, whenever the agent's
+ * status changes to 'idle', and after each finished dispatch. Items that
+ * were sent are removed; items whose send threw stay in the queue with
+ * status 'failed' until they are retried or cancelled.
+ */
+export class OutboundQueueService {
+  /** agentId -> items in arrival order. Entries are dropped once empty. */
+  private readonly queues = new Map<string, QueuedItem[]>()
+  private readonly listeners = new Set<QueueListener>()
+  /** agentId -> abort handle of the dispatch currently in flight. */
+  private readonly workerInflight = new Map<string, AbortController>()
+  private unsubscribe: (() => void) | null = null
+
+  constructor(private readonly deps: OutboundQueueServiceDeps) {
+    this.unsubscribe = deps.onAgentStatusChange((agentId, state) => {
+      if (state.status === 'idle') void this.tryDispatch(agentId)
+    })
+  }
+
+  /**
+   * Append a message to the agent's queue, broadcast the new snapshot and
+   * start a dispatch attempt.
+   *
+   * @param item - Queue entry without the server-managed fields. A
+   *   supplied `id` is honoured unless an item with that id is already in
+   *   this agent's queue, in which case a random UUID is used instead.
+   * @returns The stored item, including its final id.
+   */
+  enqueue(
+    item: Omit<QueuedItem, 'id' | 'status' | 'createdAt'> & { id?: string },
+  ): QueuedItem {
+    // Caller-supplied ids let the browser keep its optimistic row and the
+    // server snapshot reconciled on a single key — without that, SSE
+    // can't dedupe the optimistic entry until the POST response lands
+    // and the client learns the server-generated UUID.
+    const list = this.queues.get(item.agentId) ?? []
+    const id =
+      item.id && !list.some((existing) => existing.id === item.id)
+        ? item.id
+        : randomUUID()
+    const queued: QueuedItem = {
+      ...item,
+      id,
+      status: 'queued',
+      createdAt: Date.now(),
+    }
+    list.push(queued)
+    this.queues.set(item.agentId, list)
+    this.broadcast(item.agentId)
+    void this.tryDispatch(item.agentId)
+    return queued
+  }
+
+  /**
+   * Remove a queued or failed item.
+   *
+   * @returns `{ ok: true }` on removal; otherwise the reason: 'not_found'
+   *   when the id is unknown for this agent, 'dispatching' when the item
+   *   is currently being sent and can no longer be withdrawn.
+   */
+  cancel(
+    agentId: string,
+    itemId: string,
+  ): { ok: true } | { ok: false; reason: 'not_found' | 'dispatching' } {
+    const list = this.queues.get(agentId) ?? []
+    const idx = list.findIndex((i) => i.id === itemId)
+    if (idx < 0) return { ok: false, reason: 'not_found' }
+    const target = list[idx]
+    if (!target) return { ok: false, reason: 'not_found' }
+    if (target.status === 'dispatching') {
+      return { ok: false, reason: 'dispatching' }
+    }
+    list.splice(idx, 1)
+    this.storeQueue(agentId, list)
+    this.broadcast(agentId)
+    return { ok: true }
+  }
+
+  /**
+   * Put a failed item back into the 'queued' state and start a dispatch
+   * attempt. The item keeps its position in the queue.
+   *
+   * @returns `{ ok: false }` when the item does not exist or is not
+   *   currently 'failed'.
+   */
+  retry(agentId: string, itemId: string): { ok: boolean } {
+    const list = this.queues.get(agentId) ?? []
+    const item = list.find((i) => i.id === itemId)
+    if (!item || item.status !== 'failed') return { ok: false }
+    item.status = 'queued'
+    item.error = undefined
+    // The previous attempt's start time no longer describes this item;
+    // tryDispatch stamps a new one when the retry actually begins.
+    item.startedAt = undefined
+    this.broadcast(agentId)
+    void this.tryDispatch(agentId)
+    return { ok: true }
+  }
+
+  /** Public snapshot of the agent's queue, in queue order. */
+  list(agentId: string): QueuedItemPublic[] {
+    const items = this.queues.get(agentId) ?? []
+    return items.map(toPublic)
+  }
+
+  /**
+   * Subscribe to per-agent queue state. Sends a snapshot immediately.
+   *
+   * @returns A function that removes the subscription.
+   */
+  subscribe(
+    agentId: string,
+    send: (items: QueuedItemPublic[]) => void,
+  ): () => void {
+    const listener: QueueListener = { agentId, send }
+    this.listeners.add(listener)
+    try {
+      send(this.list(agentId))
+    } catch {
+      // best effort — a failing initial send does not undo the subscription
+    }
+    return () => {
+      this.listeners.delete(listener)
+    }
+  }
+
+  /** Send the current snapshot to every listener subscribed to `agentId`. */
+  private broadcast(agentId: string): void {
+    const snapshot = this.list(agentId)
+    for (const listener of this.listeners) {
+      if (listener.agentId !== agentId) continue
+      try {
+        listener.send(snapshot)
+      } catch {
+        // ignore — one throwing listener must not block delivery to the
+        // others. It stays registered until its unsubscribe callback runs.
+      }
+    }
+  }
+
+  /**
+   * Dispatch the first 'queued' item for the agent, if any.
+   *
+   * No-op when a dispatch for this agent is already in flight, when
+   * nothing is queued, or when the agent reports status 'working'. On
+   * completion the item is removed; on error it is marked 'failed'. In
+   * both cases another attempt follows so the rest of the queue drains.
+   */
+  private async tryDispatch(agentId: string): Promise<void> {
+    if (this.workerInflight.has(agentId)) return
+    const list = this.queues.get(agentId) ?? []
+    const head = list.find((i) => i.status === 'queued')
+    if (!head) return
+
+    // Don't fire if the agent isn't actually idle yet — even if the
+    // listener happened to call us early during a state transition.
+    const state = this.deps.getAgentState(agentId)
+    if (state.status === 'working') return
+
+    head.status = 'dispatching'
+    head.startedAt = Date.now()
+    this.broadcast(agentId)
+
+    const abort = new AbortController()
+    this.workerInflight.set(agentId, abort)
+
+    try {
+      // Resolution order: explicit sessionKey on the queued item ➜
+      // the agent's existing user-chat session ➜ a fresh UUID for the
+      // first-ever message. This prevents the queue from inadvertently
+      // splintering an active conversation into a new session.
+      const targetSessionKey =
+        head.sessionKey ??
+        this.deps.resolveExistingSessionKey(agentId) ??
+        randomUUID()
+      const stream = await this.deps.chatStream({
+        agentId,
+        sessionKey: targetSessionKey,
+        message: head.message,
+        history: head.history,
+        messageParts: head.messageParts,
+        signal: abort.signal,
+      })
+      // Drain the stream to completion so the gateway run finalizes
+      // properly (writes the JSONL turn, releases the run controller).
+      const reader = stream.getReader()
+      try {
+        while (true) {
+          if (abort.signal.aborted) break
+          const { done } = await reader.read()
+          if (done) break
+        }
+      } finally {
+        await reader.cancel().catch(() => {})
+      }
+      this.removeAndBroadcast(agentId, head.id)
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err)
+      logger.warn('OutboundQueue dispatch failed', {
+        agentId,
+        itemId: head.id,
+        error: message,
+      })
+      head.status = 'failed'
+      head.error = message
+      this.broadcast(agentId)
+    } finally {
+      this.workerInflight.delete(agentId)
+    }
+
+    // If anything else is still queued and the agent's still idle, drain
+    // it now without waiting for the next state-change callback.
+    void this.tryDispatch(agentId)
+  }
+
+  /** Drop the item with `itemId` from the agent's queue and broadcast. */
+  private removeAndBroadcast(agentId: string, itemId: string): void {
+    const list = this.queues.get(agentId) ?? []
+    this.storeQueue(
+      agentId,
+      list.filter((i) => i.id !== itemId),
+    )
+    this.broadcast(agentId)
+  }
+
+  /**
+   * Store `list` as the agent's queue. An empty list removes the map
+   * entry so `queues` does not keep one empty array per agent ever seen.
+   */
+  private storeQueue(agentId: string, list: QueuedItem[]): void {
+    if (list.length === 0) {
+      this.queues.delete(agentId)
+    } else {
+      this.queues.set(agentId, list)
+    }
+  }
+
+  /**
+   * Stop reacting to status changes, abort every in-flight dispatch and
+   * drop all listeners and queued items.
+   */
+  shutdown(): void {
+    this.unsubscribe?.()
+    this.unsubscribe = null
+    for (const abort of this.workerInflight.values()) abort.abort()
+    this.workerInflight.clear()
+    this.listeners.clear()
+    this.queues.clear()
+  }
+}
+
+/**
+ * Project a queue record onto the fields exposed to subscribers; the
+ * agent id, session key, history and message parts are left out.
+ */
+function toPublic(item: QueuedItem): QueuedItemPublic {
+  const {
+    id,
+    status,
+    message,
+    attachmentsPreview,
+    error,
+    createdAt,
+    startedAt,
+  } = item
+  return {
+    id,
+    status,
+    message,
+    attachmentsPreview,
+    error,
+    createdAt,
+    startedAt,
+  }
+}
--- a/packages/browseros-agent/apps/server/src/api/services/terminal/terminal-session.ts
+++ b/packages/browseros-agent/apps/server/src/api/services/terminal/terminal-session.ts
@@ -2,6 +2,7 @@ import {
  OPENCLAW_CONTAINER_HOME,
  OPENCLAW_TERMINAL_SHELL,
 } from '@browseros/shared/constants/openclaw'
+import { buildNerdctlCommand } from '../../../lib/container'
 import { logger } from '../../../lib/logger'

 export const TERMINAL_HOME_DIR = OPENCLAW_CONTAINER_HOME
@@ -11,7 +12,9 @@ const TERMINAL_NAME = 'xterm-256color'

 interface TerminalSessionDeps {
  containerName: string
-  podmanPath: string
+  limaHome: string
+  limactlPath: string
+  vmName: string
  workingDir: string
  onExit: (exitCode: number) => void
  onOutput: (data: string) => void
@@ -24,32 +27,44 @@ export interface TerminalSession {
 }

 export function buildTerminalExecCommand(
-  podmanPath: string,
+  limactlPath: string,
+  vmName: string,
  containerName: string,
  workingDir: string,
 ): string[] {
  return [
-    podmanPath,
-    'exec',
-    '-it',
-    '-w',
-    workingDir,
-    containerName,
-    OPENCLAW_TERMINAL_SHELL,
+    limactlPath,
+    'shell',
+    vmName,
+    '--',
+    ...buildNerdctlCommand([
+      'exec',
+      '-it',
+      '-w',
+      workingDir,
+      containerName,
+      OPENCLAW_TERMINAL_SHELL,
+    ]),
  ]
 }

+/**
+ * Environment for the spawned terminal process: a copy of this
+ * process's environment with LIMA_HOME set to `limaHome` and TERM set to
+ * TERMINAL_NAME. Both keys override any inherited value.
+ */
+export function buildTerminalEnv(limaHome: string): NodeJS.ProcessEnv {
+  const env: NodeJS.ProcessEnv = { ...process.env }
+  env.LIMA_HOME = limaHome
+  env.TERM = TERMINAL_NAME
+  return env
+}
+
 export function createTerminalSession(
  deps: TerminalSessionDeps,
 ): TerminalSession {
  const decoder = new TextDecoder()
  const proc = Bun.spawn(
    buildTerminalExecCommand(
-      deps.podmanPath,
+      deps.limactlPath,
+      deps.vmName,
      deps.containerName,
      deps.workingDir,
    ),
    {
+      cwd: '/',
      terminal: {
        cols: DEFAULT_COLS,
        rows: DEFAULT_ROWS,
@@ -58,7 +73,7 @@ export function createTerminalSession(
          if (chunk) deps.onOutput(chunk)
        },
      },
-      env: { ...process.env, TERM: TERMINAL_NAME },
+      env: buildTerminalEnv(deps.limaHome),
    },
  )
  let closed = false
--- a/packages/browseros-agent/apps/server/src/browser/browser.ts
+++ b/packages/browseros-agent/apps/server/src/browser/browser.ts
@@ -517,15 +517,45 @@ export class Browser {
    return null
  }

+  /**
+   * Decide which window a new tab should be created in.
+   *
+   * - Not hidden: the caller's `windowId` is passed through unchanged
+   *   (possibly undefined).
+   * - Hidden with a `windowId` that names a non-visible window: that
+   *   window is reused.
+   * - Hidden otherwise (no id, unknown id, or a visible window): a new
+   *   hidden window is created. A warning is logged only for the
+   *   visible-window case.
+   */
+  private async resolveWindowIdForNewPage(opts?: {
+    hidden?: boolean
+    windowId?: number
+  }): Promise<number | undefined> {
+    const requestedWindowId = opts?.windowId
+    if (!opts?.hidden) return requestedWindowId
+
+    if (requestedWindowId !== undefined) {
+      const allWindows = await this.listWindows()
+      const match = allWindows.find(
+        (candidate) => candidate.windowId === requestedWindowId,
+      )
+      if (match) {
+        if (!match.isVisible) return match.windowId
+        logger.warn(
+          'Requested hidden page target window is visible, creating a new hidden window instead',
+          { requestedWindowId },
+        )
+      }
+    }
+
+    const created = await this.createWindow({ hidden: true })
+    return created.windowId
+  }
+
  async newPage(
    url: string,
    opts?: { hidden?: boolean; background?: boolean; windowId?: number },
  ): Promise<number> {
+    const windowId = await this.resolveWindowIdForNewPage(opts)
    const createResult = await this.cdp.Browser.createTab({
      url,
-      ...(opts?.hidden !== undefined && { hidden: opts.hidden }),
      ...(opts?.background !== undefined && { background: opts.background }),
-      ...(opts?.windowId !== undefined && { windowId: opts.windowId }),
+      ...(windowId !== undefined && { windowId }),
    })

    const tabId = (createResult.tab as TabInfo).tabId
@@ -553,7 +583,7 @@ export class Browser {
      loadProgress: tabInfo.loadProgress,
      isPinned: tabInfo.isPinned,
      isHidden: tabInfo.isHidden,
-      windowId: tabInfo.windowId,
+      windowId: tabInfo.windowId ?? windowId,
      index: tabInfo.index,
      groupId: tabInfo.groupId,
    })
--- a/packages/browseros-agent/apps/server/src/config.ts
+++ b/packages/browseros-agent/apps/server/src/config.ts
@@ -8,6 +8,7 @@
 import fs from 'node:fs'
 import path from 'node:path'

+import { EXTERNAL_URLS } from '@browseros/shared/constants/urls'
 import { Command, InvalidArgumentError } from 'commander'
 import { z } from 'zod'

@@ -30,6 +31,8 @@ export const ServerConfigSchema = z.object({
  instanceBrowserosVersion: z.string().optional(),
  instanceChromiumVersion: z.string().optional(),
  aiSdkDevtoolsEnabled: z.boolean(),
+  vmCachePrefetch: z.boolean(),
+  vmCacheManifestUrl: z.string().url(),
 })

 export type ServerConfig = z.infer<typeof ServerConfigSchema>
@@ -226,6 +229,11 @@ function parseConfigFile(filePath?: string): ConfigResult<PartialConfig> {
          cfg.flags?.allow_remote_in_mcp === true ? true : undefined,
        aiSdkDevtoolsEnabled:
          cfg.flags?.ai_sdk_devtools === true ? true : undefined,
+        vmCachePrefetch:
+          typeof cfg.vm_cache?.prefetch === 'boolean'
+            ? cfg.vm_cache.prefetch
+            : undefined,
+        vmCacheManifestUrl: parseTrimmedString(cfg.vm_cache?.manifest_url),
        instanceClientId:
          typeof cfg.instance?.client_id === 'string'
            ? cfg.instance.client_id
@@ -272,6 +280,10 @@ function parseRuntimeEnv(): PartialConfig {
    instanceClientId: process.env.BROWSEROS_CLIENT_ID,
    aiSdkDevtoolsEnabled:
      process.env.BROWSEROS_AI_SDK_DEVTOOLS === 'true' ? true : undefined,
+    vmCachePrefetch: parseBooleanEnv(process.env.BROWSEROS_VM_CACHE_PREFETCH),
+    vmCacheManifestUrl: parseTrimmedString(
+      process.env.BROWSEROS_VM_CACHE_MANIFEST_URL,
+    ),
  })
 }

@@ -305,6 +317,8 @@ function getDefaults(cwd: string): PartialConfig {
    executionDir: cwd,
    mcpAllowRemote: false,
    aiSdkDevtoolsEnabled: false,
+    vmCachePrefetch: true,
+    vmCacheManifestUrl: EXTERNAL_URLS.VM_CACHE_MANIFEST,
  }
 }

@@ -325,6 +339,18 @@ function safeParseInt(value: string): number | undefined {
  return Number.isNaN(num) ? undefined : num
 }

+/**
+ * Strict boolean parse for environment values: exactly 'true' or 'false'
+ * map to their boolean; anything else (including unset) is undefined.
+ */
+function parseBooleanEnv(value: string | undefined): boolean | undefined {
+  switch (value) {
+    case 'true':
+      return true
+    case 'false':
+      return false
+    default:
+      return undefined
+  }
+}
+
+/**
+ * Returns `value` with surrounding whitespace removed when it is a
+ * string with visible content; undefined for non-strings and for
+ * empty or whitespace-only strings.
+ */
+function parseTrimmedString(value: unknown): string | undefined {
+  const trimmed = typeof value === 'string' ? value.trim() : ''
+  return trimmed === '' ? undefined : trimmed
+}
+
 function omitUndefined<T extends Record<string, unknown>>(obj: T): Partial<T> {
  return Object.fromEntries(
    Object.entries(obj).filter(([_, v]) => v !== undefined),
--- a/packages/browseros-agent/apps/server/src/env.ts
+++ b/packages/browseros-agent/apps/server/src/env.ts
@@ -19,6 +19,8 @@ export const INLINED_ENV = {
  CODEGEN_SERVICE_URL: process.env.CODEGEN_SERVICE_URL,
  POSTHOG_API_KEY: process.env.POSTHOG_API_KEY,
  BROWSEROS_CONFIG_URL: process.env.BROWSEROS_CONFIG_URL,
+  BROWSEROS_VM_CACHE_PREFETCH: process.env.BROWSEROS_VM_CACHE_PREFETCH,
+  BROWSEROS_VM_CACHE_MANIFEST_URL: process.env.BROWSEROS_VM_CACHE_MANIFEST_URL,
  SKILLS_CATALOG_URL: process.env.SKILLS_CATALOG_URL,
 } as const

@@ -27,4 +29,6 @@ export const REQUIRED_FOR_PRODUCTION = [
  'CODEGEN_SERVICE_URL',
  'POSTHOG_API_KEY',
  'BROWSEROS_CONFIG_URL',
+  'BROWSEROS_VM_CACHE_PREFETCH',
+  'BROWSEROS_VM_CACHE_MANIFEST_URL',
 ] as const satisfies readonly (keyof typeof INLINED_ENV)[]
--- a/packages/browseros-agent/apps/server/src/lib/browseros-dir.ts
+++ b/packages/browseros-agent/apps/server/src/lib/browseros-dir.ts
@@ -7,7 +7,20 @@ import type { ServerDiscoveryConfig } from '@browseros/shared/types/server-confi
 import { logger } from './logger'

 export function getBrowserosDir(): string {
-  return join(homedir(), PATHS.BROWSEROS_DIR_NAME)
+  const override = process.env.BROWSEROS_DIR?.trim()
+  if (override) {
+    return override
+  }
+  const dirName =
+    process.env.NODE_ENV === 'development'
+      ? PATHS.DEV_BROWSEROS_DIR_NAME
+      : PATHS.BROWSEROS_DIR_NAME
+  return join(homedir(), dirName)
+}
+
+export function logDevelopmentBrowserosDir(): void {
+  if (process.env.NODE_ENV !== 'development') return
+  logger.info(`Using development BrowserOS directory: ${getBrowserosDir()}`)
 }

 export function getMemoryDir(): string {
@@ -35,9 +48,49 @@ export function getBuiltinSkillsDir(): string {
 }

 export function getOpenClawDir(): string {
+  return join(getVmStateDir(), PATHS.OPENCLAW_DIR_NAME)
+}
+
+export function getLegacyOpenClawDir(): string {
  return join(getBrowserosDir(), PATHS.OPENCLAW_DIR_NAME)
 }

+/** Cache root: PATHS.CACHE_DIR_NAME inside the BrowserOS directory. */
+export function getCacheDir(): string {
+  const root = getBrowserosDir()
+  return join(root, PATHS.CACHE_DIR_NAME)
+}
+
+/** The `vm` subdirectory of the cache root. */
+export function getVmCacheDir(): string {
+  const cacheRoot = getCacheDir()
+  return join(cacheRoot, 'vm')
+}
+
+/** The `lima` subdirectory of the BrowserOS directory. */
+export function getLimaHomeDir(): string {
+  const root = getBrowserosDir()
+  return join(root, 'lima')
+}
+
+/**
+ * The `vm` subdirectory of the BrowserOS directory. Distinct from
+ * getVmCacheDir(), which lives under the cache root.
+ */
+export function getVmStateDir(): string {
+  const root = getBrowserosDir()
+  return join(root, 'vm')
+}
+
+/** Alias for getVmCacheDir(): returns the VM cache directory itself. */
+export function getVmDisksDir(): string {
+  const vmCacheDir = getVmCacheDir()
+  return vmCacheDir
+}
+
+/** The `images` subdirectory of the VM cache directory. */
+export function getAgentCacheDir(): string {
+  const vmCacheDir = getVmCacheDir()
+  return join(vmCacheDir, 'images')
+}
+
+/** The `lazy-monitoring` subdirectory of the BrowserOS directory. */
+export function getLazyMonitoringDir(): string {
+  const root = getBrowserosDir()
+  return join(root, 'lazy-monitoring')
+}
+
+/** The `runs` subdirectory of the lazy-monitoring directory. */
+export function getLazyMonitoringRunsDir(): string {
+  const monitoringDir = getLazyMonitoringDir()
+  return join(monitoringDir, 'runs')
+}
+
+/**
+ * Directory for a single run: `runId` joined onto the runs directory.
+ * `runId` is used as given; no validation is performed here.
+ */
+export function getLazyMonitoringRunDir(runId: string): string {
+  const runsDir = getLazyMonitoringRunsDir()
+  return join(runsDir, runId)
+}
+
 export function getServerConfigPath(): string {
  return join(getBrowserosDir(), PATHS.SERVER_CONFIG_FILE_NAME)
 }
@@ -57,10 +110,13 @@ export function removeServerConfigSync(): void {
 }

 export async function ensureBrowserosDir(): Promise<void> {
+  logDevelopmentBrowserosDir()
  await mkdir(getMemoryDir(), { recursive: true })
  await mkdir(getSkillsDir(), { recursive: true })
  await mkdir(getBuiltinSkillsDir(), { recursive: true })
  await mkdir(getSessionsDir(), { recursive: true })
+  await mkdir(getLazyMonitoringRunsDir(), { recursive: true })
+  await mkdir(getAgentCacheDir(), { recursive: true })
 }

 export async function cleanOldSessions(): Promise<void> {
--- a/packages/browseros-agent/apps/server/src/lib/container/container-cli.ts
+++ b/packages/browseros-agent/apps/server/src/lib/container/container-cli.ts
@@ -0,0 +1,209 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { ContainerCliError } from '../vm/errors'
+import { LimaCli } from '../vm/lima-cli'
+import type { ContainerSpec, LogFn, MountSpec, PortMapping } from './types'
+
+/**
+ * Prefix `args` with the `nerdctl` executable name.
+ *
+ * @returns A new argv array; `args` is not modified.
+ */
+export function buildNerdctlCommand(args: string[]): string[] {
+  const argv = ['nerdctl']
+  return argv.concat(args)
+}
+
+/**
+ * Settings for ContainerCli. `limactlPath`, `limaHome` and `sshPath` are
+ * handed to LimaCli; `vmName` selects the VM every command runs in.
+ */
+export interface ContainerCliConfig {
+  limactlPath: string
+  limaHome: string
+  vmName: string
+  sshPath?: string
+}
+
+/** Outcome of one nerdctl invocation as collected by ContainerCli.runCommand. */
+export interface ContainerCommandResult {
+  exitCode: number
+  stdout: string
+  stderr: string
+}
+
+/**
+ * Thin wrapper that runs `nerdctl` subcommands inside the configured
+ * Lima VM via LimaCli and turns non-zero exits into ContainerCliError
+ * where a command is required to succeed.
+ */
+export class ContainerCli {
+  private readonly lima: LimaCli
+
+  constructor(private readonly cfg: ContainerCliConfig) {
+    this.lima = new LimaCli({
+      limactlPath: cfg.limactlPath,
+      limaHome: cfg.limaHome,
+      sshPath: cfg.sshPath,
+    })
+  }
+
+  /** True when `nerdctl image inspect <ref>` exits with code 0. */
+  async imageExists(ref: string): Promise<boolean> {
+    const result = await this.runCommand(['image', 'inspect', ref])
+    return result.exitCode === 0
+  }
+
+  /** Runs `nerdctl pull <ref>`; throws ContainerCliError on non-zero exit. */
+  async pullImage(ref: string, onLog?: LogFn): Promise<void> {
+    await this.runRequired(['pull', ref], onLog)
+  }
+
+  /**
+   * Runs `nerdctl load -i <tarballPath>` and returns the image refs parsed
+   * from its stdout. Throws ContainerCliError on non-zero exit.
+   */
+  async loadImage(tarballPath: string, onLog?: LogFn): Promise<string[]> {
+    const result = await this.runRequired(['load', '-i', tarballPath], onLog)
+    return parseLoadedImageRefs(result.stdout)
+  }
+
+  /** Runs `nerdctl create` with arguments derived from `spec`. */
+  async createContainer(spec: ContainerSpec, onLog?: LogFn): Promise<void> {
+    await this.runRequired(buildCreateArgs(spec), onLog)
+  }
+
+  /** Runs `nerdctl start <name>`; throws ContainerCliError on non-zero exit. */
+  async startContainer(name: string, onLog?: LogFn): Promise<void> {
+    await this.runRequired(['start', name], onLog)
+  }
+
+  /**
+   * Runs `nerdctl stop <name>`. A failure whose stderr matches
+   * isNoSuchContainer is treated as success; any other non-zero exit
+   * throws ContainerCliError.
+   */
+  async stopContainer(name: string, onLog?: LogFn): Promise<void> {
+    const result = await this.runCommand(['stop', name], onLog)
+    if (result.exitCode === 0 || isNoSuchContainer(result.stderr)) return
+    throw this.commandError(['stop', name], result)
+  }
+
+  /**
+   * Runs `nerdctl rm [-f] <name>`. As with stopContainer, a missing
+   * container is not an error.
+   */
+  async removeContainer(
+    name: string,
+    opts?: { force?: boolean },
+    onLog?: LogFn,
+  ): Promise<void> {
+    const args = ['rm']
+    if (opts?.force) args.push('-f')
+    args.push(name)
+    const result = await this.runCommand(args, onLog)
+    if (result.exitCode === 0 || isNoSuchContainer(result.stderr)) return
+    throw this.commandError(args, result)
+  }
+
+  /**
+   * Runs `nerdctl exec <name> <cmd...>` and returns its exit code. Never
+   * throws on a non-zero exit; the caller interprets the code.
+   */
+  async exec(name: string, cmd: string[], onLog?: LogFn): Promise<number> {
+    const result = await this.runCommand(['exec', name, ...cmd], onLog)
+    return result.exitCode
+  }
+
+  /**
+   * Runs `nerdctl ps` and returns its non-empty, trimmed stdout lines.
+   * With `namesOnly` the output is restricted to container names through
+   * `--format {{.Names}}`; without it the raw `ps` lines are returned.
+   */
+  async ps(opts?: { namesOnly?: boolean }): Promise<string[]> {
+    const args = opts?.namesOnly ? ['ps', '--format', '{{.Names}}'] : ['ps']
+    const result = await this.runRequired(args)
+    return result.stdout
+      .trim()
+      .split('\n')
+      .map((line) => line.trim())
+      .filter(Boolean)
+  }
+
+  /**
+   * Starts `nerdctl logs -f -n 0 <name>` and forwards both stdout and
+   * stderr output to `onLine`.
+   *
+   * @returns A stop function that kills the spawned process; calling it
+   *   more than once has no further effect.
+   */
+  tailLogs(name: string, onLine: LogFn): () => void {
+    const proc = this.lima.spawnShell(
+      this.cfg.vmName,
+      buildNerdctlCommand(['logs', '-f', '-n', '0', name]),
+      { onStdout: onLine, onStderr: onLine },
+    )
+
+    let stopped = false
+    return () => {
+      if (stopped) return
+      stopped = true
+      proc.kill()
+    }
+  }
+
+  /**
+   * Runs `nerdctl <args...>` in the VM and collects its output. Every
+   * stdout and stderr chunk is also passed to `onLog` when provided.
+   * Does not throw on a non-zero exit code.
+   *
+   * NOTE(review): assumes LimaCli.shell invokes the callbacks once per
+   * output line — confirm against LimaCli.
+   */
+  async runCommand(
+    args: string[],
+    onLog?: LogFn,
+  ): Promise<ContainerCommandResult> {
+    const stdoutLines: string[] = []
+    const stderrLines: string[] = []
+    const exitCode = await this.lima.shell(
+      this.cfg.vmName,
+      buildNerdctlCommand(args),
+      {
+        onStdout: (line) => {
+          stdoutLines.push(line)
+          onLog?.(line)
+        },
+        onStderr: (line) => {
+          stderrLines.push(line)
+          onLog?.(line)
+        },
+      },
+    )
+
+    // stdout gets a trailing newline via linesToOutput; stderr is joined
+    // without one.
+    return {
+      exitCode,
+      stdout: linesToOutput(stdoutLines),
+      stderr: stderrLines.join('\n'),
+    }
+  }
+
+  /** Like runCommand, but throws ContainerCliError on a non-zero exit. */
+  private async runRequired(
+    args: string[],
+    onLog?: LogFn,
+  ): Promise<ContainerCommandResult> {
+    const result = await this.runCommand(args, onLog)
+    if (result.exitCode === 0) return result
+    throw this.commandError(args, result)
+  }
+
+  /** Builds the error for a failed command from its args, code and stderr. */
+  private commandError(
+    args: string[],
+    result: ContainerCommandResult,
+  ): ContainerCliError {
+    return new ContainerCliError(
+      `nerdctl ${args.join(' ')}`,
+      result.exitCode,
+      result.stderr.trim(),
+    )
+  }
+}
+
+/**
+ * Translate a ContainerSpec into the argument list for `nerdctl create`.
+ *
+ * Flag order: --name, --restart, -p, --env-file, -e, -v, --add-host,
+ * health flags, then the image followed by the optional command.
+ * Optional spec fields that are unset contribute no flags.
+ */
+function buildCreateArgs(spec: ContainerSpec): string[] {
+  const healthArgs: string[] = []
+  if (spec.health) {
+    const { cmd, interval, timeout, retries } = spec.health
+    healthArgs.push('--health-cmd', cmd)
+    if (interval) healthArgs.push('--health-interval', interval)
+    if (timeout) healthArgs.push('--health-timeout', timeout)
+    // retries is checked against undefined so that 0 is still emitted.
+    if (retries !== undefined) {
+      healthArgs.push('--health-retries', String(retries))
+    }
+  }
+
+  return [
+    'create',
+    '--name',
+    spec.name,
+    ...(spec.restart ? ['--restart', spec.restart] : []),
+    ...(spec.ports ?? []).flatMap((port) => ['-p', portArg(port)]),
+    ...(spec.envFile ? ['--env-file', spec.envFile] : []),
+    ...Object.entries(spec.env ?? {}).flatMap(([key, value]) => [
+      '-e',
+      `${key}=${value}`,
+    ]),
+    ...(spec.mounts ?? []).flatMap((mount) => ['-v', mountArg(mount)]),
+    ...(spec.addHosts ?? []).flatMap((host) => ['--add-host', host]),
+    ...healthArgs,
+    spec.image,
+    ...(spec.command ?? []),
+  ]
+}
+
+/**
+ * Format a port mapping for `-p`: `hostPort:containerPort`, prefixed
+ * with `hostIp:` when a non-empty host IP is given.
+ */
+function portArg(port: PortMapping): string {
+  const ipPrefix = port.hostIp ? `${port.hostIp}:` : ''
+  return `${ipPrefix}${port.hostPort}:${port.containerPort}`
+}
+
+/**
+ * Format a mount for `-v`: `source:target`, with a trailing `:ro` when
+ * the mount is read-only.
+ */
+function mountArg(mount: MountSpec): string {
+  const roSuffix = mount.readonly ? ':ro' : ''
+  return [mount.source, mount.target].join(':') + roSuffix
+}
+
+/**
+ * Extract image refs from the stdout of an image load. Only lines of
+ * the form `Loaded image: <ref>` or `Loaded image(s): <ref>` (any case)
+ * are considered; everything else is ignored. Refs are trimmed and
+ * empty ones are dropped.
+ */
+function parseLoadedImageRefs(stdout: string): string[] {
+  const refs: string[] = []
+  for (const line of stdout.split('\n')) {
+    const captured = /^Loaded image(?:\(s\))?:\s*(.+)$/i.exec(line)?.[1]
+    const ref = captured?.trim()
+    if (ref) refs.push(ref)
+  }
+  return refs
+}
+
+/**
+ * True when `stderr` contains "no such container" or "not found",
+ * compared case-insensitively.
+ *
+ * NOTE(review): the "not found" match is broad; it would also match
+ * unrelated failures whose message contains that phrase — confirm this
+ * is intended.
+ */
+function isNoSuchContainer(stderr: string): boolean {
+  const haystack = stderr.toLowerCase()
+  const markers = ['no such container', 'not found']
+  return markers.some((marker) => haystack.includes(marker))
+}
+
+/**
+ * Join collected lines into one newline-terminated string. An empty
+ * list yields an empty string, not a lone newline.
+ */
+function linesToOutput(lines: string[]): string {
+  return lines.map((line) => `${line}\n`).join('')
+}
--- a/packages/browseros-agent/apps/server/src/lib/container/image-loader.ts
+++ b/packages/browseros-agent/apps/server/src/lib/container/image-loader.ts
@@ -0,0 +1,64 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { basename, join } from 'node:path'
+import { ContainerCliError, ImageLoadError } from '../vm/errors'
+import type { VmManifest } from '../vm/manifest'
+import type { Arch } from '../vm/paths'
+import { getImageCacheDir, hostPathToGuest } from '../vm/paths'
+import type { ContainerCli } from './container-cli'
+import type { LogFn } from './types'
+
+/**
+ * Makes sure a container image is present in the VM, loading it from
+ * the tarball the manifest lists for this architecture when it is not.
+ */
+export class ImageLoader {
+  constructor(
+    private readonly cli: ContainerCli,
+    private readonly manifest: VmManifest,
+    private readonly arch: Arch,
+    private readonly browserosRoot?: string,
+  ) {}
+
+  /**
+   * No-op when `ref` already exists. Otherwise loads the matching
+   * tarball from the image cache directory and verifies that the image
+   * exists afterwards.
+   *
+   * @throws ImageLoadError when the manifest has no matching agent or
+   *   tarball, when the load command fails with a ContainerCliError, or
+   *   when the image is still missing after a successful load. Other
+   *   errors from the load are rethrown unchanged.
+   */
+  async ensureImageLoaded(ref: string, onLog?: LogFn): Promise<void> {
+    const alreadyPresent = await this.cli.imageExists(ref)
+    if (alreadyPresent) return
+
+    const tarball = this.resolveTarball(ref)
+    const cacheDir = getImageCacheDir(this.browserosRoot)
+    // Only the file name of the manifest key is used on disk.
+    const hostPath = join(cacheDir, basename(tarball.key))
+    const guestPath = hostPathToGuest(hostPath, this.browserosRoot)
+
+    try {
+      await this.cli.loadImage(guestPath, onLog)
+    } catch (error) {
+      if (!(error instanceof ContainerCliError)) throw error
+      throw new ImageLoadError(ref, `load failed: ${error.stderr}`, error)
+    }
+
+    const presentAfterLoad = await this.cli.imageExists(ref)
+    if (presentAfterLoad) return
+    throw new ImageLoadError(
+      ref,
+      `image not present after successful load of ${guestPath}`,
+    )
+  }
+
+  /**
+   * Find the tarball for `ref` in the manifest. The first agent whose
+   * `image:version` equals `ref` decides the outcome.
+   */
+  private resolveTarball(
+    ref: string,
+  ): VmManifest['agents'][string]['tarballs'][Arch] {
+    const agent = Object.values(this.manifest.agents).find(
+      (candidate) => `${candidate.image}:${candidate.version}` === ref,
+    )
+    if (!agent) {
+      throw new ImageLoadError(ref, `no agent in manifest matches ${ref}`)
+    }
+
+    const tarball = agent.tarballs[this.arch]
+    if (!tarball) {
+      throw new ImageLoadError(ref, `no ${this.arch} tarball in manifest`)
+    }
+    return tarball
+  }
+}
--- a/packages/browseros-agent/apps/server/src/lib/container/index.ts
+++ b/packages/browseros-agent/apps/server/src/lib/container/index.ts
@@ -0,0 +1,9 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+export * from './container-cli'
+export * from './image-loader'
+export * from './types'
--- a/packages/browseros-agent/apps/server/src/lib/container/types.ts
+++ b/packages/browseros-agent/apps/server/src/lib/container/types.ts
@@ -0,0 +1,44 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+/** Callback that receives one log message at a time. */
+export type LogFn = (msg: string) => void
+
+/** One published port; `hostIp` optionally restricts the host side. */
+export interface PortMapping {
+  hostIp?: string
+  hostPort: number
+  containerPort: number
+}
+
+/** One bind mount from `source` to `target`, optionally read-only. */
+export interface MountSpec {
+  source: string
+  target: string
+  readonly?: boolean
+}
+
+/** Container health check settings; only `cmd` is mandatory. */
+export interface HealthConfig {
+  cmd: string
+  interval?: string
+  timeout?: string
+  retries?: number
+}
+
+/** Declarative description of a container to create. */
+export interface ContainerSpec {
+  name: string
+  image: string
+  restart?: 'no' | 'unless-stopped' | 'always'
+  ports?: PortMapping[]
+  /** Individual environment variables, as name -> value. */
+  env?: Record<string, string>
+  /** Path to an env file, in addition to `env`. */
+  envFile?: string
+  mounts?: MountSpec[]
+  addHosts?: string[]
+  health?: HealthConfig
+  /** Arguments placed after the image name. */
+  command?: string[]
+}
+
+/** A single output line tagged with the stream it came from. */
+export interface LogLine {
+  stream: 'stdout' | 'stderr'
+  line: string
+}
--- a/packages/browseros-agent/apps/server/src/lib/vm/cache-sync.ts
+++ b/packages/browseros-agent/apps/server/src/lib/vm/cache-sync.ts
@@ -0,0 +1,322 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+import { createHash } from 'node:crypto'
+import { createReadStream, existsSync } from 'node:fs'
+import { mkdir, readFile, rename, rm } from 'node:fs/promises'
+import { arch as hostArch } from 'node:os'
+import { dirname, join } from 'node:path'
+import { EXTERNAL_URLS } from '@browseros/shared/constants/urls'
+import type { VmArtifact, VmManifest } from './manifest'
+import type { Arch } from './paths'
+import { getCachedManifestPath } from './paths'
+
+// Fetch timeout in milliseconds used when the caller does not pass `timeoutMs`.
+const DEFAULT_TIMEOUT_MS = 30_000
+// Architectures synced when `allArches` is requested.
+const ARCHES: Arch[] = ['arm64', 'x64']
+// Manifest URL path ending that is stripped as a whole when deriving artifact URLs.
+const CANONICAL_MANIFEST_SUFFIX = '/vm/manifest.json'
+
+/**
+ * Caller-supplied settings for a VM cache sync. Every field is optional and
+ * is turned into a concrete `SyncConfig` by `resolveSyncConfig`.
+ */
+export interface VmCacheSyncOptions {
+  /** Passed to `getCachedManifestPath` to locate the cached manifest. */
+  browserosRoot?: string
+  /**
+   * Manifest URL override. When unset or blank, the
+   * `BROWSEROS_VM_CACHE_MANIFEST_URL` environment variable is tried next,
+   * then `EXTERNAL_URLS.VM_CACHE_MANIFEST`.
+   */
+  manifestUrl?: string
+  /** When truthy, sync every architecture in `ARCHES` instead of only the host's. */
+  allArches?: boolean
+  /** Fetch implementation to use; defaults to the global `fetch`. */
+  fetchImpl?: typeof fetch
+  /** Replaces the `os.arch()` value used to choose the host architecture. */
+  rawHostArch?: NodeJS.Architecture
+  /** Timeout in milliseconds handed to each fetch; defaults to `DEFAULT_TIMEOUT_MS`. */
+  timeoutMs?: number
+}
+
+/** Outcome of one completed sync. */
+export interface VmCacheSyncResult {
+  /** Manifest keys of the artifacts that were downloaded during this sync. */
+  downloaded: string[]
+  /** Path of the cached manifest file that was written. */
+  manifestPath: string
+  /** True when the download plan was empty, i.e. nothing had to be fetched. */
+  skipped: boolean
+}
+
+// Syncs that are currently running, keyed by `syncKey(cfg)`, so that callers
+// with the same configuration share one promise.
+const inFlight = new Map<string, Promise<VmCacheSyncResult>>()
+
+/**
+ * Starts a VM cache sync, or joins the one already running for the same
+ * configuration.
+ *
+ * In this file the body is identical to `ensureVmCacheSynced`; both delegate
+ * to `startOrReuseSync`.
+ *
+ * @param options Optional sync settings.
+ * @returns A promise for the sync result; it rejects if configuration
+ *   resolution or the sync itself fails.
+ */
+export function prefetchVmCache(
+  options: VmCacheSyncOptions = {},
+): Promise<VmCacheSyncResult> {
+  return startOrReuseSync(options)
+}
+
+/**
+ * Runs a VM cache sync against the remote manifest, or joins the one already
+ * running for the same configuration.
+ *
+ * In this file the body is identical to `prefetchVmCache`; both delegate to
+ * `startOrReuseSync`.
+ *
+ * @param options Optional sync settings.
+ * @returns A promise for the sync result; it rejects if configuration
+ *   resolution or the sync itself fails.
+ */
+export function ensureVmCacheSynced(
+  options: VmCacheSyncOptions = {},
+): Promise<VmCacheSyncResult> {
+  return startOrReuseSync(options)
+}
+
+/**
+ * Makes sure a cached manifest exists on disk, syncing only when it does not.
+ *
+ * If a sync for the same configuration is already running, it is awaited
+ * first and its failure is ignored. Afterwards, a manifest file on disk is
+ * enough to return; otherwise a sync is started (or joined) and its failure
+ * propagates to the caller.
+ *
+ * @param options Optional sync settings.
+ */
+export async function ensureVmCacheAvailable(
+  options: VmCacheSyncOptions = {},
+): Promise<void> {
+  const resolved = resolveSyncConfig(options)
+
+  const running = inFlight.get(syncKey(resolved))
+  if (running) {
+    try {
+      await running
+    } catch {
+      // A failed in-flight sync is not fatal here; the manifest check below decides.
+    }
+  }
+
+  const manifestOnDisk = existsSync(
+    getCachedManifestPath(resolved.browserosRoot),
+  )
+  if (!manifestOnDisk) {
+    await startOrReuseSyncWithConfig(resolved)
+  }
+}
+
+/**
+ * Resolves the options into a config and starts or joins the matching sync.
+ *
+ * This function is not `async`, so a synchronous throw (for example from
+ * `resolveSyncConfig` on an unsupported host arch) is caught and turned into
+ * a rejected promise; callers therefore always receive a promise.
+ */
+function startOrReuseSync(
+  options: VmCacheSyncOptions,
+): Promise<VmCacheSyncResult> {
+  try {
+    return startOrReuseSyncWithConfig(resolveSyncConfig(options))
+  } catch (error) {
+    return Promise.reject(error)
+  }
+}
+
+/**
+ * Returns the sync already running for this configuration, or starts one.
+ *
+ * Callers whose configs produce the same `syncKey` share a single promise.
+ * The entry is removed from `inFlight` once that promise settles.
+ */
+function startOrReuseSyncWithConfig(
+  cfg: SyncConfig,
+): Promise<VmCacheSyncResult> {
+  const key = syncKey(cfg)
+  const existing = inFlight.get(key)
+  if (existing) return existing
+  const current = syncVmCache(cfg).finally(() => {
+    // Only clear the slot if it still holds this exact promise.
+    if (inFlight.get(key) === current) inFlight.delete(key)
+  })
+  inFlight.set(key, current)
+  return current
+}
+
+/**
+ * Performs one full cache sync.
+ *
+ * Steps, in order: fetch the remote manifest, read the cached manifest (if
+ * any), work out which tarballs are missing or stale, download them one at a
+ * time, then replace the cached manifest with the remote one.
+ *
+ * The manifest is written to a temporary sibling file and renamed into place
+ * so readers never observe a partially written manifest. If that write or
+ * rename fails, the temporary file is removed before the error is rethrown,
+ * matching how `downloadArtifact` cleans up its `.partial` file.
+ *
+ * @param cfg Resolved sync configuration.
+ * @returns The keys that were downloaded, the manifest path, and whether the
+ *   download plan was empty.
+ * @throws Whatever the manifest fetch, a download, or the manifest write throws.
+ */
+async function syncVmCache(cfg: SyncConfig): Promise<VmCacheSyncResult> {
+  const remote = await fetchManifest(cfg)
+  const manifestPath = getCachedManifestPath(cfg.browserosRoot)
+  const local = await readLocalManifest(manifestPath)
+  const plan = await planDownloads({
+    remote,
+    local,
+    cacheRoot: cacheRootForManifest(manifestPath),
+    arches: cfg.arches,
+  })
+
+  for (const item of plan) {
+    await downloadArtifact(
+      cfg.fetchImpl,
+      artifactUrlForKey(cfg.manifestUrl, item.key),
+      item.destPath,
+      item.sha256,
+      cfg.timeoutMs,
+    )
+  }
+
+  await mkdir(dirname(manifestPath), { recursive: true })
+  // pid + timestamp keep concurrent writers from sharing a temp file name.
+  const tempPath = `${manifestPath}.${process.pid}.${Date.now()}.tmp`
+  try {
+    await Bun.write(tempPath, `${JSON.stringify(remote, null, 2)}\n`)
+    await rename(tempPath, manifestPath)
+  } catch (error) {
+    // Do not leave a stray temp manifest behind when the swap fails.
+    await rm(tempPath, { force: true })
+    throw error
+  }
+
+  return {
+    downloaded: plan.map((item) => item.key),
+    manifestPath,
+    skipped: plan.length === 0,
+  }
+}
+
+/**
+ * Fully resolved sync settings, produced by `resolveSyncConfig`.
+ *
+ * Unlike `VmCacheSyncOptions`, everything except `browserosRoot` is required.
+ */
+interface SyncConfig {
+  browserosRoot?: string
+  manifestUrl: string
+  fetchImpl: typeof fetch
+  /** Architectures whose tarballs will be considered for download. */
+  arches: Arch[]
+  /** Timeout in milliseconds handed to each fetch. */
+  timeoutMs: number
+}
+
+/**
+ * Fills in defaults for every sync setting.
+ *
+ * The manifest URL is taken from the options, then from the
+ * `BROWSEROS_VM_CACHE_MANIFEST_URL` environment variable, then from
+ * `EXTERNAL_URLS.VM_CACHE_MANIFEST`; blank strings count as unset.
+ *
+ * @throws Error from `selectSyncArches` when the host arch is unsupported.
+ */
+function resolveSyncConfig(options: VmCacheSyncOptions): SyncConfig {
+  const browserosRoot = options.browserosRoot
+
+  let manifestUrl = trimNonEmpty(options.manifestUrl)
+  if (manifestUrl === undefined) {
+    manifestUrl = trimNonEmpty(process.env.BROWSEROS_VM_CACHE_MANIFEST_URL)
+  }
+  if (manifestUrl === undefined) {
+    manifestUrl = EXTERNAL_URLS.VM_CACHE_MANIFEST
+  }
+
+  const fetchImpl = options.fetchImpl ?? fetch
+  const arches = selectSyncArches(options)
+  const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS
+
+  return { browserosRoot, manifestUrl, fetchImpl, arches, timeoutMs }
+}
+
+/**
+ * Downloads and parses the remote VM manifest.
+ *
+ * The parsed body is checked for an `agents` object before it is returned,
+ * so an unrelated JSON document (an error payload, `null`, an empty object)
+ * is rejected here with a clear message instead of failing later while the
+ * download plan is built.
+ *
+ * @param cfg Resolved sync configuration; `manifestUrl`, `fetchImpl` and
+ *   `timeoutMs` are used.
+ * @returns The parsed manifest.
+ * @throws Error when the response status is not OK, when the body is not
+ *   valid JSON, or when it has no `agents` object.
+ */
+async function fetchManifest(cfg: SyncConfig): Promise<VmManifest> {
+  const response = await fetchWithTimeout(
+    cfg.fetchImpl,
+    cfg.manifestUrl,
+    cfg.timeoutMs,
+  )
+  if (!response.ok) {
+    throw new Error(
+      `manifest fetch failed: ${cfg.manifestUrl} (${response.status})`,
+    )
+  }
+
+  const parsed: unknown = await response.json()
+  const agents =
+    typeof parsed === 'object' && parsed !== null
+      ? (parsed as { agents?: unknown }).agents
+      : undefined
+  if (typeof agents !== 'object' || agents === null) {
+    throw new Error(
+      `manifest fetch failed: ${cfg.manifestUrl} (missing "agents" object)`,
+    )
+  }
+  return parsed as VmManifest
+}
+
+/** One artifact that `planDownloads` decided must be fetched. */
+interface DownloadPlanItem {
+  /** Artifact key from the remote manifest; also used to build the download URL. */
+  key: string
+  /** Local path the artifact is written to: the cache root joined with `key`. */
+  destPath: string
+  /** Expected SHA-256 from the remote manifest. */
+  sha256: string
+}
+
+/**
+ * Works out which tarballs have to be downloaded.
+ *
+ * For every requested architecture and every agent in the remote manifest,
+ * the agent's tarball is added to the plan when `needsDownload` says the
+ * local copy is missing or stale. Agents without a tarball for an
+ * architecture are skipped.
+ *
+ * Artifact keys come from a manifest fetched over the network and are joined
+ * onto the cache root, so a key containing a `..` path segment is rejected
+ * rather than being allowed to resolve outside the cache directory.
+ *
+ * @param opts.remote Freshly fetched manifest.
+ * @param opts.local Previously cached manifest, or `null` when there is none.
+ * @param opts.cacheRoot Directory that artifact keys are resolved against.
+ * @param opts.arches Architectures to consider.
+ * @returns Plan items in architecture-then-agent order.
+ * @throws Error when a remote artifact key contains a `..` segment.
+ */
+async function planDownloads(opts: {
+  remote: VmManifest
+  local: VmManifest | null
+  cacheRoot: string
+  arches: Arch[]
+}): Promise<DownloadPlanItem[]> {
+  const out: DownloadPlanItem[] = []
+  for (const arch of opts.arches) {
+    for (const [name, agent] of Object.entries(opts.remote.agents)) {
+      const remote = agent.tarballs[arch]
+      if (!remote) continue
+      // join() normalizes `..`, so such a key could escape the cache root.
+      if (remote.key.split(/[\\/]+/).includes('..')) {
+        throw new Error(`unsafe artifact key in manifest: ${remote.key}`)
+      }
+      const destPath = join(opts.cacheRoot, remote.key)
+      if (
+        !(await needsDownload(
+          remote,
+          opts.local?.agents[name]?.tarballs[arch],
+          destPath,
+        ))
+      ) {
+        continue
+      }
+      out.push({ key: remote.key, destPath, sha256: remote.sha256 })
+    }
+  }
+  return out
+}
+
+/**
+ * Decides whether an artifact has to be (re)downloaded.
+ *
+ * A missing file always needs a download. If the cached manifest records the
+ * same hash as the remote one, the file on disk is trusted without hashing.
+ * Otherwise the file is hashed and compared to the remote hash; a failure to
+ * hash it counts as needing a download.
+ *
+ * @param remote Artifact entry from the remote manifest.
+ * @param local Matching entry from the cached manifest, if any.
+ * @param destPath Local path of the artifact.
+ */
+async function needsDownload(
+  remote: VmArtifact,
+  local: VmArtifact | undefined,
+  destPath: string,
+): Promise<boolean> {
+  const fileMissing = !existsSync(destPath)
+  if (fileMissing) return true
+
+  const manifestsAgree = local?.sha256 === remote.sha256
+  if (manifestsAgree) return false
+
+  let onDisk: string
+  try {
+    onDisk = await sha256File(destPath)
+  } catch {
+    return true
+  }
+  return onDisk !== remote.sha256
+}
+
+/**
+ * Downloads one artifact and installs it at `destPath` once its hash checks out.
+ *
+ * The body is streamed into `<destPath>.partial`, hashed, and renamed onto
+ * `destPath` only when the hash equals `sha256`. A leftover partial file is
+ * removed before starting, and the partial file is removed again on any
+ * failure before the error is rethrown.
+ *
+ * `timeoutMs` is passed only to `fetchWithTimeout`. In this file that helper
+ * clears its timer as soon as the fetch call settles, so the read loop over
+ * the response body below is not bounded by the timeout.
+ *
+ * @throws Error when the response is not OK or has no body, when the hash
+ *   does not match, or when any file operation fails.
+ */
+async function downloadArtifact(
+  fetchImpl: typeof fetch,
+  url: string,
+  destPath: string,
+  sha256: string,
+  timeoutMs: number,
+): Promise<void> {
+  const partialPath = `${destPath}.partial`
+  await mkdir(dirname(destPath), { recursive: true })
+  // Start from a clean slate in case an earlier attempt left a partial file.
+  await rm(partialPath, { force: true })
+
+  try {
+    const response = await fetchWithTimeout(fetchImpl, url, timeoutMs)
+    if (!response.ok || !response.body) {
+      throw new Error(`download failed: ${url} (${response.status})`)
+    }
+
+    const sink = Bun.file(partialPath).writer()
+    const reader = response.body.getReader()
+    try {
+      for (;;) {
+        const { done, value } = await reader.read()
+        if (done) break
+        sink.write(value)
+      }
+    } finally {
+      // End the writer even when reading fails part-way through.
+      await sink.end()
+    }
+
+    // Verify before renaming so `destPath` only ever holds verified content.
+    await verifySha256(partialPath, sha256)
+    await rename(partialPath, destPath)
+  } catch (error) {
+    await rm(partialPath, { force: true })
+    throw error
+  }
+}
+
+/**
+ * Calls `fetchImpl` with an abort signal that fires after `timeoutMs`.
+ *
+ * The timer is cleared as soon as the fetch call settles, whether it
+ * succeeded or failed.
+ *
+ * @param fetchImpl Fetch implementation to call.
+ * @param url URL to request.
+ * @param timeoutMs Milliseconds to wait before aborting.
+ * @returns The response returned by `fetchImpl`.
+ * @throws Error with a "fetch timed out" message when the failure is named
+ *   `AbortError`; any other failure is rethrown unchanged.
+ */
+async function fetchWithTimeout(
+  fetchImpl: typeof fetch,
+  url: string,
+  timeoutMs: number,
+): Promise<Response> {
+  const abort = new AbortController()
+  const handle = setTimeout(() => {
+    abort.abort()
+  }, timeoutMs)
+
+  try {
+    const response = await fetchImpl(url, { signal: abort.signal })
+    return response
+  } catch (failure) {
+    const timedOut = (failure as { name?: string }).name === 'AbortError'
+    throw timedOut
+      ? new Error(`fetch timed out after ${timeoutMs}ms: ${url}`)
+      : failure
+  } finally {
+    clearTimeout(handle)
+  }
+}
+
+/**
+ * Checks that the file at `path` hashes to `expected`.
+ *
+ * @param path File to hash.
+ * @param expected Expected SHA-256 as a hex string.
+ * @throws Error naming the path and both hashes when they differ.
+ */
+async function verifySha256(path: string, expected: string): Promise<void> {
+  const digest = await sha256File(path)
+  if (digest === expected) return
+
+  throw new Error(
+    `sha256 mismatch for ${path}: expected ${expected}, got ${digest}`,
+  )
+}
+
+/**
+ * Computes the SHA-256 of a file by streaming it through a hash.
+ *
+ * @param path File to read.
+ * @returns The digest as a hex string.
+ * @throws The read stream's error when the file cannot be read.
+ */
+async function sha256File(path: string): Promise<string> {
+  const hasher = createHash('sha256')
+  await new Promise<void>((resolve, reject) => {
+    createReadStream(path)
+      .on('data', (piece) => {
+        hasher.update(piece)
+      })
+      .once('error', reject)
+      .once('end', () => resolve())
+  })
+  return hasher.digest('hex')
+}
+
+/**
+ * Reads the cached manifest from disk.
+ *
+ * The cached manifest is only a hint that lets the sync skip re-hashing
+ * files, and every sync rewrites it. A missing file and a file that is not
+ * valid JSON are therefore both reported as "no local manifest" so that a
+ * corrupt cache entry repairs itself on the next sync instead of blocking it.
+ *
+ * @param path Path of the cached manifest.
+ * @returns The parsed manifest, or `null` when the file is missing or is not
+ *   valid JSON.
+ * @throws Any other read error (for example a permission error).
+ */
+async function readLocalManifest(path: string): Promise<VmManifest | null> {
+  let raw: string
+  try {
+    raw = await readFile(path, 'utf8')
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === 'ENOENT') return null
+    throw error
+  }
+
+  try {
+    return JSON.parse(raw) as VmManifest
+  } catch (error) {
+    // Unparseable cache content is treated like an absent manifest.
+    if (error instanceof SyntaxError) return null
+    throw error
+  }
+}
+
+/**
+ * Picks the architectures to sync.
+ *
+ * With `allArches` set, a copy of `ARCHES` is returned. Otherwise the host
+ * arch (from `rawHostArch`, else `os.arch()`) is mapped: `arm64` stays
+ * `arm64`, and both `x64` and `ia32` map to `x64`.
+ *
+ * @throws Error for any other host arch.
+ */
+function selectSyncArches(options: VmCacheSyncOptions): Arch[] {
+  if (options.allArches) {
+    return [...ARCHES]
+  }
+
+  const detected = options.rawHostArch ?? hostArch()
+  switch (detected) {
+    case 'arm64':
+      return ['arm64']
+    case 'x64':
+    case 'ia32':
+      return ['x64']
+    default:
+      throw new Error(`unsupported host arch: ${detected}`)
+  }
+}
+
+/**
+ * Returns the cache root for a manifest path: the directory two levels above
+ * the manifest file.
+ */
+function cacheRootForManifest(manifestPath: string): string {
+  const manifestDir = dirname(manifestPath)
+  return dirname(manifestDir)
+}
+
+/**
+ * Builds the `inFlight` map key for a configuration.
+ *
+ * The key combines the cached manifest path, the manifest URL, the
+ * comma-joined architectures and the timeout, separated by NUL characters.
+ */
+function syncKey(cfg: SyncConfig): string {
+  const parts: string[] = []
+  parts.push(getCachedManifestPath(cfg.browserosRoot))
+  parts.push(cfg.manifestUrl)
+  parts.push(cfg.arches.join(','))
+  parts.push(String(cfg.timeoutMs))
+  return parts.join('\0')
+}
+
+/**
+ * Derives an artifact's download URL from the manifest URL and the artifact key.
+ *
+ * When the manifest path ends with `CANONICAL_MANIFEST_SUFFIX`, that whole
+ * suffix is removed to get the base path; otherwise only the last path
+ * segment is removed. The key, minus any leading slashes, is appended to the
+ * base. Query string and fragment are dropped.
+ *
+ * @param manifestUrl Absolute manifest URL.
+ * @param key Artifact key from the manifest.
+ * @returns The absolute artifact URL.
+ */
+function artifactUrlForKey(manifestUrl: string, key: string): string {
+  const relativeKey = key.replace(/^\/+/, '')
+  const target = new URL(manifestUrl)
+  const manifestPath = target.pathname.replace(/\/+$/, '')
+
+  let base: string
+  if (manifestPath.endsWith(CANONICAL_MANIFEST_SUFFIX)) {
+    base = manifestPath.slice(
+      0,
+      manifestPath.length - CANONICAL_MANIFEST_SUFFIX.length,
+    )
+  } else {
+    const lastSlash = manifestPath.lastIndexOf('/')
+    base = manifestPath.slice(0, lastSlash < 0 ? 0 : lastSlash)
+  }
+
+  target.pathname = `${base.replace(/\/+$/, '')}/${relativeKey}`
+  target.search = ''
+  target.hash = ''
+  return target.toString()
+}
+
+/**
+ * Trims a string and reports blank results as `undefined`.
+ *
+ * @param value String to trim, or `undefined`.
+ * @returns The trimmed string, or `undefined` when the input is missing or
+ *   contains only whitespace.
+ */
+function trimNonEmpty(value: string | undefined): string | undefined {
+  if (value == null) return undefined
+  const stripped = value.trim()
+  return stripped.length > 0 ? stripped : undefined
+}
--- a/packages/browseros-agent/apps/server/src/lib/vm/errors.ts
+++ b/packages/browseros-agent/apps/server/src/lib/vm/errors.ts
@@ -0,0 +1,60 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+/**
+ * Base class for the error types declared in this module.
+ *
+ * `name` is set from `new.target`, so each subclass instance reports its own
+ * class name without overriding anything.
+ */
+export class VmError extends Error {
+  constructor(message: string) {
+    super(message)
+    this.name = new.target.name
+  }
+}
+
+// Marker subclass with no extra fields.
+// NOTE(review): presumably raised when the VM is used before it is ready — confirm at the throw sites.
+export class VmNotReadyError extends VmError {}
+
+// Marker subclass with no extra fields.
+// NOTE(review): presumably raised when persisted VM state is unusable — confirm at the throw sites.
+export class VmStateCorruptedError extends VmError {}
+
+/**
+ * Failure of a command, carrying its exit code and captured stderr.
+ *
+ * The message has the form `<command> failed with exit code <n>: <stderr>`.
+ */
+export class LimaCommandError extends VmError {
+  public readonly exitCode: number
+  public readonly stderr: string
+
+  constructor(command: string, exitCode: number, stderr: string) {
+    super(`${command} failed with exit code ${exitCode}: ${stderr}`)
+    this.exitCode = exitCode
+    this.stderr = stderr
+  }
+}
+
+/**
+ * Failure of a command, carrying its exit code and captured stderr.
+ *
+ * The message has the form `<command> failed with exit code <n>: <stderr>`.
+ */
+export class ContainerCliError extends VmError {
+  public readonly exitCode: number
+  public readonly stderr: string
+
+  constructor(command: string, exitCode: number, stderr: string) {
+    super(`${command} failed with exit code ${exitCode}: ${stderr}`)
+    this.exitCode = exitCode
+    this.stderr = stderr
+  }
+}
+
+/**
+ * Failure to load an image.
+ *
+ * The message is `failed to load image <imageRef>: <message>`. The optional
+ * `cause` is stored on the instance through the parameter property; it is
+ * not forwarded to the base constructor.
+ */
+export class ImageLoadError extends VmError {
+  constructor(
+    public readonly imageRef: string,
+    message: string,
+    public override readonly cause?: unknown,
+  ) {
+    super(`failed to load image ${imageRef}: ${message}`)
+  }
+}
+
+/**
+ * Reports that no VM manifest exists at `manifestPath`.
+ *
+ * The message text comes from `manifestMissingMessage`, which appends a
+ * setup hint when `NODE_ENV` is `development`.
+ */
+export class ManifestMissingError extends VmError {
+  constructor(public readonly manifestPath: string) {
+    super(manifestMissingMessage(manifestPath))
+  }
+}
+
+/**
+ * Builds the message for a missing manifest.
+ *
+ * When `NODE_ENV` is `development`, a hint to run the dev setup script is
+ * appended; otherwise only the path is reported.
+ */
+function manifestMissingMessage(manifestPath: string): string {
+  const base = `VM manifest is missing at ${manifestPath}`
+  return process.env.NODE_ENV === 'development'
+    ? `${base}; run bun run dev:setup before starting the server`
+    : base
+}
--- a/packages/browseros-agent/apps/server/src/lib/vm/index.ts
+++ b/packages/browseros-agent/apps/server/src/lib/vm/index.ts
@@ -0,0 +1,13 @@
+/**
+ * @license
+ * Copyright 2025 BrowserOS
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+export * from './errors'
+export * from './lima-cli'
+export * from './lima-config'
+export * from './manifest'
+export * from './paths'
+export * from './telemetry'
+export * from './vm-runtime'
--- a/Show More
+++ b/Show More