fix(cli): add install header to release workflow

feat(cli): add install commands to release notes
feat: add CDN-hosted CLI installer flow (#588)
2026-05-14 08:03:58 +00:00 · 2026-03-26 18:04:10 -07:00 · 2026-03-26 17:58:42 -07:00 · 2026-03-26 17:41:03 -07:00 · 2026-03-26 17:05:21 -07:00 · 2026-03-27 01:28:04 +05:30
150 changed files with 12742 additions and 1078 deletions
--- a/.github/workflows/eval-weekly.yml
+++ b/.github/workflows/eval-weekly.yml
@@ -0,0 +1,98 @@
+name: Weekly Eval
+
+# Triggers: a weekly schedule, pushes to main that touch the agent or tool
+# sources, and manual dispatch with an optional eval config override.
+on:
+  schedule:
+    # 06:00 UTC every Saturday (day-of-week 6)
+    - cron: "0 6 * * 6"
+  push:
+    branches:
+      - main
+    paths:
+      - "packages/browseros-agent/apps/server/src/agent/**"
+      - "packages/browseros-agent/apps/server/src/tools/**"
+  workflow_dispatch:
+    inputs:
+      config:
+        description: "Eval config file (relative to apps/eval/)"
+        required: false
+        default: "configs/browseros-agent-weekly.json"
+
+# Grant the workflow token read-only access to repository contents.
+permissions:
+  contents: read
+
+jobs:
+  eval:
+    runs-on: ubuntu-latest
+    # Upper bound for one eval run: 360 minutes
+    timeout-minutes: 360
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # Installs the pinned BrowserOS .deb that the eval runs against.
+      - name: Install BrowserOS
+        env:
+          # Single source of truth for the pinned release; bump here to change the build under eval
+          BROWSEROS_VERSION: "v0.44.0.1"
+        run: |
+          DEB="BrowserOS_${BROWSEROS_VERSION}_amd64.deb"
+          wget -q "https://github.com/browseros-ai/BrowserOS/releases/download/${BROWSEROS_VERSION}/${DEB}"
+          # Install through apt rather than bare `dpkg -i` so the package's declared
+          # dependencies are resolved instead of failing the step when one is missing.
+          sudo apt-get update
+          sudo apt-get install -y "./${DEB}"
+          # Best-effort sanity check: print the version, or at least where the binary landed
+          browseros --version || echo "BrowserOS installed at $(which browseros)"
+
+      # Bun runtime used by the install, build and eval commands below
+      - name: Install Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      # Install workspace packages without lifecycle scripts, then build the agent SDK
+      - name: Install dependencies
+        working-directory: packages/browseros-agent
+        run: |
+          bun install --ignore-scripts
+          bun run build:agent-sdk
+
+      # xvfb provides the virtual display that the eval step wraps the run in
+      - name: Install xvfb
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y xvfb
+
+      # Downloads the latest NopeCHA automation build and unpacks it into extensions/nopecha.
+      - name: Install captcha solver extension
+        working-directory: packages/browseros-agent/apps/eval
+        run: |
+          mkdir -p extensions
+          # -f makes curl fail on an HTTP error instead of saving the error page as the
+          # "zip"; -S keeps the error message visible even though progress output is silenced.
+          curl -fsSL -o /tmp/nopecha.zip https://github.com/NopeCHALLC/nopecha-extension/releases/latest/download/chromium_automation.zip
+          unzip -qo /tmp/nopecha.zip -d extensions/nopecha
+
+      # Runs the eval entry point (src/index.ts) under a virtual X display,
+      # using the config file named by EVAL_CONFIG.
+      - name: Run eval
+        working-directory: packages/browseros-agent/apps/eval
+        env:
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          NOPECHA_API_KEY: ${{ secrets.NOPECHA_API_KEY }}
+          BROWSEROS_BINARY: /usr/bin/browseros
+          # Use the dispatch input when one was given, otherwise the weekly config
+          EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}
+        run: |
+          echo "Running eval with config: $EVAL_CONFIG"
+          # xvfb-run supplies a 1440x900, 24-bit virtual screen on an automatically chosen server number
+          xvfb-run --auto-servernum --server-args="-screen 0 1440x900x24" bun run src/index.ts -c "$EVAL_CONFIG"
+
+      # Uploads the results directory for this config via scripts/upload-run.ts.
+      # Runs only when every earlier step succeeded.
+      - name: Upload runs to R2
+        if: success()
+        working-directory: packages/browseros-agent/apps/eval
+        env:
+          EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
+          EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
+          EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
+          EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
+          EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
+          # Same expression as in the eval step, so both steps resolve the same config
+          EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}
+        run: |
+          # The results directory is named after the config file without its .json suffix
+          CONFIG_NAME=$(basename "$EVAL_CONFIG" .json)
+          bun scripts/upload-run.ts "results/$CONFIG_NAME"
+
+      # Runs the weekly report script with /tmp/eval-report.html as its argument;
+      # the next step uploads that same path as an artifact.
+      - name: Generate trend report
+        if: success()
+        working-directory: packages/browseros-agent
+        env:
+          EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
+          EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
+          EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
+          EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
+          EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
+        run: bun apps/eval/scripts/weekly-report.ts /tmp/eval-report.html
+
+      # Artifact name includes the run id, so each workflow run gets its own report artifact
+      - name: Upload report as artifact
+        if: success()
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-report-${{ github.run_id }}
+          path: /tmp/eval-report.html
--- a/.github/workflows/pr-title.yml
+++ b/.github/workflows/pr-title.yml
@@ -2,7 +2,7 @@ name: PR Conventional Commit Validation

 on:
  pull_request:
-    types: [opened, synchronize, reopened, edited]
+    types: [opened, edited]

 permissions:
  pull-requests: write
--- a/.github/workflows/release-agent-extension.yml
+++ b/.github/workflows/release-agent-extension.yml
@@ -0,0 +1,148 @@
+name: Release Agent Extension
+
+# Manual trigger only
+on:
+  workflow_dispatch:
+
+# All runs share one concurrency group; a newer run does not cancel one already in progress
+concurrency:
+  group: release-agent-extension
+  cancel-in-progress: false
+
+jobs:
+  release:
+    # Skip the job unless the workflow was dispatched on main
+    if: github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    # Later steps push a tag, create a release, query commit-to-PR associations and open a PR
+    permissions:
+      contents: write
+      pull-requests: write
+    # Steps run in the extension app directory unless they set their own working-directory
+    defaults:
+      run:
+        working-directory: packages/browseros-agent/apps/agent
+
+    steps:
+      # fetch-depth: 0 disables the shallow clone; the release-notes step walks
+      # `git tag -l` and `git log <prev-tag>..HEAD`
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - uses: oven-sh/setup-bun@v2
+
+      # Dependencies are installed from the workspace root, not the app directory
+      - name: Install dependencies
+        run: bun ci
+        working-directory: packages/browseros-agent
+
+      # Runs the codegen script and then the zip script, with the API base URL set for the build
+      - name: Build and zip extension
+        run: bun run codegen && bun run zip
+        env:
+          VITE_PUBLIC_BROWSEROS_API: https://api.browseros.com
+
+      # Publishes step outputs consumed by later steps:
+      #   version     - "version" field of ./package.json
+      #   release_sha - commit currently checked out
+      #   zip_path    - absolute path of the first dist/*-chrome.zip
+      #   zip_name    - file name of that zip
+      - name: Get version and zip path
+        id: version
+        run: |
+          echo "version=$(node -p "require('./package.json').version")" >> "$GITHUB_OUTPUT"
+          echo "release_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+          # "$(pwd)/..." makes the path absolute, so it stays valid in steps that run from another directory
+          ZIP_FILE=$(ls "$(pwd)/dist/"*-chrome.zip | head -n 1)
+          echo "zip_path=$ZIP_FILE" >> "$GITHUB_OUTPUT"
+          echo "zip_name=$(basename "$ZIP_FILE")" >> "$GITHUB_OUTPUT"
+
+      # Writes /tmp/release-notes.md: one bullet per commit that touched the
+      # extension directory since the previous agent-extension-v* tag.
+      - name: Generate release notes
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          AGENT_PATH="packages/browseros-agent/apps/agent"
+          CURRENT_TAG="agent-extension-v${{ steps.version.outputs.version }}"
+          # Newest tag by version order, excluding the tag of the version being released
+          PREV_TAG=$(git tag -l "agent-extension-v*" --sort=-v:refname | grep -v "^${CURRENT_TAG}$" | head -n 1)
+
+          if [ -z "$PREV_TAG" ]; then
+            echo "Initial release" > /tmp/release-notes.md
+          else
+            # Only commits that touched the extension directory
+            COMMITS=$(git log "$PREV_TAG"..HEAD --pretty=format:"%H" -- "$AGENT_PATH")
+
+            if [ -z "$COMMITS" ]; then
+              echo "No notable changes." > /tmp/release-notes.md
+            else
+              echo "## What's Changed" > /tmp/release-notes.md
+              echo "" >> /tmp/release-notes.md
+
+              while IFS= read -r SHA; do
+                SUBJECT=$(git log -1 --pretty=format:"%s" "$SHA")
+                # The PR lookup is best-effort. Without `|| true`, a failed API call in
+                # this assignment would abort the step under the errexit shell instead
+                # of falling back to the plain subject line below.
+                PR_NUM=$(gh api "/repos/${{ github.repository }}/commits/${SHA}/pulls" --jq '.[0].number // empty' 2>/dev/null || true)
+
+                # Skip PR number if already in the commit subject (squash merges include it)
+                if [ -n "$PR_NUM" ] && ! echo "$SUBJECT" | grep -qF "(#${PR_NUM})"; then
+                  echo "- ${SUBJECT} (#${PR_NUM})" >> /tmp/release-notes.md
+                else
+                  echo "- ${SUBJECT}" >> /tmp/release-notes.md
+                fi
+              done <<< "$COMMITS"
+            fi
+          fi
+        working-directory: ${{ github.workspace }}
+
+      # Tags the released commit, pushes the tag, then creates the GitHub release
+      # with the extension zip attached, or updates it if it already exists.
+      - name: Create GitHub release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          TAG="agent-extension-v${{ steps.version.outputs.version }}"
+          RELEASE_SHA="${{ steps.version.outputs.release_sha }}"
+          TITLE="BrowserOS Agent Extension v${{ steps.version.outputs.version }}"
+
+          # Create the tag locally unless it already resolves
+          if git rev-parse "$TAG" >/dev/null 2>&1; then
+            echo "Tag $TAG already exists, skipping tag creation"
+          else
+            git tag "$TAG" "$RELEASE_SHA"
+          fi
+
+          # Push the tag unless the remote already lists it
+          if git ls-remote --tags origin "$TAG" | grep -q "$TAG"; then
+            echo "Tag $TAG already on remote, skipping push"
+          else
+            git push origin "$TAG"
+          fi
+
+          # Existing release: refresh title and notes and overwrite the uploaded zip.
+          # Otherwise: create the release with the zip attached.
+          if gh release view "$TAG" >/dev/null 2>&1; then
+            echo "Release $TAG already exists, updating"
+            gh release edit "$TAG" --title "$TITLE" --notes-file /tmp/release-notes.md
+            gh release upload "$TAG" "${{ steps.version.outputs.zip_path }}" --clobber
+          else
+            gh release create "$TAG" \
+              --title "$TITLE" \
+              --notes-file /tmp/release-notes.md \
+              "${{ steps.version.outputs.zip_path }}"
+          fi
+        working-directory: ${{ github.workspace }}
+
+      # Inserts a section for this version into the extension CHANGELOG.md,
+      # pushes it on a docs/ branch and opens a PR against main.
+      - name: Update CHANGELOG.md via PR
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          VERSION="${{ steps.version.outputs.version }}"
+          DATE=$(date -u +"%Y-%m-%d")
+          BRANCH="docs/agent-extension-changelog-v${VERSION}"
+          CHANGELOG="packages/browseros-agent/apps/agent/CHANGELOG.md"
+
+          # Start from main before creating the changelog branch
+          git checkout main
+
+          # Rebuild the file: its first line (presumably the title heading - TODO confirm),
+          # then the new version section, then everything from the second line onward
+          {
+            head -n 1 "$CHANGELOG"
+            echo ""
+            echo "## v${VERSION} (${DATE})"
+            echo ""
+            cat /tmp/release-notes.md
+            echo ""
+            tail -n +2 "$CHANGELOG"
+          } > /tmp/new-changelog.md
+          mv /tmp/new-changelog.md "$CHANGELOG"
+
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git checkout -b "$BRANCH"
+          git add "$CHANGELOG"
+          git commit -m "docs: update agent extension changelog for v${VERSION}"
+          git push origin "$BRANCH"
+
+          gh pr create \
+            --title "docs: update agent extension changelog for v${VERSION}" \
+            --body "Auto-generated changelog update for BrowserOS Agent Extension v${VERSION}." \
+            --base main \
+            --head "$BRANCH"
+
+          # Request a squash auto-merge; a failure of this command is ignored so it cannot fail the step
+          gh pr merge "$BRANCH" --squash --auto || true
+        working-directory: ${{ github.workspace }}
--- a/.github/workflows/release-agent-sdk.yml
+++ b/.github/workflows/release-agent-sdk.yml
@@ -3,16 +3,25 @@ name: Release Agent SDK
 on:
  workflow_dispatch:

+concurrency:
+  group: release-agent-sdk
+  cancel-in-progress: false
+
 jobs:
  publish:
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
    defaults:
      run:
        working-directory: packages/browseros-agent/packages/agent-sdk

    steps:
      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0

      - uses: oven-sh/setup-bun@v2

@@ -31,7 +40,129 @@ jobs:
      - name: Test
        run: bun test

+      # Publishes two step outputs: the "version" field of ./package.json and the checked-out commit SHA
+      - name: Get version
+        id: version
+        run: |
+          echo "version=$(node -p "require('./package.json').version")" >> "$GITHUB_OUTPUT"
+          echo "release_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      # Writes /tmp/release-notes.md: one bullet per commit that touched the SDK
+      # directory since the previous agent-sdk-v* tag.
+      - name: Generate release notes
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          SDK_PATH="packages/browseros-agent/packages/agent-sdk"
+          CURRENT_TAG="agent-sdk-v${{ steps.version.outputs.version }}"
+          # Find the previous tag, excluding the current version's tag
+          # (which may already exist from a prior failed run)
+          PREV_TAG=$(git tag -l "agent-sdk-v*" --sort=-v:refname | grep -v "^${CURRENT_TAG}$" | head -n 1)
+
+          if [ -z "$PREV_TAG" ]; then
+            echo "Initial release" > /tmp/release-notes.md
+          else
+            # Get commits scoped to the SDK directory
+            COMMITS=$(git log "$PREV_TAG"..HEAD --pretty=format:"%H" -- "$SDK_PATH")
+
+            if [ -z "$COMMITS" ]; then
+              echo "No notable changes." > /tmp/release-notes.md
+            else
+              echo "## What's Changed" > /tmp/release-notes.md
+              echo "" >> /tmp/release-notes.md
+
+              # One bullet per commit, with its PR number when one can be found.
+              # Author data was previously fetched here but never written to the
+              # notes, so that lookup (one API call per commit) has been dropped.
+              while IFS= read -r SHA; do
+                SUBJECT=$(git log -1 --pretty=format:"%s" "$SHA")
+
+                # The PR lookup is best-effort. Without `|| true`, a failed API call in
+                # this assignment would abort the step under the errexit shell instead
+                # of falling back to the plain subject line below.
+                PR_NUM=$(gh api "/repos/${{ github.repository }}/commits/${SHA}/pulls" --jq '.[0].number // empty' 2>/dev/null || true)
+
+                # Format line: skip PR number if already in the commit subject
+                # (squash merges include "(#123)" in the subject automatically)
+                if [ -n "$PR_NUM" ] && ! echo "$SUBJECT" | grep -qF "(#${PR_NUM})"; then
+                  echo "- ${SUBJECT} (#${PR_NUM})" >> /tmp/release-notes.md
+                else
+                  echo "- ${SUBJECT}" >> /tmp/release-notes.md
+                fi
+              done <<< "$COMMITS"
+            fi
+          fi
+        working-directory: ${{ github.workspace }}
+
      - name: Publish
        run: npm publish --access public
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      # Tags the released commit, pushes the tag, then creates the GitHub
+      # release or refreshes it when it already exists.
+      - name: Create GitHub release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          VERSION="${{ steps.version.outputs.version }}"
+          TARGET_SHA="${{ steps.version.outputs.release_sha }}"
+          TAG="agent-sdk-v${VERSION}"
+          TITLE="@browseros-ai/agent-sdk v${VERSION}"
+          NOTES=/tmp/release-notes.md
+
+          # Tag the released commit unless the tag already resolves locally
+          if ! git rev-parse "$TAG" >/dev/null 2>&1; then
+            git tag "$TAG" "$TARGET_SHA"
+          else
+            echo "Tag $TAG already exists, skipping tag creation"
+          fi
+
+          # Publish the tag unless the remote already lists it
+          if ! git ls-remote --tags origin "$TAG" | grep -q "$TAG"; then
+            git push origin "$TAG"
+          else
+            echo "Tag $TAG already on remote, skipping push"
+          fi
+
+          # A first run creates the release; re-runs refresh its title and notes
+          if ! gh release view "$TAG" >/dev/null 2>&1; then
+            gh release create "$TAG" --title "$TITLE" --notes-file "$NOTES"
+          else
+            echo "Release $TAG already exists, updating"
+            gh release edit "$TAG" --title "$TITLE" --notes-file "$NOTES"
+          fi
+        working-directory: ${{ github.workspace }}
+
+      # Inserts a section for this version into the SDK CHANGELOG.md,
+      # pushes it on a docs/ branch and opens a PR against main.
+      - name: Update CHANGELOG.md via PR
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          VERSION="${{ steps.version.outputs.version }}"
+          DATE=$(date -u +"%Y-%m-%d")
+          BRANCH="docs/agent-sdk-changelog-v${VERSION}"
+          CHANGELOG="packages/browseros-agent/packages/agent-sdk/CHANGELOG.md"
+
+          # Return to main before branching
+          git checkout main
+
+          # Use head/tail to safely insert without sed quoting issues:
+          # first line of the file, the new version section, then the rest
+          {
+            head -n 1 "$CHANGELOG"
+            echo ""
+            echo "## v${VERSION} (${DATE})"
+            echo ""
+            cat /tmp/release-notes.md
+            echo ""
+            tail -n +2 "$CHANGELOG"
+          } > /tmp/new-changelog.md
+          mv /tmp/new-changelog.md "$CHANGELOG"
+
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git checkout -b "$BRANCH"
+          git add "$CHANGELOG"
+          git commit -m "docs: update agent-sdk changelog for v${VERSION}"
+          git push origin "$BRANCH"
+
+          gh pr create \
+            --title "docs: update agent-sdk changelog for v${VERSION}" \
+            --body "Auto-generated changelog update for @browseros-ai/agent-sdk v${VERSION}." \
+            --base main \
+            --head "$BRANCH"
+
+          # Request a squash auto-merge; a failure of this command is ignored so it cannot fail the step
+          gh pr merge "$BRANCH" --squash --auto || true
+        working-directory: ${{ github.workspace }}
--- a/.github/workflows/release-cli.yml
+++ b/.github/workflows/release-cli.yml
@@ -0,0 +1,144 @@
+name: Release CLI
+
+# Manual trigger; the version to release is supplied by the person dispatching the run
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: "Release version (e.g. 0.1.0)"
+        required: true
+        type: string
+
+# All runs share one concurrency group; a newer run does not cancel one already in progress
+concurrency:
+  group: release-cli
+  cancel-in-progress: false
+
+jobs:
+  release:
+    # Skip the job unless the workflow was dispatched on main
+    if: github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    # Later steps push a tag, create a release and query commit-to-PR associations
+    permissions:
+      contents: write
+      pull-requests: write
+    # Steps run in the CLI module directory unless they set their own working-directory
+    defaults:
+      run:
+        working-directory: packages/browseros-agent/apps/cli
+
+    steps:
+      # fetch-depth: 0 disables the shallow clone; the release-notes step walks
+      # `git tag -l` and `git log <prev-tag>..HEAD`
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      # Go toolchain version is taken from the CLI module's go.mod
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: packages/browseros-agent/apps/cli/go.mod
+
+      # Tests and vet run before anything is built or published
+      - name: Run tests
+        run: go test ./... -v
+
+      - name: Run vet
+        run: go vet ./...
+
+      # Cross-compiles the CLI for every OS/arch pair, packs each binary into
+      # an archive under dist/ and writes checksums.txt next to them.
+      - name: Build all platforms
+        env:
+          # The dispatch input goes through the environment instead of being pasted
+          # into the script text, so its content is never parsed as shell syntax.
+          VERSION: ${{ inputs.version }}
+        run: |
+          LDFLAGS="-s -w -X main.version=${VERSION}"
+          DIST="dist"
+          mkdir -p "$DIST"
+
+          for pair in darwin/amd64 darwin/arm64 linux/amd64 linux/arm64 windows/amd64 windows/arm64; do
+            # Split "os/arch" into its two halves
+            OS="${pair%/*}"
+            ARCH="${pair#*/}"
+            BIN="browseros-cli"
+            EXT=""
+            if [ "$OS" = "windows" ]; then EXT=".exe"; fi
+
+            echo "Building ${OS}/${ARCH}..."
+            GOOS=$OS GOARCH=$ARCH CGO_ENABLED=0 go build -trimpath -ldflags "$LDFLAGS" -o "${DIST}/${BIN}${EXT}" .
+
+            # zip for Windows, tar.gz for everything else; the binary keeps the same
+            # name inside every archive
+            ARCHIVE="browseros-cli_${VERSION}_${OS}_${ARCH}"
+            if [ "$OS" = "windows" ]; then
+              (cd "$DIST" && zip "${ARCHIVE}.zip" "${BIN}${EXT}")
+            else
+              (cd "$DIST" && tar czf "${ARCHIVE}.tar.gz" "${BIN}")
+            fi
+            # Remove the loose binary so the next target can reuse the output name
+            rm "${DIST}/${BIN}${EXT}"
+          done
+
+          (cd "$DIST" && sha256sum *.tar.gz *.zip > checksums.txt)
+          echo "=== Built artifacts ==="
+          ls -lh "$DIST"
+
+      # Writes /tmp/release-notes.md: fixed install instructions followed by one
+      # bullet per commit that touched the CLI directory since the previous
+      # browseros-cli-v* tag.
+      - name: Generate release notes
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # The dispatch input goes through the environment instead of being pasted
+          # into the script text, so its content is never parsed as shell syntax.
+          VERSION: ${{ inputs.version }}
+        run: |
+          CLI_PATH="packages/browseros-agent/apps/cli"
+          TAG="browseros-cli-v${VERSION}"
+          CHANGELOG_FILE="/tmp/release-changelog.md"
+          # Newest tag by version order, excluding the tag of the version being released
+          PREV_TAG=$(git tag -l "browseros-cli-v*" --sort=-v:refname | grep -v "^${TAG}$" | head -n 1)
+
+          if [ -z "$PREV_TAG" ]; then
+            echo "Initial release of browseros-cli." > "$CHANGELOG_FILE"
+          else
+            # Only commits that touched the CLI directory
+            COMMITS=$(git log "$PREV_TAG"..HEAD --pretty=format:"%H" -- "$CLI_PATH")
+
+            if [ -z "$COMMITS" ]; then
+              echo "No notable changes." > "$CHANGELOG_FILE"
+            else
+              echo "## What's Changed" > "$CHANGELOG_FILE"
+              echo "" >> "$CHANGELOG_FILE"
+
+              while IFS= read -r SHA; do
+                SUBJECT=$(git log -1 --pretty=format:"%s" "$SHA")
+                # The PR lookup is best-effort. Without `|| true`, a failed API call in
+                # this assignment would abort the step under the errexit shell instead
+                # of falling back to the plain subject line below.
+                PR_NUM=$(gh api "/repos/${{ github.repository }}/commits/${SHA}/pulls" --jq '.[0].number // empty' 2>/dev/null || true)
+
+                # Skip PR number if already in the commit subject (squash merges include it)
+                if [ -n "$PR_NUM" ] && ! echo "$SUBJECT" | grep -qF "(#${PR_NUM})"; then
+                  echo "- ${SUBJECT} (#${PR_NUM})" >> "$CHANGELOG_FILE"
+                else
+                  echo "- ${SUBJECT}" >> "$CHANGELOG_FILE"
+                fi
+              done <<< "$COMMITS"
+            fi
+          fi
+
+          # The quoted heredoc delimiter keeps the install snippet literal (no shell expansion)
+          cat > /tmp/release-notes.md <<'EOF'
+          ## Install `browseros-cli`
+
+          ### macOS / Linux
+
+          ```bash
+          curl -fsSL https://cdn.browseros.com/cli/install.sh | bash
+          ```
+
+          ### Windows
+
+          ```powershell
+          irm https://cdn.browseros.com/cli/install.ps1 | iex
+          ```
+
+          After install, run `browseros-cli init` to point the CLI at your BrowserOS MCP server.
+          EOF
+
+          # Append the generated change list below the install header
+          echo "" >> /tmp/release-notes.md
+          cat "$CHANGELOG_FILE" >> /tmp/release-notes.md
+        working-directory: ${{ github.workspace }}
+
+      # Creates and pushes the annotated release tag if needed, then creates the
+      # GitHub release with every file in dist/ attached. A re-run for the same
+      # version updates the existing release rather than failing on
+      # `gh release create`, matching the other release workflows.
+      - name: Create tag and release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # The dispatch input goes through the environment instead of being pasted
+          # into the script text, so its content is never parsed as shell syntax.
+          VERSION: ${{ inputs.version }}
+        run: |
+          TAG="browseros-cli-v${VERSION}"
+          TITLE="browseros-cli v${VERSION}"
+          CLI_DIST="packages/browseros-agent/apps/cli/dist"
+          # Annotated tags need a committer identity
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          # Create and push the tag only when it does not resolve yet
+          if ! git rev-parse "$TAG" >/dev/null 2>&1; then
+            git tag -a "$TAG" -m "browseros-cli v${VERSION}"
+            git push origin "$TAG"
+          fi
+
+          # The glob is left outside the quotes on purpose so it expands to every built artifact
+          if gh release view "$TAG" >/dev/null 2>&1; then
+            echo "Release $TAG already exists, updating"
+            gh release edit "$TAG" --title "$TITLE" --notes-file /tmp/release-notes.md
+            gh release upload "$TAG" "${CLI_DIST}"/* --clobber
+          else
+            gh release create "$TAG" \
+              --title "$TITLE" \
+              --notes-file /tmp/release-notes.md \
+              "${CLI_DIST}"/*
+          fi
+        working-directory: ${{ github.workspace }}
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,15 +1,44 @@
 name: Tests

-on: []
+# Run on pull-request activity that touches this workflow or the agent
+# package, and on manual dispatch.
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths:
+      - ".github/workflows/test.yml"
+      - "packages/browseros-agent/**"
+  workflow_dispatch:
+
+# Grant the workflow token read-only access to repository contents.
+permissions:
+  contents: read
+
+env:
+  # Download location of the BrowserOS AppImage used by the test job
+  BROWSEROS_APPIMAGE_URL: "https://files.browseros.com/download/BrowserOS.AppImage"

 jobs:
  test:
-    name: Run Tests
-    runs-on: macos-latest
-    timeout-minutes: 10
+    name: Tests / ${{ matrix.suite }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
    defaults:
      run:
        working-directory: packages/browseros-agent
+    # One job per suite. fail-fast is off, so a failing suite does not cancel the others.
+    strategy:
+      fail-fast: false
+      matrix:
+        # suite      - label used in the job name, artifact name and summary
+        # test_path  - path handed to `bun test` in the test step
+        # junit_path - JUnit report location, relative to packages/browseros-agent
+        include:
+          - suite: tools
+            test_path: tests/tools
+            junit_path: test-results/tools.xml
+          - suite: integration
+            test_path: tests/server.integration.test.ts
+            junit_path: test-results/integration.xml
+          - suite: sdk
+            test_path: tests/sdk
+            junit_path: test-results/sdk.xml

    steps:
      - name: Checkout code
@@ -21,7 +50,92 @@ jobs:
      - name: Install dependencies
        run: bun ci

-      - name: Run all tests
-        run: bun test:all
+      # Derives the cache key from the AppImage's HTTP validators, so the cache
+      # is refreshed when the hosted file changes.
+      - name: Resolve BrowserOS cache key
+        id: browseros-cache-key
+        run: |
+          set -euo pipefail
+          # HEAD request only: the key comes from response headers, not the file body
+          headers="$(curl -fsSI "$BROWSEROS_APPIMAGE_URL")"
+          # Header names are matched case-insensitively with tolower(). The gawk-only
+          # IGNORECASE variable has no effect in other awk implementations, where a
+          # mixed-case "ETag:" would be missed and the key would degrade to the URL hash.
+          etag="$(printf '%s\n' "$headers" | awk 'tolower($0) ~ /^etag:/ {sub(/\r$/, "", $2); gsub(/"/, "", $2); print $2; exit}')"
+          last_modified="$(printf '%s\n' "$headers" | awk 'tolower($0) ~ /^last-modified:/ {$1=""; sub(/^ /, ""); sub(/\r$/, ""); print; exit}')"
+          # Prefer the ETag, then Last-Modified, and only then the URL itself
+          raw_key="${etag:-$last_modified}"
+          if [ -z "$raw_key" ]; then
+            raw_key="$BROWSEROS_APPIMAGE_URL"
+          fi
+          # Hash the validator so the key contains only hex characters
+          cache_key="$(printf '%s' "$raw_key" | shasum -a 256 | awk '{print $1}')"
+          echo "key=browseros-appimage-${{ runner.os }}-$cache_key" >> "$GITHUB_OUTPUT"
+
+      # Restores the AppImage from the cache under the key computed by the previous step
+      - name: Restore BrowserOS cache
+        id: browseros-cache
+        uses: actions/cache@v4
+        with:
+          path: packages/browseros-agent/.ci/bin/BrowserOS.AppImage
+          key: ${{ steps.browseros-cache-key.outputs.key }}
+
+      # Download only when the cache step did not report a hit
+      - name: Download BrowserOS
+        if: steps.browseros-cache.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p .ci/bin
+          curl -fsSL "$BROWSEROS_APPIMAGE_URL" -o .ci/bin/BrowserOS.AppImage
+          chmod +x .ci/bin/BrowserOS.AppImage
+
+      # Writes .ci/bin/browseros, a launcher that forwards all arguments to the
+      # AppImage sitting next to it. The quoted heredoc delimiter keeps $0 and $@
+      # literal in the generated script.
+      # NOTE(review): APPIMAGE_EXTRACT_AND_RUN=1 presumably lets the AppImage run
+      # without FUSE on the runner - confirm.
+      - name: Prepare BrowserOS wrapper
+        run: |
+          mkdir -p .ci/bin
+          cat > .ci/bin/browseros <<'EOF'
+          #!/usr/bin/env bash
+          set -euo pipefail
+          export APPIMAGE_EXTRACT_AND_RUN=1
+          exec "$(dirname "$0")/BrowserOS.AppImage" "$@"
+          EOF
+          chmod +x .ci/bin/browseros
+
+      # The test step passes this file to bun through --env-file
+      - name: Create server env file
+        working-directory: packages/browseros-agent/apps/server
+        run: cp .env.example .env.development
+
+      - name: Run ${{ matrix.suite }} tests
+        id: test
        env:
-          PUPPETEER_EXECUTABLE_PATH: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome
+          BROWSEROS_BINARY: ${{ github.workspace }}/packages/browseros-agent/.ci/bin/browseros
+          BROWSEROS_TEST_HEADLESS: "true"
+          BROWSEROS_TEST_EXTRA_ARGS: --no-sandbox --disable-dev-shm-usage
+        run: |
+          # Do not abort on a failing command: the test exit code is captured and reported below
+          set +e
+          mkdir -p test-results
+          cd apps/server
+          bun run test:cleanup
+          # The JUnit report is written two levels up, i.e. relative to packages/browseros-agent
+          bun --env-file=.env.development test "${{ matrix.test_path }}" --reporter=junit --reporter-outfile="../../${{ matrix.junit_path }}"
+          exit_code=$?
+          cd ../..
+          # If the run left no JUnit file behind, write a placeholder report with a
+          # single failing test case so the upload step still has a file
+          if [ ! -f "${{ matrix.junit_path }}" ]; then
+            cat > "${{ matrix.junit_path }}" <<EOF
+          <?xml version="1.0" encoding="UTF-8"?>
+          <testsuites tests="1" failures="1">
+            <testsuite name="${{ matrix.suite }}" tests="1" failures="1">
+              <testcase classname="workflow" name="${{ matrix.suite }} setup">
+                <failure message="Test run failed before JUnit output was written">See workflow logs for details.</failure>
+              </testcase>
+            </testsuite>
+          </testsuites>
+          EOF
+          fi
+          # Expose the exit code as a step output; the summary step decides whether the job fails
+          echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT"
+
+      # Always upload the report, including after a failure in an earlier step
+      - name: Upload JUnit XML
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: junit-${{ matrix.suite }}
+          path: packages/browseros-agent/${{ matrix.junit_path }}
+
+      # Writes the suite outcome to the job summary and fails the job unless the
+      # recorded test exit code is exactly "0" (an empty or missing value also fails).
+      - name: Summarize suite result
+        if: always()
+        run: |
+          if [ "${{ steps.test.outputs.exit_code }}" = "0" ]; then
+            echo "### :white_check_mark: ${{ matrix.suite }} suite passed" >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "### :x: ${{ matrix.suite }} suite failed (exit code ${{ steps.test.outputs.exit_code }})" >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo "See the uploaded \`junit-${{ matrix.suite }}\` artifact for details." >> "$GITHUB_STEP_SUMMARY"
+            exit 1
+          fi
--- a/README.md
+++ b/README.md
@@ -43,6 +43,24 @@

 4. Start automating!

+## Install `browseros-cli`
+
+Use `browseros-cli` when you want to control BrowserOS from the terminal or scripts via the BrowserOS MCP server.
+
+### macOS / Linux
+
+```bash
+curl -fsSL https://cdn.browseros.com/cli/install.sh | bash
+```
+
+### Windows
+
+```powershell
+irm https://cdn.browseros.com/cli/install.ps1 | iex
+```
+
+After install, run `browseros-cli init` to point the CLI at your BrowserOS MCP server.
+
 ## What makes BrowserOS special
 - 🏠 Feels like home — same Chrome interface, all your extensions just work
 - 🤖 AI agents that run on YOUR browser, not in the cloud
@@ -164,4 +182,3 @@ Thank you to all our supporters!
 Built with ❤️ from San Francisco
 </p>

-
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -23,6 +23,9 @@
        "group": "Core Features",
        "pages": [
          "features/bring-your-own-llm",
+          "features/chatgpt-pro-oauth",
+          "features/github-copilot-oauth",
+          "features/qwen-code-oauth",
          "features/local-models",
          "features/workflows",
          "features/scheduled-tasks",
--- a/docs/features/bring-your-own-llm.mdx
+++ b/docs/features/bring-your-own-llm.mdx
@@ -13,6 +13,33 @@ See how to connect your own LLM in under a minute:
  src="https://pub-80f8a01e6e8b4239ae53a7652ef85877.r2.dev/resources/feature-videos/1-bring-your-own-LLM.mov"
 ></video>

+## Use Your Existing Subscription
+
+Already paying for ChatGPT Pro, GitHub Copilot, or Qwen Code? Connect your existing account to BrowserOS with a single sign-in — no API keys, no extra cost.
+
+<CardGroup cols={3}>
+  <Card href="/features/chatgpt-pro-oauth">
+    <svg fill="currentColor" fillRule="evenodd" height="24" width="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M9.205 8.658v-2.26c0-.19.072-.333.238-.428l4.543-2.616c.619-.357 1.356-.523 2.117-.523 2.854 0 4.662 2.212 4.662 4.566 0 .167 0 .357-.024.547l-4.71-2.759a.797.797 0 00-.856 0l-5.97 3.473zm10.609 8.8V12.06c0-.333-.143-.57-.429-.737l-5.97-3.473 1.95-1.118a.433.433 0 01.476 0l4.543 2.617c1.309.76 2.189 2.378 2.189 3.948 0 1.808-1.07 3.473-2.76 4.163zM7.802 12.703l-1.95-1.142c-.167-.095-.239-.238-.239-.428V5.899c0-2.545 1.95-4.472 4.591-4.472 1 0 1.927.333 2.712.928L8.23 5.067c-.285.166-.428.404-.428.737v6.898zM12 15.128l-2.795-1.57v-3.33L12 8.658l2.795 1.57v3.33L12 15.128zm1.796 7.23c-1 0-1.927-.332-2.712-.927l4.686-2.712c.285-.166.428-.404.428-.737v-6.898l1.974 1.142c.167.095.238.238.238.428v5.233c0 2.545-1.974 4.472-4.614 4.472zm-5.637-5.303l-4.544-2.617c-1.308-.761-2.188-2.378-2.188-3.948A4.482 4.482 0 014.21 6.327v5.423c0 .333.143.571.428.738l5.947 3.449-1.95 1.118a.432.432 0 01-.476 0zm-.262 3.9c-2.688 0-4.662-2.021-4.662-4.519 0-.19.024-.38.047-.57l4.686 2.71c.286.167.571.167.856 0l5.97-3.448v2.26c0 .19-.07.333-.237.428l-4.543 2.616c-.619.357-1.356.523-2.117.523zm5.899 2.83a5.947 5.947 0 005.827-4.756C22.287 18.339 24 15.84 24 13.296c0-1.665-.713-3.282-1.998-4.448.119-.5.19-.999.19-1.498 0-3.401-2.759-5.947-5.946-5.947-.642 0-1.26.095-1.88.31A5.962 5.962 0 0010.205 0a5.947 5.947 0 00-5.827 4.757C1.713 5.447 0 7.945 0 10.49c0 1.666.713 3.283 1.998 4.448-.119.5-.19 1-.19 1.499 0 3.401 2.759 5.946 5.946 5.946.642 0 1.26-.095 1.88-.309a5.96 5.96 0 004.162 1.713z"></path></svg>
+    **ChatGPT Pro / Plus**
+
+    Sign in with your OpenAI account. Access GPT-5 Codex, GPT-5.4, and the full Codex lineup with up to 400K context.
+  </Card>
+  <Card href="/features/github-copilot-oauth">
+    <svg fill="currentColor" fillRule="evenodd" height="24" width="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M19.245 5.364c1.322 1.36 1.877 3.216 2.11 5.817.622 0 1.2.135 1.592.654l.73.964c.21.278.323.61.323.955v2.62c0 .339-.173.669-.453.868C20.239 19.602 16.157 21.5 12 21.5c-4.6 0-9.205-2.583-11.547-4.258-.28-.2-.452-.53-.453-.868v-2.62c0-.345.113-.679.321-.956l.73-.963c.392-.517.974-.654 1.593-.654l.029-.297c.25-2.446.81-4.213 2.082-5.52 2.461-2.54 5.71-2.851 7.146-2.864h.198c1.436.013 4.685.323 7.146 2.864zm-7.244 4.328c-.284 0-.613.016-.962.05-.123.447-.305.85-.57 1.108-1.05 1.023-2.316 1.18-2.994 1.18-.638 0-1.306-.13-1.851-.464-.516.165-1.012.403-1.044.996a65.882 65.882 0 00-.063 2.884l-.002.48c-.002.563-.005 1.126-.013 1.69.002.326.204.63.51.765 2.482 1.102 4.83 1.657 6.99 1.657 2.156 0 4.504-.555 6.985-1.657a.854.854 0 00.51-.766c.03-1.682.006-3.372-.076-5.053-.031-.596-.528-.83-1.046-.996-.546.333-1.212.464-1.85.464-.677 0-1.942-.157-2.993-1.18-.266-.258-.447-.661-.57-1.108-.32-.032-.64-.049-.96-.05zm-2.525 4.013c.539 0 .976.426.976.95v1.753c0 .525-.437.95-.976.95a.964.964 0 01-.976-.95v-1.752c0-.525.437-.951.976-.951zm5 0c.539 0 .976.426.976.95v1.753c0 .525-.437.95-.976.95a.964.964 0 01-.976-.95v-1.752c0-.525.437-.951.976-.951zM7.635 5.087c-1.05.102-1.935.438-2.385.906-.975 1.037-.765 3.668-.21 4.224.405.394 1.17.657 1.995.657h.09c.649-.013 1.785-.176 2.73-1.11.435-.41.705-1.433.675-2.47-.03-.834-.27-1.52-.63-1.813-.39-.336-1.275-.482-2.265-.394zm6.465.394c-.36.292-.6.98-.63 1.813-.03 1.037.24 2.06.675 2.47.968.957 2.136 1.104 2.776 1.11h.044c.825 0 1.59-.263 1.995-.657.555-.556.765-3.187-.21-4.224-.45-.468-1.335-.804-2.385-.906-.99-.088-1.875.058-2.265.394zM12 7.615c-.24 0-.525.015-.84.044.03.16.045.336.06.526l-.001.159a2.94 2.94 0 01-.014.25c.225-.022.425-.027.612-.028h.366c.187 0 .387.006.612.028-.015-.146-.015-.277-.015-.409.015-.19.03-.365.06-.526a9.29 9.29 0 00-.84-.044z"></path></svg>
+    **GitHub Copilot**
+
+    Sign in with your GitHub account. Access 19+ models including Claude, GPT-5, and Gemini through one subscription.
+  </Card>
+  <Card href="/features/qwen-code-oauth">
+    <svg fill="currentColor" fillRule="evenodd" height="24" width="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12.604 1.34c.393.69.784 1.382 1.174 2.075a.18.18 0 00.157.091h5.552c.174 0 .322.11.446.327l1.454 2.57c.19.337.24.478.024.837-.26.43-.513.864-.76 1.3l-.367.658c-.106.196-.223.28-.04.512l2.652 4.637c.172.301.111.494-.043.77-.437.785-.882 1.564-1.335 2.34-.159.272-.352.375-.68.37-.777-.016-1.552-.01-2.327.016a.099.099 0 00-.081.05 575.097 575.097 0 01-2.705 4.74c-.169.293-.38.363-.725.364-.997.003-2.002.004-3.017.002a.537.537 0 01-.465-.271l-1.335-2.323a.09.09 0 00-.083-.049H4.982c-.285.03-.553-.001-.805-.092l-1.603-2.77a.543.543 0 01-.002-.54l1.207-2.12a.198.198 0 000-.197 550.951 550.951 0 01-1.875-3.272l-.79-1.395c-.16-.31-.173-.496.095-.965.465-.813.927-1.625 1.387-2.436.132-.234.304-.334.584-.335a338.3 338.3 0 012.589-.001.124.124 0 00.107-.063l2.806-4.895a.488.488 0 01.422-.246c.524-.001 1.053 0 1.583-.006L11.704 1c.341-.003.724.032.9.34zm-3.432.403a.06.06 0 00-.052.03L6.254 6.788a.157.157 0 01-.135.078H3.253c-.056 0-.07.025-.041.074l5.81 10.156c.025.042.013.062-.034.063l-2.795.015a.218.218 0 00-.2.116l-1.32 2.31c-.044.078-.021.118.068.118l5.716.008c.046 0 .08.02.104.061l1.403 2.454c.046.081.092.082.139 0l5.006-8.76.783-1.382a.055.055 0 01.096 0l1.424 2.53a.122.122 0 00.107.062l2.763-.02a.04.04 0 00.035-.02.041.041 0 000-.04l-2.9-5.086a.108.108 0 010-.113l.293-.507 1.12-1.977c.024-.041.012-.062-.035-.062H9.2c-.059 0-.073-.026-.043-.077l1.434-2.505a.107.107 0 000-.114L9.225 1.774a.06.06 0 00-.053-.031zm6.29 8.02c.046 0 .058.02.034.06l-.832 1.465-2.613 4.585a.056.056 0 01-.05.029.058.058 0 01-.05-.029L8.498 9.841c-.02-.034-.01-.052.028-.054l.216-.012 6.722-.012z"></path></svg>
+    **Qwen Code**
+
+    Sign in with your Qwen account. Access Qwen 3 Coder with a 1 million token context window.
+  </Card>
+</CardGroup>
+
+---
+
 ## Which Model Should I Use?

 | Mode | What works | Recommendation |
--- a/docs/features/chatgpt-pro-oauth.mdx
+++ b/docs/features/chatgpt-pro-oauth.mdx
@@ -0,0 +1,56 @@
+---
+title: "ChatGPT Pro / Plus"
+description: "Use your ChatGPT subscription to power BrowserOS"
+---
+
+Connect your ChatGPT Pro or Plus subscription to BrowserOS and access GPT-5 Codex, GPT-5.4, and the full lineup of OpenAI's most advanced models — with up to 400K context. No API keys needed.
+
+## Setup
+
+**1.** Open BrowserOS and go to **Settings** (`chrome://browseros/settings`). You'll see the AI Providers section.
+
+![AI Settings screen](/images/setting-up-chatgpt/llm-screen.png)
+
+**2.** Click **USE** on the **ChatGPT Plus/Pro** card. You'll be prompted to sign in with your OpenAI account.
+
+![Login screen](/images/setting-up-chatgpt/login-screen.png)
+
+**3.** Sign in with the OpenAI account that has your ChatGPT Pro or Plus subscription active, and accept the authorization.
+
+![Accept authorization](/images/setting-up-chatgpt/accept-screen.png)
+
+**4.** Once authorized, ChatGPT will appear as a provider in your settings. Select a model and start using it.
+
+## Available Models
+
+| Model | Context Window |
+|-------|---------------|
+| `gpt-5.4` | 400K |
+| `gpt-5.3-codex` | 400K |
+| `gpt-5.2-codex` | 400K |
+| `gpt-5.2` | 200K |
+| `gpt-5.1-codex` | 400K |
+| `gpt-5.1-codex-max` | 400K |
+| `gpt-5.1-codex-mini` | 400K |
+| `gpt-5.1` | 200K |
+
+<Info>
+ChatGPT Pro subscribers have access to the full model lineup. ChatGPT Plus subscribers can access a subset of these models. The models available to your account are shown automatically after you connect.
+</Info>
+
+<Tip>
+The Codex models (e.g., `gpt-5.3-codex`) are optimized for code and reasoning tasks — ideal for complex browser automation workflows that involve form filling, data extraction, and multi-step navigation.
+</Tip>
+
+## Reasoning Settings
+
+ChatGPT Pro includes additional settings for models that support reasoning:
+
+- **Reasoning Effort** — Control how much the model "thinks" before responding. Options: none, low, medium, high.
+- **Reasoning Summary** — Choose how reasoning is displayed. Options: auto, concise, detailed.
+
+These settings are available in the provider configuration after connecting.
+
+## Disconnecting
+
+To disconnect your OpenAI account, go to **Settings**, find the ChatGPT Plus/Pro provider, and click **Disconnect**. Your OAuth tokens will be immediately deleted from your machine.
--- a/docs/features/github-copilot-oauth.mdx
+++ b/docs/features/github-copilot-oauth.mdx
@@ -0,0 +1,60 @@
+---
+title: "GitHub Copilot"
+description: "Use your GitHub Copilot subscription to power BrowserOS"
+---
+
+Connect your GitHub Copilot subscription to BrowserOS and access 19+ models — including Claude, GPT-5, and Gemini — through a single GitHub sign-in. No API keys needed.
+
+<Info>
+**Free tier** includes GPT-5 Mini, Claude Haiku 4.5, GPT-4o, and GPT-4.1. **Copilot Pro** ($10/month) unlocks Claude Sonnet 4.6, Claude Opus 4.6, Gemini 3 Pro, GPT-5.4, and more.
+</Info>
+
+## Setup
+
+**1.** Open BrowserOS and go to **Settings** (`chrome://browseros/settings`). You'll see the AI Providers section.
+
+![AI Settings screen](/images/setting-up-copilot/llm-screen.png)
+
+**2.** Click **USE** on the **GitHub Copilot** card. A device code will appear — copy it, then click the link to open GitHub's device authorization page.
+
+![Device code displayed](/images/setting-up-copilot/device-code.png)
+
+**3.** Select your GitHub account to authorize.
+
+![Select GitHub account](/images/setting-up-copilot/select-account.png)
+
+**4.** Paste the device code and authorize BrowserOS to access your Copilot subscription.
+
+![Authorize device](/images/setting-up-copilot/authorize-device.png)
+
+**5.** Once authorized, GitHub Copilot will appear as a provider in your settings. Select a model and start using it.
+
+## Available Models
+
+### Free Tier
+| Model | Context Window |
+|-------|---------------|
+| `gpt-5-mini` | 128K |
+| `claude-haiku-4.5` | 128K |
+| `gpt-4o` | 64K |
+| `gpt-4.1` | 64K |
+
+### Copilot Pro / Pro+
+| Model | Context Window |
+|-------|---------------|
+| `claude-sonnet-4.6` | 200K |
+| `claude-opus-4.6` | 200K |
+| `gemini-2.5-pro` | 1M |
+| `gemini-3-pro-preview` | 1M |
+| `gpt-5.4` | 400K |
+| `gpt-5.3-codex` | 400K |
+| `gpt-5.2-codex` | 400K |
+| `grok-code-fast-1` | 128K |
+
+<Tip>
+GitHub Copilot is the most versatile provider — one subscription gives you access to models from OpenAI, Anthropic, Google, and xAI. Great if you want to switch between models for different tasks.
+</Tip>
+
+## Disconnecting
+
+To disconnect your GitHub account, go to **Settings**, find the GitHub Copilot provider, and click **Disconnect**. Your OAuth tokens will be immediately deleted from your machine.
--- a/docs/features/qwen-code-oauth.mdx
+++ b/docs/features/qwen-code-oauth.mdx
@@ -0,0 +1,39 @@
+---
+title: "Qwen Code"
+description: "Use your Qwen Code account to power BrowserOS"
+---
+
+Connect your Qwen Code account to BrowserOS and access Alibaba's coding models with up to a **1 million token context window** — among the largest of any provider we support. No API keys needed.
+
+## Setup
+
+**1.** Open BrowserOS and go to **Settings** (`chrome://browseros/settings`). You'll see the AI Providers section.
+
+![AI Settings screen](/images/setting-up-qwen/llm-screen.png)
+
+**2.** Click **USE** on the **Qwen Code** card. You'll be prompted to sign in with your Qwen account.
+
+![Select Qwen Code](/images/setting-up-qwen/select-qwen.png)
+
+**3.** Sign in with your Alibaba Cloud / Qwen account to authorize BrowserOS.
+
+![Qwen sign in](/images/setting-up-qwen/qwen-signin.png)
+
+**4.** Once authorized, Qwen Code will appear as a provider in your settings. Select a model and start using it.
+
+## Available Models
+
+| Model | Context Window |
+|-------|---------------|
+| `coder-model` | 1M |
+| `qwen3-coder-plus` | 1M |
+| `qwen3-coder-flash` | 1M |
+| `qwen3.5-plus` | 1M |
+
+<Tip>
+Qwen Code's 1 million token context window is ideal for tasks that involve long documents, entire documentation sites, or working across many browser tabs simultaneously — the agent can hold everything in context at once.
+</Tip>
+
+## Disconnecting
+
+To disconnect your Qwen account, go to **Settings**, find the Qwen Code provider, and click **Disconnect**. Your OAuth tokens will be immediately deleted from your machine.
--- a/docs/images/icons/githubcopilot.svg
+++ b/docs/images/icons/githubcopilot.svg
@@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>GithubCopilot</title><path d="M19.245 5.364c1.322 1.36 1.877 3.216 2.11 5.817.622 0 1.2.135 1.592.654l.73.964c.21.278.323.61.323.955v2.62c0 .339-.173.669-.453.868C20.239 19.602 16.157 21.5 12 21.5c-4.6 0-9.205-2.583-11.547-4.258-.28-.2-.452-.53-.453-.868v-2.62c0-.345.113-.679.321-.956l.73-.963c.392-.517.974-.654 1.593-.654l.029-.297c.25-2.446.81-4.213 2.082-5.52 2.461-2.54 5.71-2.851 7.146-2.864h.198c1.436.013 4.685.323 7.146 2.864zm-7.244 4.328c-.284 0-.613.016-.962.05-.123.447-.305.85-.57 1.108-1.05 1.023-2.316 1.18-2.994 1.18-.638 0-1.306-.13-1.851-.464-.516.165-1.012.403-1.044.996a65.882 65.882 0 00-.063 2.884l-.002.48c-.002.563-.005 1.126-.013 1.69.002.326.204.63.51.765 2.482 1.102 4.83 1.657 6.99 1.657 2.156 0 4.504-.555 6.985-1.657a.854.854 0 00.51-.766c.03-1.682.006-3.372-.076-5.053-.031-.596-.528-.83-1.046-.996-.546.333-1.212.464-1.85.464-.677 0-1.942-.157-2.993-1.18-.266-.258-.447-.661-.57-1.108-.32-.032-.64-.049-.96-.05zm-2.525 4.013c.539 0 .976.426.976.95v1.753c0 .525-.437.95-.976.95a.964.964 0 01-.976-.95v-1.752c0-.525.437-.951.976-.951zm5 0c.539 0 .976.426.976.95v1.753c0 .525-.437.95-.976.95a.964.964 0 01-.976-.95v-1.752c0-.525.437-.951.976-.951zM7.635 5.087c-1.05.102-1.935.438-2.385.906-.975 1.037-.765 3.668-.21 4.224.405.394 1.17.657 1.995.657h.09c.649-.013 1.785-.176 2.73-1.11.435-.41.705-1.433.675-2.47-.03-.834-.27-1.52-.63-1.813-.39-.336-1.275-.482-2.265-.394zm6.465.394c-.36.292-.6.98-.63 1.813-.03 1.037.24 2.06.675 2.47.968.957 2.136 1.104 2.776 1.11h.044c.825 0 1.59-.263 1.995-.657.555-.556.765-3.187-.21-4.224-.45-.468-1.335-.804-2.385-.906-.99-.088-1.875.058-2.265.394zM12 7.615c-.24 0-.525.015-.84.044.03.16.045.336.06.526l-.001.159a2.94 2.94 0 01-.014.25c.225-.022.425-.027.612-.028h.366c.187 0 .387.006.612.028-.015-.146-.015-.277-.015-.409.015-.19.03-.365.06-.526a9.29 9.29 0 
00-.84-.044z"></path></svg>
--- a/docs/images/icons/openai.svg
+++ b/docs/images/icons/openai.svg
@@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>OpenAI</title><path d="M9.205 8.658v-2.26c0-.19.072-.333.238-.428l4.543-2.616c.619-.357 1.356-.523 2.117-.523 2.854 0 4.662 2.212 4.662 4.566 0 .167 0 .357-.024.547l-4.71-2.759a.797.797 0 00-.856 0l-5.97 3.473zm10.609 8.8V12.06c0-.333-.143-.57-.429-.737l-5.97-3.473 1.95-1.118a.433.433 0 01.476 0l4.543 2.617c1.309.76 2.189 2.378 2.189 3.948 0 1.808-1.07 3.473-2.76 4.163zM7.802 12.703l-1.95-1.142c-.167-.095-.239-.238-.239-.428V5.899c0-2.545 1.95-4.472 4.591-4.472 1 0 1.927.333 2.712.928L8.23 5.067c-.285.166-.428.404-.428.737v6.898zM12 15.128l-2.795-1.57v-3.33L12 8.658l2.795 1.57v3.33L12 15.128zm1.796 7.23c-1 0-1.927-.332-2.712-.927l4.686-2.712c.285-.166.428-.404.428-.737v-6.898l1.974 1.142c.167.095.238.238.238.428v5.233c0 2.545-1.974 4.472-4.614 4.472zm-5.637-5.303l-4.544-2.617c-1.308-.761-2.188-2.378-2.188-3.948A4.482 4.482 0 014.21 6.327v5.423c0 .333.143.571.428.738l5.947 3.449-1.95 1.118a.432.432 0 01-.476 0zm-.262 3.9c-2.688 0-4.662-2.021-4.662-4.519 0-.19.024-.38.047-.57l4.686 2.71c.286.167.571.167.856 0l5.97-3.448v2.26c0 .19-.07.333-.237.428l-4.543 2.616c-.619.357-1.356.523-2.117.523zm5.899 2.83a5.947 5.947 0 005.827-4.756C22.287 18.339 24 15.84 24 13.296c0-1.665-.713-3.282-1.998-4.448.119-.5.19-.999.19-1.498 0-3.401-2.759-5.947-5.946-5.947-.642 0-1.26.095-1.88.31A5.962 5.962 0 0010.205 0a5.947 5.947 0 00-5.827 4.757C1.713 5.447 0 7.945 0 10.49c0 1.666.713 3.283 1.998 4.448-.119.5-.19 1-.19 1.499 0 3.401 2.759 5.946 5.946 5.946.642 0 1.26-.095 1.88-.309a5.96 5.96 0 004.162 1.713z"></path></svg>
--- a/docs/images/icons/qwen.svg
+++ b/docs/images/icons/qwen.svg
@@ -0,0 +1 @@
+<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Qwen</title><path d="M12.604 1.34c.393.69.784 1.382 1.174 2.075a.18.18 0 00.157.091h5.552c.174 0 .322.11.446.327l1.454 2.57c.19.337.24.478.024.837-.26.43-.513.864-.76 1.3l-.367.658c-.106.196-.223.28-.04.512l2.652 4.637c.172.301.111.494-.043.77-.437.785-.882 1.564-1.335 2.34-.159.272-.352.375-.68.37-.777-.016-1.552-.01-2.327.016a.099.099 0 00-.081.05 575.097 575.097 0 01-2.705 4.74c-.169.293-.38.363-.725.364-.997.003-2.002.004-3.017.002a.537.537 0 01-.465-.271l-1.335-2.323a.09.09 0 00-.083-.049H4.982c-.285.03-.553-.001-.805-.092l-1.603-2.77a.543.543 0 01-.002-.54l1.207-2.12a.198.198 0 000-.197 550.951 550.951 0 01-1.875-3.272l-.79-1.395c-.16-.31-.173-.496.095-.965.465-.813.927-1.625 1.387-2.436.132-.234.304-.334.584-.335a338.3 338.3 0 012.589-.001.124.124 0 00.107-.063l2.806-4.895a.488.488 0 01.422-.246c.524-.001 1.053 0 1.583-.006L11.704 1c.341-.003.724.032.9.34zm-3.432.403a.06.06 0 00-.052.03L6.254 6.788a.157.157 0 01-.135.078H3.253c-.056 0-.07.025-.041.074l5.81 10.156c.025.042.013.062-.034.063l-2.795.015a.218.218 0 00-.2.116l-1.32 2.31c-.044.078-.021.118.068.118l5.716.008c.046 0 .08.02.104.061l1.403 2.454c.046.081.092.082.139 0l5.006-8.76.783-1.382a.055.055 0 01.096 0l1.424 2.53a.122.122 0 00.107.062l2.763-.02a.04.04 0 00.035-.02.041.041 0 000-.04l-2.9-5.086a.108.108 0 010-.113l.293-.507 1.12-1.977c.024-.041.012-.062-.035-.062H9.2c-.059 0-.073-.026-.043-.077l1.434-2.505a.107.107 0 000-.114L9.225 1.774a.06.06 0 00-.053-.031zm6.29 8.02c.046 0 .058.02.034.06l-.832 1.465-2.613 4.585a.056.056 0 01-.05.029.058.058 0 01-.05-.029L8.498 9.841c-.02-.034-.01-.052.028-.054l.216-.012 6.722-.012z"></path></svg>
--- a/docs/images/setting-up-chatgpt/accept-screen.png
+++ b/docs/images/setting-up-chatgpt/accept-screen.png
--- a/docs/images/setting-up-chatgpt/llm-screen.png
+++ b/docs/images/setting-up-chatgpt/llm-screen.png
--- a/docs/images/setting-up-chatgpt/login-screen.png
+++ b/docs/images/setting-up-chatgpt/login-screen.png
--- a/docs/images/setting-up-copilot/authorize-device.png
+++ b/docs/images/setting-up-copilot/authorize-device.png
--- a/docs/images/setting-up-copilot/device-code.png
+++ b/docs/images/setting-up-copilot/device-code.png
--- a/docs/images/setting-up-copilot/llm-screen.png
+++ b/docs/images/setting-up-copilot/llm-screen.png
--- a/docs/images/setting-up-copilot/select-account.png
+++ b/docs/images/setting-up-copilot/select-account.png
--- a/docs/images/setting-up-qwen/llm-screen.png
+++ b/docs/images/setting-up-qwen/llm-screen.png
--- a/docs/images/setting-up-qwen/qwen-signin.png
+++ b/docs/images/setting-up-qwen/qwen-signin.png
--- a/docs/images/setting-up-qwen/select-qwen.png
+++ b/docs/images/setting-up-qwen/select-qwen.png
--- a/packages/browseros-agent/.gitignore
+++ b/packages/browseros-agent/.gitignore
@@ -187,7 +187,12 @@ log.txt
 # Testing iteration temp files
 tmp/

+# CI artifacts
+.ci/
+test-results/
+
 # Coding agent artifacts
 .agent/
 .llm/
 .grove/
+docs/plans/2026-03-24-models-dev-integration.md
--- a/packages/browseros-agent/CLAUDE.md
+++ b/packages/browseros-agent/CLAUDE.md
@@ -81,6 +81,9 @@ bun run dev:server               # Build server for development
 bun run dev:ext                  # Build extension for development
 bun run dist:server              # Build server for production (all targets)
 bun run dist:ext                 # Build extension for production
+
+# Refresh models.dev data
+bun run generate:models          # Fetches latest from models.dev/api.json
 ```

 ## Architecture
--- a/packages/browseros-agent/apps/agent/CHANGELOG.md
+++ b/packages/browseros-agent/apps/agent/CHANGELOG.md
@@ -0,0 +1,6 @@
+# BrowserOS Agent Extension
+
+## v0.0.52 (2026-03-26)
+
+Initial release
+
--- a/packages/browseros-agent/apps/agent/components/ai-elements/run-result-dialog.tsx
+++ b/packages/browseros-agent/apps/agent/components/ai-elements/run-result-dialog.tsx
@@ -66,7 +66,7 @@ export const RunResultDialog: FC<RunResultDialogProps> = ({

  return (
    <Dialog open={!!run} onOpenChange={onOpenChange}>
-      <DialogContent className="sm:max-w-2xl">
+      <DialogContent className="sm:w-[70vw] sm:max-w-4xl">
        <DialogHeader>
          <DialogTitle className="flex items-center gap-2">
            {run.status === 'completed' ? (
@@ -94,7 +94,7 @@ export const RunResultDialog: FC<RunResultDialogProps> = ({
              <p className="text-destructive text-sm">{run.result}</p>
            </div>
          ) : run.result ? (
-            <div className="prose prose-sm dark:prose-invert [&_[data-streamdown='code-block']]:!w-full [&_[data-streamdown='table-wrapper']]:!w-full max-w-none break-words rounded-lg border border-border bg-muted/50 p-4">
+            <div className="prose prose-sm dark:prose-invert [&_[data-streamdown='code-block']]:!w-full [&_[data-streamdown='table-wrapper']]:!w-full max-w-none break-words rounded-lg border border-border bg-muted/50 p-4 [&_[data-streamdown='table-wrapper']]:overflow-x-auto">
              <MessageResponse>{run.result}</MessageResponse>
            </div>
          ) : (
--- a/packages/browseros-agent/apps/agent/components/chat/ChatProviderSelector.tsx
+++ b/packages/browseros-agent/apps/agent/components/chat/ChatProviderSelector.tsx
@@ -1,4 +1,4 @@
-import { Check } from 'lucide-react'
+import { Check, Plus } from 'lucide-react'
 import type { FC, PropsWithChildren } from 'react'
 import { useState } from 'react'
 import {
@@ -77,6 +77,19 @@ export const ChatProviderSelector: FC<
                )
              })}
            </CommandGroup>
+            <div className="border-border border-t p-1">
+              <button
+                type="button"
+                className="flex w-full items-center gap-3 rounded-md p-2 text-muted-foreground text-sm transition-colors hover:bg-accent hover:text-foreground"
+                onClick={() => {
+                  window.open('/app.html#/settings/ai', '_blank')
+                  setOpen(false)
+                }}
+              >
+                <Plus className="h-4 w-4" />
+                Add Provider
+              </button>
+            </div>
          </CommandList>
        </Command>
      </PopoverContent>
--- a/packages/browseros-agent/apps/agent/components/credits/CreditBadge.tsx
+++ b/packages/browseros-agent/apps/agent/components/credits/CreditBadge.tsx
@@ -14,7 +14,7 @@ export const CreditBadge: FC<CreditBadgeProps> = ({ credits, onClick }) => {
      type="button"
      onClick={onClick}
      className={cn(
-        'inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 font-medium text-xs transition-colors hover:bg-muted/50',
+        'inline-flex cursor-pointer items-center gap-1 rounded-md px-1.5 py-0.5 font-medium text-xs transition-colors hover:bg-muted/50',
        getCreditTextColor(credits),
      )}
      title={`${credits} credits remaining`}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/AISettingsPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/AISettingsPage.tsx
@@ -38,6 +38,7 @@ import {
 } from '@/lib/llm-providers/useOAuthProviderFlow'
 import { track } from '@/lib/metrics/track'
 import { ConfiguredProvidersList } from './ConfiguredProvidersList'
+import { DeviceCodeDialog } from './DeviceCodeDialog'
 import {
  DeleteRemoteLlmProviderDocument,
  GetRemoteLlmProvidersDocument,
@@ -45,6 +46,7 @@ import {
 import type { IncompleteProvider } from './IncompleteProviderCard'
 import { IncompleteProvidersList } from './IncompleteProvidersList'
 import { LlmProvidersHeader } from './LlmProvidersHeader'
+import { McpPromoBanner } from './McpPromoBanner'
 import { NewProviderDialog } from './NewProviderDialog'
 import { ProviderTemplatesSection } from './ProviderTemplatesSection'

@@ -173,6 +175,16 @@ export const AISettingsPage: FC = () => {
    saveProvider,
  )

+  const activeDeviceCode =
+    chatgptPro.pendingDeviceCode ??
+    copilot.pendingDeviceCode ??
+    qwenCode.pendingDeviceCode
+  const clearActiveDeviceCode = () => {
+    chatgptPro.clearDeviceCode()
+    copilot.clearDeviceCode()
+    qwenCode.clearDeviceCode()
+  }
+
  const oauthFlows: Record<
    string,
    {
@@ -347,6 +359,8 @@ export const AISettingsPage: FC = () => {
        onAddProvider={handleAddProvider}
      />

+      <McpPromoBanner />
+
      <ProviderTemplatesSection onUseTemplate={handleUseTemplate} />

      <ConfiguredProvidersList
@@ -421,6 +435,11 @@ export const AISettingsPage: FC = () => {
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
+
+      <DeviceCodeDialog
+        deviceCode={activeDeviceCode}
+        onClose={clearActiveDeviceCode}
+      />
    </div>
  )
 }
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/DeviceCodeDialog.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/DeviceCodeDialog.tsx
@@ -0,0 +1,81 @@
+import { Check, Copy, ExternalLink } from 'lucide-react'
+import { type FC, useState } from 'react'
+import { Button } from '@/components/ui/button'
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogHeader,
+  DialogTitle,
+} from '@/components/ui/dialog'
+import type { PendingDeviceCode } from '@/lib/llm-providers/useOAuthProviderFlow'
+
+/** Props for the device-code dialog. */
+interface DeviceCodeDialogProps {
+  /** Pending device code to show; the dialog is open only while this is non-null. */
+  deviceCode: PendingDeviceCode | null
+  /** Called when the dialog requests to be closed. */
+  onClose: () => void
+}
+
+/**
+ * Modal that displays the user code of a pending device-code sign-in, with a
+ * copy-to-clipboard button and, when a verification URI is present, a link to
+ * the verification page.
+ */
+export const DeviceCodeDialog: FC<DeviceCodeDialogProps> = ({
+  deviceCode,
+  onClose,
+}) => {
+  // Drives the temporary checkmark shown after a successful copy.
+  const [copied, setCopied] = useState(false)
+
+  const handleCopy = async () => {
+    if (!deviceCode) return
+    try {
+      await navigator.clipboard.writeText(deviceCode.userCode)
+      setCopied(true)
+      // Revert to the copy icon after two seconds.
+      setTimeout(() => setCopied(false), 2000)
+    } catch {
+      // Best effort: if the Clipboard API rejects, the code stays visible on
+      // screen so it can still be selected and copied by hand.
+    }
+  }
+
+  return (
+    <Dialog open={!!deviceCode} onOpenChange={(open) => !open && onClose()}>
+      <DialogContent className="sm:max-w-md">
+        <DialogHeader>
+          <DialogTitle>Connect to {deviceCode?.providerName}</DialogTitle>
+          <DialogDescription>
+            Paste this code on the {deviceCode?.providerName} page that just
+            opened in your browser.
+          </DialogDescription>
+        </DialogHeader>
+        <div className="flex flex-col items-center gap-4 py-4">
+          <div className="flex items-center gap-3 rounded-xl border-2 border-[var(--accent-orange)]/40 border-dashed bg-[var(--accent-orange)]/5 px-6 py-4">
+            <code className="font-bold font-mono text-2xl text-foreground tracking-widest">
+              {deviceCode?.userCode}
+            </code>
+            {/* Icon-only button: the aria-label gives it an accessible name. */}
+            <Button
+              variant="ghost"
+              size="icon"
+              onClick={handleCopy}
+              aria-label={copied ? 'Code copied' : 'Copy code'}
+              className="shrink-0"
+            >
+              {copied ? (
+                <Check className="h-4 w-4 text-green-600" />
+              ) : (
+                <Copy className="h-4 w-4" />
+              )}
+            </Button>
+          </div>
+          <p className="text-center text-muted-foreground text-xs">
+            This dialog will close automatically once authentication completes.
+          </p>
+          {deviceCode?.verificationUri && (
+            <a
+              href={deviceCode.verificationUri}
+              target="_blank"
+              rel="noopener noreferrer"
+              className="flex items-center gap-1 text-[var(--accent-orange)] text-xs transition-colors hover:underline"
+            >
+              Open verification page
+              <ExternalLink className="h-3 w-3" />
+            </a>
+          )}
+        </div>
+      </DialogContent>
+    </Dialog>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/McpPromoBanner.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/McpPromoBanner.tsx
@@ -0,0 +1,57 @@
+import { ArrowRight, Server, X } from 'lucide-react'
+import { type FC, useState } from 'react'
+import { useNavigate } from 'react-router'
+import { Button } from '@/components/ui/button'
+import { MCP_PROMO_BANNER_CLICKED_EVENT } from '@/lib/constants/analyticsEvents'
+import { track } from '@/lib/metrics/track'
+
+/**
+ * Promotional banner pointing users at the MCP settings page.
+ *
+ * Dismissal is kept in component state only, so the banner is hidden until
+ * the component is mounted again.
+ */
+export const McpPromoBanner: FC = () => {
+  const [dismissed, setDismissed] = useState(false)
+  const navigate = useNavigate()
+
+  if (dismissed) return null
+
+  // Record the click before navigating to the MCP settings route.
+  const handleClick = () => {
+    track(MCP_PROMO_BANNER_CLICKED_EVENT)
+    navigate('/settings/mcp')
+  }
+
+  return (
+    <div className="flex items-center gap-4 rounded-xl border border-border bg-card p-4 shadow-sm transition-all hover:shadow-md">
+      <div className="flex h-10 w-10 shrink-0 items-center justify-center rounded-lg bg-[var(--accent-orange)]/10">
+        <Server className="h-5 w-5 text-[var(--accent-orange)]" />
+      </div>
+      <div className="min-w-0 flex-1">
+        <p className="flex items-center gap-2 font-semibold text-sm">
+          Use BrowserOS with Claude Code, Cursor & more
+          <span className="text-[var(--accent-orange)] text-xs">
+            (66+ tools)
+          </span>
+          <span className="inline-flex items-center gap-1 rounded-full bg-[var(--accent-orange)]/10 px-2.5 py-1 font-semibold text-[var(--accent-orange)] text-xs">
+            <span className="h-1.5 w-1.5 rounded-full bg-[var(--accent-orange)]" />
+            New
+          </span>
+        </p>
+        <p className="text-muted-foreground text-xs">
+          Connect your favorite coding tools to BrowserOS as an MCP server
+        </p>
+      </div>
+      <Button
+        variant="outline"
+        size="sm"
+        onClick={handleClick}
+        className="shrink-0 border-[var(--accent-orange)] bg-[var(--accent-orange)]/10 text-[var(--accent-orange)] hover:bg-[var(--accent-orange)]/20 hover:text-[var(--accent-orange)]"
+      >
+        Set up
+        <ArrowRight className="ml-1 h-3 w-3" />
+      </Button>
+      {/* Icon-only button: the aria-label gives it an accessible name. */}
+      <button
+        type="button"
+        onClick={() => setDismissed(true)}
+        aria-label="Dismiss"
+        className="shrink-0 rounded-sm p-1 text-muted-foreground opacity-50 transition-opacity hover:opacity-100"
+      >
+        <X className="h-3.5 w-3.5" />
+      </button>
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/NewProviderDialog.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/NewProviderDialog.tsx
@@ -1,6 +1,13 @@
 import { zodResolver } from '@hookform/resolvers/zod'
-import { CheckCircle2, ExternalLink, Loader2, XCircle } from 'lucide-react'
-import { type FC, useEffect, useState } from 'react'
+import {
+  CheckCircle2,
+  ChevronDown,
+  ExternalLink,
+  Loader2,
+  SearchIcon,
+  XCircle,
+} from 'lucide-react'
+import { type FC, useEffect, useRef, useState } from 'react'
 import { useForm } from 'react-hook-form'
 import { z } from 'zod/v3'
 import { Button } from '@/components/ui/button'
@@ -47,7 +54,12 @@ import {
 import { type TestResult, testProvider } from '@/lib/llm-providers/testProvider'
 import type { LlmProviderConfig, ProviderType } from '@/lib/llm-providers/types'
 import { track } from '@/lib/metrics/track'
-import { getModelContextLength, getModelOptions } from './models'
+import { cn } from '@/lib/utils'
+import {
+  getModelContextLength,
+  getModelsForProvider,
+  type ModelInfo,
+} from './models'

 const providerTypeEnum = z.enum([
  'moonshot',
@@ -163,6 +175,107 @@ export const providerFormSchema = z
 */
 export type ProviderFormValues = z.infer<typeof providerFormSchema>

+/**
+ * Formats a token count as a compact label such as "512", "128K", "1M" or
+ * "1.5M".
+ *
+ * Millions are shown with at most one decimal place, and a trailing ".0" is
+ * dropped so that 1,000,000 and 1,048,576 both read "1M". The millions
+ * branch starts at 999,500 because that is where rounding to thousands would
+ * otherwise produce "1000K".
+ *
+ * @param tokens - Context window size in tokens.
+ * @returns The compact label.
+ */
+function formatContextWindow(tokens: number): string {
+  if (tokens >= 999500) {
+    const millions = (tokens / 1000000).toFixed(1)
+    return `${millions.endsWith('.0') ? millions.slice(0, -2) : millions}M`
+  }
+  if (tokens >= 1000) return `${Math.round(tokens / 1000)}K`
+  return `${tokens}`
+}
+
+/**
+ * Inline, searchable list of known models for the selected provider.
+ *
+ * The search box filters the list by model ID (case-insensitive substring).
+ * Pressing Enter with a non-blank search submits the typed text as a custom
+ * model ID; Escape or a mouse press outside the list closes it. Surrounding
+ * whitespace in the search text is ignored, so a blank or padded entry is
+ * never filtered on or submitted as a model ID.
+ *
+ * @param models - Models to list, each with an ID and context length.
+ * @param selectedModelId - ID of the currently selected model, highlighted in the list.
+ * @param onSelect - Called with the model ID when a list entry is clicked.
+ * @param onCustomSubmit - Called with the trimmed search text when Enter is pressed.
+ * @param onClose - Called when the list should be closed.
+ */
+function ModelPickerList({
+  models,
+  selectedModelId,
+  onSelect,
+  onCustomSubmit,
+  onClose,
+}: {
+  models: ModelInfo[]
+  selectedModelId: string
+  onSelect: (modelId: string) => void
+  onCustomSubmit: (modelId: string) => void
+  onClose: () => void
+}) {
+  const [search, setSearch] = useState('')
+  const inputRef = useRef<HTMLInputElement>(null)
+  const containerRef = useRef<HTMLDivElement>(null)
+
+  // Focus the search box as soon as the list mounts.
+  useEffect(() => {
+    inputRef.current?.focus()
+  }, [])
+
+  // Close when the user presses the mouse anywhere outside the list.
+  useEffect(() => {
+    const handleClickOutside = (e: MouseEvent) => {
+      if (
+        containerRef.current &&
+        !containerRef.current.contains(e.target as Node)
+      ) {
+        onClose()
+      }
+    }
+    document.addEventListener('mousedown', handleClickOutside)
+    return () => document.removeEventListener('mousedown', handleClickOutside)
+  }, [onClose])
+
+  // Trim once and use the trimmed text for filtering, submitting and display.
+  const trimmedSearch = search.trim()
+  const query = trimmedSearch.toLowerCase()
+  const filtered = query
+    ? models.filter((m) => m.modelId.toLowerCase().includes(query))
+    : models
+
+  const handleKeyDown = (e: React.KeyboardEvent) => {
+    if (e.key === 'Enter' && trimmedSearch) {
+      e.preventDefault()
+      onCustomSubmit(trimmedSearch)
+    }
+    if (e.key === 'Escape') {
+      onClose()
+    }
+  }
+
+  return (
+    <div ref={containerRef} className="rounded-md border">
+      <div className="flex items-center gap-2 border-b px-3">
+        <SearchIcon className="h-4 w-4 shrink-0 text-muted-foreground opacity-50" />
+        <input
+          ref={inputRef}
+          type="text"
+          value={search}
+          onChange={(e) => setSearch(e.target.value)}
+          onKeyDown={handleKeyDown}
+          placeholder="Search or type a custom model ID..."
+          className="flex h-9 w-full bg-transparent py-2 text-sm outline-none placeholder:text-muted-foreground"
+        />
+      </div>
+      <div className="max-h-[200px] overflow-y-auto">
+        {filtered.length > 0 ? (
+          filtered.map((model) => {
+            const isSelected = selectedModelId === model.modelId
+            return (
+              <button
+                key={model.modelId}
+                type="button"
+                onClick={() => onSelect(model.modelId)}
+                className={cn(
+                  'flex w-full items-center justify-between px-3 py-2 text-left text-sm transition-colors hover:bg-accent',
+                  isSelected && 'bg-accent font-medium',
+                )}
+              >
+                <span className="truncate">{model.modelId}</span>
+                <span className="ml-2 shrink-0 rounded-md bg-muted px-1.5 py-0.5 font-mono text-[10px] text-muted-foreground">
+                  {formatContextWindow(model.contextLength)}
+                </span>
+              </button>
+            )
+          })
+        ) : (
+          <div className="px-3 py-6 text-center text-muted-foreground text-sm">
+            No models match. Press Enter to use &quot;{trimmedSearch}&quot;
+          </div>
+        )}
+      </div>
+    </div>
+  )
+}
+
 /**
 * Props for NewProviderDialog
 * @public
@@ -188,9 +301,9 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
  initialValues,
  onSave,
 }) => {
-  const [isCustomModel, setIsCustomModel] = useState(false)
  const [isTesting, setIsTesting] = useState(false)
  const [testResult, setTestResult] = useState<TestResult | null>(null)
+  const [modelListOpen, setModelListOpen] = useState(false)
  const { supports } = useCapabilities()
  const { baseUrl: agentServerUrl } = useAgentServerUrl()
  const kimiLaunch = useKimiLaunch()
@@ -261,8 +374,7 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
    watchedSessionToken,
  ])

-  // Get model options for current provider type
-  const modelOptions = getModelOptions(watchedType as ProviderType)
+  const modelInfoList = getModelsForProvider(watchedType as ProviderType)

  // Handle provider type change (user-initiated via Select)
  const handleTypeChange = (newType: ProviderType) => {
@@ -272,14 +384,13 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
      form.setValue('baseUrl', defaultUrl)
    }
    form.setValue('modelId', '')
-    setIsCustomModel(false)
  }

  // Auto-fill context window when model changes (only for new providers)
  useEffect(() => {
    if (initialValues?.id) return

-    if (watchedModelId && watchedModelId !== 'custom') {
+    if (watchedModelId) {
      const contextLength = getModelContextLength(
        watchedType as ProviderType,
        watchedModelId,
@@ -290,17 +401,6 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
    }
  }, [watchedModelId, watchedType, form, initialValues?.id])

-  // Handle model selection (including custom option)
-  const handleModelChange = (value: string) => {
-    if (value === 'custom') {
-      setIsCustomModel(true)
-      form.setValue('modelId', '')
-    } else {
-      setIsCustomModel(false)
-      form.setValue('modelId', value)
-    }
-  }
-
  // Reset form when initialValues change
  useEffect(() => {
    if (initialValues) {
@@ -325,7 +425,6 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
        reasoningEffort: initialValues.reasoningEffort || 'high',
        reasoningSummary: initialValues.reasoningSummary || 'auto',
      })
-      setIsCustomModel(false)
    }
  }, [initialValues, form])

@@ -352,7 +451,6 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
        reasoningEffort: 'high',
        reasoningSummary: 'auto',
      })
-      setIsCustomModel(false)
    }
    // Clear test result when dialog opens/closes
    setTestResult(null)
@@ -811,52 +909,51 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
              control={form.control}
              name="modelId"
              render={({ field }) => (
-                <FormItem>
+                <FormItem className="flex flex-col">
                  <FormLabel>Model *</FormLabel>
-                  {isCustomModel || modelOptions.length === 1 ? (
-                    <>
-                      <FormControl>
-                        <Input
-                          placeholder={
-                            watchedType === 'azure'
-                              ? 'Enter your deployment name'
-                              : watchedType === 'bedrock'
-                                ? 'e.g., anthropic.claude-3-5-sonnet-20241022-v2:0'
-                                : 'Enter custom model ID'
-                          }
-                          {...field}
-                        />
-                      </FormControl>
-                      {modelOptions.length > 1 && (
-                        <Button
-                          type="button"
-                          variant="link"
-                          size="sm"
-                          className="h-auto p-0 text-xs"
-                          onClick={() => setIsCustomModel(false)}
-                        >
-                          ← Back to model list
-                        </Button>
-                      )}
-                    </>
+                  {modelInfoList.length === 0 ? (
+                    <FormControl>
+                      <Input
+                        placeholder={
+                          watchedType === 'azure'
+                            ? 'Enter your deployment name'
+                            : watchedType === 'bedrock'
+                              ? 'e.g., anthropic.claude-3-5-sonnet-20241022-v2:0'
+                              : 'Enter model ID'
+                        }
+                        {...field}
+                      />
+                    </FormControl>
+                  ) : modelListOpen ? (
+                    <ModelPickerList
+                      models={modelInfoList}
+                      selectedModelId={field.value}
+                      onSelect={(modelId) => {
+                        form.setValue('modelId', modelId)
+                        setModelListOpen(false)
+                      }}
+                      onCustomSubmit={(modelId) => {
+                        form.setValue('modelId', modelId)
+                        setModelListOpen(false)
+                      }}
+                      onClose={() => setModelListOpen(false)}
+                    />
                  ) : (
-                    <Select
-                      onValueChange={handleModelChange}
-                      value={field.value}
+                    <button
+                      type="button"
+                      onClick={() => setModelListOpen(true)}
+                      className={cn(
+                        'flex h-9 w-full items-center justify-between rounded-md border border-input bg-transparent px-3 py-1 text-sm shadow-xs',
+                        field.value
+                          ? 'text-foreground'
+                          : 'text-muted-foreground',
+                      )}
                    >
-                      <FormControl>
-                        <SelectTrigger className="w-full">
-                          <SelectValue placeholder="Select a model" />
-                        </SelectTrigger>
-                      </FormControl>
-                      <SelectContent>
-                        {modelOptions.map((modelId) => (
-                          <SelectItem key={modelId} value={modelId}>
-                            {modelId === 'custom' ? '+ Custom model' : modelId}
-                          </SelectItem>
-                        ))}
-                      </SelectContent>
-                    </Select>
+                      <span className="truncate">
+                        {field.value || 'Select a model...'}
+                      </span>
+                      <ChevronDown className="ml-2 h-4 w-4 shrink-0 opacity-50" />
+                    </button>
                  )}
                  <FormMessage />
                </FormItem>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderTemplateCard.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderTemplateCard.tsx
@@ -7,12 +7,14 @@ import { cn } from '@/lib/utils'
 interface ProviderTemplateCardProps {
  template: ProviderTemplate
  highlighted?: boolean
+  // Renders the "New" badge on the card; the accent border is only applied
+  // when `highlighted` is not set, since the highlighted styling wins.
+  isNew?: boolean
  onUseTemplate: (template: ProviderTemplate) => void
 }

 export const ProviderTemplateCard: FC<ProviderTemplateCardProps> = ({
  template,
  highlighted = false,
+  isNew = false,
  onUseTemplate,
 }) => {
  return (
@@ -20,12 +22,19 @@ export const ProviderTemplateCard: FC<ProviderTemplateCardProps> = ({
      type="button"
      onClick={() => onUseTemplate(template)}
      className={cn(
-        'group flex w-full items-center gap-3 rounded-lg border bg-background p-4 text-left transition-all hover:border-[var(--accent-orange)] hover:shadow-md',
+        'group relative flex w-full items-center gap-3 rounded-lg border bg-background p-4 text-left transition-all hover:border-[var(--accent-orange)] hover:shadow-md',
        highlighted
          ? 'border-orange-300/80 bg-orange-50/30 shadow-sm ring-1 ring-orange-300/45 dark:bg-orange-500/5'
-          : 'border-border',
+          : isNew
+            ? 'border-2 border-[var(--accent-orange)]/50'
+            : 'border-border',
      )}
    >
+      {isNew && (
+        <span className="absolute -top-2 left-3 rounded-full bg-[var(--accent-orange)] px-2 py-0.5 font-semibold text-[9px] text-white uppercase tracking-wider">
+          New
+        </span>
+      )}
      <div className="flex min-w-0 flex-1 items-center gap-3">
        <ProviderIcon
          type={template.id}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderTemplatesSection.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderTemplatesSection.tsx
@@ -58,14 +58,21 @@ export const ProviderTemplatesSection: FC<ProviderTemplatesSectionProps> = ({

        <CollapsibleContent>
          <div className="mt-4 grid gap-3 sm:grid-cols-2 lg:grid-cols-3">
-            {filteredTemplates.map((template) => (
-              <ProviderTemplateCard
-                key={template.id}
-                template={template}
-                highlighted={template.id === 'moonshot'}
-                onUseTemplate={onUseTemplate}
-              />
-            ))}
+            {filteredTemplates.map((template) => {
+              const isNew =
+                template.id === 'chatgpt-pro' ||
+                template.id === 'github-copilot' ||
+                template.id === 'qwen-code'
+              return (
+                <ProviderTemplateCard
+                  key={template.id}
+                  template={template}
+                  highlighted={template.id === 'moonshot'}
+                  isNew={isNew}
+                  onUseTemplate={onUseTemplate}
+                />
+              )
+            })}
          </div>
        </CollapsibleContent>
      </div>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/models.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/models.ts
@@ -1,98 +1,21 @@
+import {
+  getModelsDevModels,
+  type ModelsDevModel,
+} from '@/lib/llm-providers/models-dev'
 import type { ProviderType } from '@/lib/llm-providers/types'

-/**
- * Model information with context length
- */
 export interface ModelInfo {
+  // Identifier used to look a model up (see getModelContextLength).
  modelId: string
+  // Context window size for the model (presumably in tokens — confirm).
  contextLength: number
+  // Optional capability flags; filled in when the entry is converted from a
+  // ModelsDevModel and left unset on the hand-written provider lists.
+  supportsImages?: boolean
+  supportsReasoning?: boolean
+  supportsToolCall?: boolean
 }

-/**
- * Models data organized by provider type (matches backend AIProvider enum)
- */
-export interface ModelsData {
-  anthropic: ModelInfo[]
-  openai: ModelInfo[]
-  'openai-compatible': ModelInfo[]
-  google: ModelInfo[]
-  openrouter: ModelInfo[]
-  azure: ModelInfo[]
-  ollama: ModelInfo[]
-  lmstudio: ModelInfo[]
-  bedrock: ModelInfo[]
-  browseros: ModelInfo[]
-  moonshot: ModelInfo[]
-  'chatgpt-pro': ModelInfo[]
-  'github-copilot': ModelInfo[]
-  'qwen-code': ModelInfo[]
-}
-
-/**
- * Available models per provider with context lengths
- * Based on: https://github.com/browseros-ai/BrowserOS-agent/blob/main/src/options/data/models.ts
- */
-export const MODELS_DATA: ModelsData = {
-  moonshot: [{ modelId: 'kimi-k2.5', contextLength: 200000 }],
-  anthropic: [
-    { modelId: 'claude-opus-4-5-20251101', contextLength: 200000 },
-    { modelId: 'claude-haiku-4-5-20251001', contextLength: 200000 },
-    { modelId: 'claude-sonnet-4-5-20250929', contextLength: 200000 },
-    { modelId: 'claude-sonnet-4-20250514', contextLength: 200000 },
-    { modelId: 'claude-opus-4-20250514', contextLength: 200000 },
-    { modelId: 'claude-3-7-sonnet-20250219', contextLength: 200000 },
-    { modelId: 'claude-3-5-haiku-20241022', contextLength: 200000 },
-  ],
-  openai: [
-    { modelId: 'gpt-5.2', contextLength: 200000 },
-    { modelId: 'gpt-5.2-pro', contextLength: 200000 },
-    { modelId: 'gpt-5', contextLength: 200000 },
-    { modelId: 'gpt-5-mini', contextLength: 200000 },
-    { modelId: 'gpt-5-nano', contextLength: 200000 },
-    { modelId: 'gpt-4.1', contextLength: 200000 },
-    { modelId: 'gpt-4.1-mini', contextLength: 200000 },
-    { modelId: 'o4-mini', contextLength: 200000 },
-    { modelId: 'o3-mini', contextLength: 200000 },
-    { modelId: 'gpt-4o', contextLength: 128000 },
-    { modelId: 'gpt-4o-mini', contextLength: 128000 },
-  ],
-  'openai-compatible': [],
-  google: [
-    { modelId: 'gemini-3-pro-preview', contextLength: 1048576 },
-    { modelId: 'gemini-3-flash-preview', contextLength: 1048576 },
-    { modelId: 'gemini-2.5-flash', contextLength: 1048576 },
-    { modelId: 'gemini-2.5-pro', contextLength: 1048576 },
-  ],
-  openrouter: [
-    { modelId: 'google/gemini-3-pro-preview', contextLength: 1048576 },
-    { modelId: 'google/gemini-3-flash-preview', contextLength: 1048576 },
-    { modelId: 'google/gemini-2.5-flash', contextLength: 1048576 },
-    { modelId: 'anthropic/claude-opus-4.5', contextLength: 200000 },
-    { modelId: 'anthropic/claude-haiku-4.5', contextLength: 200000 },
-    { modelId: 'anthropic/claude-sonnet-4.5', contextLength: 200000 },
-    { modelId: 'anthropic/claude-sonnet-4', contextLength: 200000 },
-    { modelId: 'anthropic/claude-3.7-sonnet', contextLength: 200000 },
-    { modelId: 'openai/gpt-4o', contextLength: 128000 },
-    { modelId: 'openai/gpt-oss-120b', contextLength: 128000 },
-    { modelId: 'openai/gpt-oss-20b', contextLength: 128000 },
-    { modelId: 'qwen/qwen3-14b', contextLength: 131072 },
-    { modelId: 'qwen/qwen3-8b', contextLength: 131072 },
-  ],
-  azure: [],
-  ollama: [
-    { modelId: 'qwen3:4b', contextLength: 262144 },
-    { modelId: 'qwen3:8b', contextLength: 40960 },
-    { modelId: 'qwen3:14b', contextLength: 40960 },
-    { modelId: 'gpt-oss:20b', contextLength: 128000 },
-    { modelId: 'gpt-oss:120b', contextLength: 128000 },
-  ],
-  lmstudio: [
-    { modelId: 'openai/gpt-oss-20b', contextLength: 128000 },
-    { modelId: 'openai/gpt-oss-120b', contextLength: 128000 },
-    { modelId: 'qwen/qwen3-vl-8b', contextLength: 131072 },
-  ],
-  bedrock: [],
+const CUSTOM_PROVIDER_MODELS: Partial<Record<ProviderType, ModelInfo[]>> = {
  browseros: [{ modelId: 'browseros-auto', contextLength: 200000 }],
+  'openai-compatible': [],
+  ollama: [],
  'chatgpt-pro': [
    { modelId: 'gpt-5.4', contextLength: 400000 },
    { modelId: 'gpt-5.3-codex', contextLength: 400000 },
@@ -103,32 +26,6 @@ export const MODELS_DATA: ModelsData = {
    { modelId: 'gpt-5.1-codex-mini', contextLength: 400000 },
    { modelId: 'gpt-5.1', contextLength: 200000 },
  ],
-  'github-copilot': [
-    // Free tier (unlimited with Pro)
-    { modelId: 'gpt-5-mini', contextLength: 128000 },
-    { modelId: 'claude-haiku-4.5', contextLength: 128000 },
-    { modelId: 'gpt-4o', contextLength: 64000 },
-    { modelId: 'gpt-4.1', contextLength: 64000 },
-    // Premium models (Pro: 300/mo, Pro+: 1500/mo)
-    { modelId: 'claude-sonnet-4.6', contextLength: 128000 },
-    { modelId: 'claude-sonnet-4.5', contextLength: 128000 },
-    { modelId: 'claude-sonnet-4', contextLength: 128000 },
-    { modelId: 'claude-opus-4.6', contextLength: 128000 },
-    { modelId: 'claude-opus-4.5', contextLength: 128000 },
-    { modelId: 'gemini-2.5-pro', contextLength: 128000 },
-    { modelId: 'gemini-3-pro-preview', contextLength: 128000 },
-    { modelId: 'gemini-3-flash-preview', contextLength: 128000 },
-    { modelId: 'gemini-3.1-pro-preview', contextLength: 128000 },
-    { modelId: 'gpt-5.4', contextLength: 272000 },
-    { modelId: 'gpt-5.4-mini', contextLength: 128000 },
-    { modelId: 'gpt-5.3-codex', contextLength: 272000 },
-    { modelId: 'gpt-5.2-codex', contextLength: 272000 },
-    { modelId: 'gpt-5.2', contextLength: 128000 },
-    { modelId: 'gpt-5.1-codex', contextLength: 128000 },
-    { modelId: 'gpt-5.1-codex-max', contextLength: 128000 },
-    { modelId: 'gpt-5.1', contextLength: 128000 },
-    { modelId: 'grok-code-fast-1', contextLength: 128000 },
-  ],
  'qwen-code': [
    { modelId: 'coder-model', contextLength: 1000000 },
    { modelId: 'qwen3-coder-plus', contextLength: 1000000 },
@@ -137,25 +34,23 @@ export const MODELS_DATA: ModelsData = {
  ],
 }

-/**
- * Get models for a specific provider type
- */
+/** Adapts a ModelsDevModel entry to the ModelInfo shape used by this module. */
+function fromModelsDevModel(m: ModelsDevModel): ModelInfo {
+  const {
+    id,
+    contextWindow,
+    supportsImages,
+    supportsReasoning,
+    supportsToolCall,
+  } = m
+  return {
+    modelId: id,
+    contextLength: contextWindow,
+    supportsImages,
+    supportsReasoning,
+    supportsToolCall,
+  }
+}
+
 export function getModelsForProvider(providerType: ProviderType): ModelInfo[] {
-  return MODELS_DATA[providerType] || []
+  // A hand-maintained entry wins even when its list is empty; every other
+  // provider type is resolved through getModelsDevModels.
+  const override = CUSTOM_PROVIDER_MODELS[providerType]
+  if (override === undefined) {
+    return getModelsDevModels(providerType).map(fromModelsDevModel)
+  }
+  return override
 }

-/**
- * Get model options for select dropdown (model IDs + custom option)
- */
-export function getModelOptions(providerType: ProviderType): string[] {
-  const models = getModelsForProvider(providerType)
-  const modelIds = models.map((m) => m.modelId)
-  return modelIds.length > 0 ? [...modelIds, 'custom'] : ['custom']
-}
-
-/**
- * Get context length for a specific model
- */
 export function getModelContextLength(
  providerType: ProviderType,
  modelId: string,
@@ -164,14 +59,3 @@ export function getModelContextLength(
  const model = models.find((m) => m.modelId === modelId)
  return model?.contextLength
 }
-
-/**
- * Check if model ID is a custom (user-entered) value
- */
-export function isCustomModel(
-  providerType: ProviderType,
-  modelId: string,
-): boolean {
-  const models = getModelsForProvider(providerType)
-  return !models.some((m) => m.modelId === modelId)
-}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/create-graph/CreateGraph.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/create-graph/CreateGraph.tsx
@@ -259,11 +259,23 @@ export const CreateGraph: FC = () => {
  })

  const onClickTest = async () => {
-    const backgroundWindow = await chrome.windows.create({
-      url: 'chrome://newtab',
-      focused: true,
-      type: 'normal',
-    })
+    let backgroundWindow: chrome.windows.Window | undefined
+    try {
+      backgroundWindow = await chrome.windows.create({
+        url: 'chrome://newtab',
+        focused: true,
+        type: 'normal',
+      })
+    } catch {
+      // Fallback when no window context is available (e.g. all windows closed)
+      const tab = await chrome.tabs.create({
+        url: 'chrome://newtab',
+        active: true,
+      })
+      if (tab.windowId) {
+        backgroundWindow = await chrome.windows.get(tab.windowId)
+      }
+    }

    sendMessage({
      text: 'Run a test of the graph you just created.',
--- a/packages/browseros-agent/apps/agent/entrypoints/app/login/LogoutPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/login/LogoutPage.tsx
@@ -10,6 +10,7 @@ import {
  CardHeader,
  CardTitle,
 } from '@/components/ui/card'
+import { resetIdentity } from '@/lib/analytics/identify'
 import { signOut } from '@/lib/auth/auth-client'
 import { providersStorage } from '@/lib/llm-providers/storage'
 import { scheduledJobStorage } from '@/lib/schedules/scheduleStorage'
@@ -26,6 +27,7 @@ export const LogoutPage: FC = () => {
      queryClient.clear()
      await localforage.clear()

+      resetIdentity()
      await signOut()
      navigate('/home', { replace: true })
    }
--- a/packages/browseros-agent/apps/agent/entrypoints/app/mcp-settings/MCPServerHeader.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/mcp-settings/MCPServerHeader.tsx
@@ -1,31 +1,40 @@
-import { Check, Copy, ExternalLink, Globe, Server } from 'lucide-react'
-import { type FC, useState } from 'react'
+import {
+  Check,
+  Copy,
+  ExternalLink,
+  Loader2,
+  RefreshCw,
+  Server,
+} from 'lucide-react'
+import { type FC, useCallback, useState } from 'react'
+import { toast } from 'sonner'
 import { Button } from '@/components/ui/button'
+import { MCP_SERVER_RESTARTED_EVENT } from '@/lib/constants/analyticsEvents'
+import { sendServerMessage } from '@/lib/messaging/server/serverMessages'
+import { track } from '@/lib/metrics/track'

 interface MCPServerHeaderProps {
  serverUrl: string | null
  isLoading: boolean
  error: string | null
-  title?: string
-  description?: string
-  remoteAccessEnabled?: boolean
+  // Called after a restart, once the health check reports the server healthy.
+  onServerRestart?: () => void
 }

 const DOCS_URL = 'https://docs.browseros.com/features/use-with-claude-code'
+const HEALTH_CHECK_TIMEOUT_MS = 60_000
+const HEALTH_CHECK_INTERVAL_MS = 2_000

 export const MCPServerHeader: FC<MCPServerHeaderProps> = ({
  serverUrl,
  isLoading,
  error,
-  title = 'BrowserOS MCP Server',
-  description = 'Connect BrowserOS to MCP clients like claude code, gemini and others.',
-  remoteAccessEnabled = false,
+  onServerRestart,
 }) => {
  const [isCopied, setIsCopied] = useState(false)
+  const [isRestarting, setIsRestarting] = useState(false)

  const handleCopy = async () => {
    if (!serverUrl) return
-
    try {
      await navigator.clipboard.writeText(serverUrl)
      setIsCopied(true)
@@ -35,6 +44,57 @@ export const MCPServerHeader: FC<MCPServerHeaderProps> = ({
    }
  }

+  // Resolves to the `healthy` flag of the 'checkHealth' reply; any failure
+  // while asking is treated as "not healthy".
+  const checkServerHealth = useCallback(async (): Promise<boolean> => {
+    try {
+      const { healthy } = await sendServerMessage('checkHealth', undefined)
+      return healthy
+    } catch {
+      return false
+    }
+  }, [])
+
+  // Sets the RESTART_SERVER pref, then polls checkServerHealth every
+  // HEALTH_CHECK_INTERVAL_MS until it succeeds or HEALTH_CHECK_TIMEOUT_MS
+  // has elapsed, reporting the outcome through a toast.
+  const handleRestart = async () => {
+    setIsRestarting(true)
+    try {
+      const { getBrowserOSAdapter } = await import('@/lib/browseros/adapter')
+      const { BROWSEROS_PREFS } = await import('@/lib/browseros/prefs')
+      await getBrowserOSAdapter().setPref(BROWSEROS_PREFS.RESTART_SERVER, true)
+
+      const startedAt = Date.now()
+      const pause = () =>
+        new Promise<void>((resolve) => {
+          setTimeout(resolve, HEALTH_CHECK_INTERVAL_MS)
+        })
+
+      // Each round: wait one interval, give up if past the deadline,
+      // otherwise probe the server.
+      let healthy = false
+      for (;;) {
+        await pause()
+        if (Date.now() - startedAt >= HEALTH_CHECK_TIMEOUT_MS) break
+        if (await checkServerHealth()) {
+          healthy = true
+          break
+        }
+      }
+
+      if (!healthy) {
+        toast.error('Server did not respond. Try restarting the browser.')
+      } else {
+        track(MCP_SERVER_RESTARTED_EVENT)
+        toast.success('Server restarted successfully')
+        onServerRestart?.()
+      }
+    } catch (err) {
+      const message =
+        err instanceof Error ? err.message : 'Failed to restart server'
+      toast.error(message)
+    } finally {
+      setIsRestarting(false)
+    }
+  }
+
  return (
    <div className="rounded-xl border border-border bg-card p-6 shadow-sm transition-all hover:shadow-md">
      <div className="flex items-start gap-4">
@@ -43,18 +103,21 @@ export const MCPServerHeader: FC<MCPServerHeaderProps> = ({
        </div>
        <div className="flex-1">
          <div className="mb-1 flex items-center justify-between">
-            <h2 className="font-semibold text-xl">{title}</h2>
+            <h2 className="font-semibold text-xl">BrowserOS MCP Server</h2>
            <a
              href={DOCS_URL}
              target="_blank"
              rel="noopener noreferrer"
              className="flex items-center gap-1 text-muted-foreground text-sm transition-colors hover:text-[var(--accent-orange)]"
            >
-              Setup a client
+              Docs
              <ExternalLink className="h-3.5 w-3.5" />
            </a>
          </div>
-          <p className="mb-6 text-muted-foreground text-sm">{description}</p>
+          <p className="mb-6 text-muted-foreground text-sm">
+            Connect BrowserOS to MCP clients like Claude Code, Gemini CLI and
+            others.
+          </p>

          <div className="flex flex-col gap-2 sm:flex-row sm:items-center">
            <span className="whitespace-nowrap font-medium text-sm">
@@ -76,6 +139,7 @@ export const MCPServerHeader: FC<MCPServerHeaderProps> = ({
                onClick={handleCopy}
                disabled={!serverUrl || isLoading}
                className="shrink-0"
+                title="Copy URL"
              >
                {isCopied ? (
                  <Check className="h-4 w-4 text-green-600" />
@@ -83,19 +147,22 @@ export const MCPServerHeader: FC<MCPServerHeaderProps> = ({
                  <Copy className="h-4 w-4" />
                )}
              </Button>
+              <Button
+                variant="outline"
+                size="icon"
+                onClick={handleRestart}
+                disabled={isLoading || isRestarting}
+                className="shrink-0"
+                title="Restart server"
+              >
+                {isRestarting ? (
+                  <Loader2 className="h-4 w-4 animate-spin" />
+                ) : (
+                  <RefreshCw className="h-4 w-4" />
+                )}
+              </Button>
            </div>
          </div>
-
-          {remoteAccessEnabled && serverUrl && !isLoading && (
-            <div className="mt-3 flex items-start gap-2 rounded-lg bg-muted/50 px-3 py-2">
-              <Globe className="mt-0.5 h-3.5 w-3.5 shrink-0 text-muted-foreground" />
-              <p className="text-muted-foreground text-xs">
-                External access is enabled. To connect from another device,
-                replace <span className="font-mono">127.0.0.1</span> with this
-                machine's IP address.
-              </p>
-            </div>
-          )}
        </div>
      </div>
    </div>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/mcp-settings/MCPSettingsPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/mcp-settings/MCPSettingsPage.tsx
@@ -4,7 +4,7 @@ import type { McpTool } from '@/lib/mcp/client'
 import { sendServerMessage } from '@/lib/messaging/server/serverMessages'
 import { MCPServerHeader } from './MCPServerHeader'
 import { MCPToolsSection } from './MCPToolsSection'
-import { ServerSettingsCard } from './ServerSettingsCard'
+import { QuickSetupSection } from './QuickSetupSection'

 /** @public */
 export const MCPSettingsPage: FC = () => {
@@ -12,8 +12,6 @@ export const MCPSettingsPage: FC = () => {
  const [urlLoading, setUrlLoading] = useState(true)
  const [urlError, setUrlError] = useState<string | null>(null)

-  const [remoteAccessEnabled, setRemoteAccessEnabled] = useState(false)
-
  const [tools, setTools] = useState<McpTool[]>([])
  const [toolsLoading, setToolsLoading] = useState(false)
  const [toolsError, setToolsError] = useState<string | null>(null)
@@ -82,13 +80,10 @@ export const MCPSettingsPage: FC = () => {
        serverUrl={serverUrl}
        isLoading={urlLoading}
        error={urlError}
-        remoteAccessEnabled={remoteAccessEnabled}
+        onServerRestart={loadServerUrlAndTools}
      />

-      <ServerSettingsCard
-        onServerRestart={loadServerUrlAndTools}
-        onRemoteAccessChange={setRemoteAccessEnabled}
-      />
+      <QuickSetupSection serverUrl={serverUrl} />

      <MCPToolsSection
        tools={tools}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/mcp-settings/QuickSetupSection.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/mcp-settings/QuickSetupSection.tsx
@@ -0,0 +1,162 @@
+import { Check, Copy, Terminal } from 'lucide-react'
+import { type FC, useState } from 'react'
+import { Button } from '@/components/ui/button'
+import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'
+
+interface QuickSetupSectionProps {
+  // URL substituted into every client snippet; the section renders nothing
+  // while this is null.
+  serverUrl: string | null
+}
+
+// Describes one tab of the quick-setup card.
+interface ClientConfig {
+  // Used as both the React key and the tab value.
+  id: string
+  // Label shown on the tab trigger.
+  name: string
+  // 'command' snippets are rendered with a leading "$" prompt.
+  type: 'command' | 'json'
+  // Builds the text that is displayed and copied for the given server URL.
+  getSnippet: (url: string) => string
+  // When set, an "Add to <fileName>" hint is shown above the snippet.
+  fileName?: string
+}
+
+// Clients offered in the quick-setup tabs, in display order. Command-style
+// entries produce a shell one-liner; JSON entries produce a config fragment
+// to paste into the file named by `fileName`.
+const clients: ClientConfig[] = [
+  {
+    id: 'claude-code',
+    name: 'Claude Code',
+    type: 'command',
+    getSnippet: (url) =>
+      `claude mcp add --transport http browseros ${url} --scope user`,
+  },
+  {
+    id: 'gemini-cli',
+    name: 'Gemini CLI',
+    type: 'command',
+    // Register under "browseros" like the other snippets rather than a
+    // generic server name, so the entry is recognisable in the client.
+    getSnippet: (url) =>
+      `gemini mcp add browseros ${url} --transport http --scope user`,
+  },
+  {
+    id: 'codex',
+    name: 'Codex',
+    type: 'command',
+    getSnippet: (url) => `codex mcp add browseros ${url}`,
+  },
+  {
+    id: 'claude-desktop',
+    name: 'Claude Desktop',
+    type: 'json',
+    fileName: 'claude_desktop_config.json',
+    getSnippet: (url) =>
+      JSON.stringify(
+        {
+          mcpServers: {
+            browserOS: {
+              command: 'npx',
+              args: ['mcp-remote', url],
+            },
+          },
+        },
+        null,
+        2,
+      ),
+  },
+  {
+    id: 'openclaw',
+    name: 'OpenClaw',
+    type: 'json',
+    fileName: 'openclaw.json',
+    getSnippet: (url) =>
+      JSON.stringify(
+        {
+          mcpServers: {
+            browseros: { url },
+          },
+        },
+        null,
+        2,
+      ),
+  },
+]
+
+/**
+ * Icon-only button that copies `text` to the clipboard and shows a check
+ * mark for two seconds after a successful copy.
+ */
+const CopyButton: FC<{ text: string }> = ({ text }) => {
+  const [copied, setCopied] = useState(false)
+
+  const handleCopy = async () => {
+    try {
+      await navigator.clipboard.writeText(text)
+      setCopied(true)
+      setTimeout(() => setCopied(false), 2000)
+    } catch {
+      // Best effort: if the Clipboard API rejects, the icon simply stays as is.
+    }
+  }
+
+  // The button has no visible text, so give it an accessible name and tooltip.
+  const label = copied ? 'Copied' : 'Copy to clipboard'
+
+  return (
+    <Button
+      type="button"
+      variant="ghost"
+      size="icon-sm"
+      onClick={handleCopy}
+      aria-label={label}
+      title={label}
+      className="shrink-0 text-muted-foreground hover:text-foreground"
+    >
+      {copied ? (
+        <Check className="h-3.5 w-3.5 text-green-600" />
+      ) : (
+        <Copy className="h-3.5 w-3.5" />
+      )}
+    </Button>
+  )
+}
+
+/**
+ * Card with one tab per entry in `clients`; each tab shows a copyable snippet
+ * built from `serverUrl`. Renders nothing when no URL is available.
+ */
+export const QuickSetupSection: FC<QuickSetupSectionProps> = ({
+  serverUrl,
+}) => {
+  if (!serverUrl) return null
+
+  const tabTriggers = clients.map(({ id, name }) => (
+    <TabsTrigger key={id} value={id}>
+      {name}
+    </TabsTrigger>
+  ))
+
+  const tabPanels = clients.map((client) => {
+    const snippet = client.getSnippet(serverUrl)
+    const isCommand = client.type === 'command'
+    return (
+      <TabsContent key={client.id} value={client.id}>
+        <div className="space-y-3">
+          {client.fileName && (
+            <p className="text-muted-foreground text-xs">
+              Add to{' '}
+              <code className="rounded bg-muted px-1 py-0.5 font-mono text-xs">
+                {client.fileName}
+              </code>
+            </p>
+          )}
+          <div className="flex items-start gap-2 rounded-lg border border-border bg-background px-3 py-2.5">
+            <pre className="flex-1 overflow-x-auto whitespace-pre-wrap break-all font-mono text-xs">
+              {isCommand && (
+                <span className="mr-1 text-muted-foreground">$</span>
+              )}
+              {snippet}
+            </pre>
+            <CopyButton text={snippet} />
+          </div>
+        </div>
+      </TabsContent>
+    )
+  })
+
+  return (
+    <div className="rounded-xl border border-border bg-card p-6 shadow-sm transition-all hover:shadow-md">
+      <div className="flex items-start gap-4">
+        <div className="flex h-12 w-12 shrink-0 items-center justify-center rounded-xl bg-[var(--accent-orange)]/10">
+          <Terminal className="h-6 w-6 text-[var(--accent-orange)]" />
+        </div>
+        <div className="flex-1">
+          <h2 className="mb-1 font-semibold text-xl">Quick Setup</h2>
+          <p className="mb-4 text-muted-foreground text-sm">
+            Copy and run the command for your tool
+          </p>
+
+          <Tabs defaultValue="claude-code">
+            <TabsList className="mb-3 flex-wrap">{tabTriggers}</TabsList>
+
+            {tabPanels}
+          </Tabs>
+        </div>
+      </div>
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/scheduled-tasks/ScheduledTasksPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/scheduled-tasks/ScheduledTasksPage.tsx
@@ -22,9 +22,7 @@ import {
  SCHEDULED_TASK_TOGGLED_EVENT,
  SCHEDULED_TASK_VIEW_RESULTS_EVENT,
 } from '@/lib/constants/analyticsEvents'
-import { useGraphqlMutation } from '@/lib/graphql/useGraphqlMutation'
 import { track } from '@/lib/metrics/track'
-import { DeleteScheduledJobDocument } from '@/lib/schedules/graphql/syncSchedulesDocument'
 import {
  scheduledJobRunStorage,
  useScheduledJobRuns,
@@ -46,8 +44,6 @@ export const ScheduledTasksPage: FC = () => {
    useScheduledJobs()
  const { jobRuns, cancelJobRun } = useScheduledJobRuns()

-  const deleteRemoteJobMutation = useGraphqlMutation(DeleteScheduledJobDocument)
-
  const [activeTab, setActiveTab] = useState<string | null>(null)
  const [isDialogOpen, setIsDialogOpen] = useState(false)
  const [editingJob, setEditingJob] = useState<ScheduledJob | null>(null)
@@ -102,7 +98,6 @@ export const ScheduledTasksPage: FC = () => {
  const confirmDelete = async () => {
    if (deleteJobId) {
      await removeJob(deleteJobId)
-      deleteRemoteJobMutation.mutate({ rowId: deleteJobId })
      setDeleteJobId(null)
      track(SCHEDULED_TASK_DELETED_EVENT)
    }
--- a/packages/browseros-agent/apps/agent/entrypoints/app/usage/UsagePage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/usage/UsagePage.tsx
@@ -105,18 +105,40 @@ export const UsagePage: FC = () => {
      </div>

      <div className="rounded-xl border p-5">
+        <div className="flex items-center gap-3">
+          <CreditCard className="h-5 w-5 text-muted-foreground" />
+          <div>
+            <p className="flex items-center gap-2 font-semibold text-sm">
+              Need more credits?
+              <span className="rounded-full bg-muted px-2 py-0.5 font-medium text-[10px] text-muted-foreground uppercase tracking-wide">
+                Coming soon
+              </span>
+            </p>
+            <p className="text-muted-foreground text-xs">
+              Additional credit packages will be available soon
+            </p>
+          </div>
+        </div>
+      </div>
+
+      <div className="rounded-xl border border-[var(--accent-orange)]/30 bg-[var(--accent-orange)]/5 p-5">
        <div className="flex items-center justify-between">
          <div className="flex items-center gap-3">
-            <CreditCard className="h-5 w-5 text-muted-foreground" />
+            <Zap className="h-5 w-5 text-[var(--accent-orange)]" />
            <div>
-              <p className="font-semibold text-sm">Need more credits?</p>
+              <p className="font-semibold text-sm">Want unlimited usage?</p>
              <p className="text-muted-foreground text-xs">
-                Additional credit packages coming soon
+                Add your own LLM provider — no credit limits
              </p>
            </div>
          </div>
-          <Button variant="outline" size="sm" disabled className="opacity-50">
-            Add Credits
+          <Button
+            variant="outline"
+            size="sm"
+            className="border-[var(--accent-orange)] bg-[var(--accent-orange)]/10 text-[var(--accent-orange)] hover:bg-[var(--accent-orange)]/20"
+            asChild
+          >
+            <a href="/app.html#/settings/ai">Add Provider</a>
          </Button>
        </div>
      </div>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/workflows/useRunWorkflow.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/workflows/useRunWorkflow.ts
@@ -101,11 +101,23 @@ export const useRunWorkflow = () => {
    setMessages([])
    setWasCancelled(false)

-    const backgroundWindow = await chrome.windows.create({
-      url: 'chrome://newtab',
-      focused: true,
-      type: 'normal',
-    })
+    let backgroundWindow: chrome.windows.Window | undefined
+    try {
+      backgroundWindow = await chrome.windows.create({
+        url: 'chrome://newtab',
+        focused: true,
+        type: 'normal',
+      })
+    } catch {
+      // Fallback when no window context is available (e.g. all windows closed)
+      const tab = await chrome.tabs.create({
+        url: 'chrome://newtab',
+        active: true,
+      })
+      if (tab.windowId) {
+        backgroundWindow = await chrome.windows.get(tab.windowId)
+      }
+    }

    sendMessage({
      text: 'Run the workflow.',
--- a/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTabChat.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTabChat.tsx
@@ -169,8 +169,15 @@ export const NewTabChat: FC = () => {
            onDismissJtbdPopup={() => {}}
          />
        )}
-        {agentUrlError && <ChatError error={agentUrlError} />}
-        {chatError && <ChatError error={chatError} />}
+        {agentUrlError && (
+          <ChatError
+            error={agentUrlError}
+            providerType={selectedProvider?.type}
+          />
+        )}
+        {chatError && (
+          <ChatError error={chatError} providerType={selectedProvider?.type} />
+        )}
      </main>

      <div className="mx-auto w-full max-w-3xl flex-shrink-0 px-4 pb-2">
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/Chat.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/Chat.tsx
@@ -36,6 +36,7 @@ export const Chat = () => {
    stop,
    agentUrlError,
    chatError,
+    selectedProvider,
    getActionForMessage,
    liked,
    onClickLike,
@@ -223,8 +224,15 @@ export const Chat = () => {
            onDismissJtbdPopup={onDismissJtbdPopup}
          />
        )}
-        {agentUrlError && <ChatError error={agentUrlError} />}
-        {chatError && <ChatError error={chatError} />}
+        {agentUrlError && (
+          <ChatError
+            error={agentUrlError}
+            providerType={selectedProvider?.type}
+          />
+        )}
+        {chatError && (
+          <ChatError error={chatError} providerType={selectedProvider?.type} />
+        )}
      </main>

      <ChatFooter
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatError.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatError.tsx
@@ -2,11 +2,6 @@ import { AlertCircle, RefreshCw } from 'lucide-react'
 import type { FC } from 'react'
 // import { useMemo } from 'react'
 import { Button } from '@/components/ui/button'
-import {
-  KIMI_RATE_LIMIT_DOCS_CLICKED_EVENT,
-  KIMI_RATE_LIMIT_PLATFORM_CLICKED_EVENT,
-} from '@/lib/constants/analyticsEvents'
-import { track } from '@/lib/metrics/track'

 // --- Commented out for Kimi partnership launch (restore after) ---
 // const SURVEY_DIRECTIONS = [
@@ -24,20 +19,24 @@ import { track } from '@/lib/metrics/track'
 interface ChatErrorProps {
  error: Error
  onRetry?: () => void
+  providerType?: string
 }

-function parseErrorMessage(message: string): {
+function parseErrorMessage(
+  message: string,
+  providerType?: string,
+): {
  text: string
  url?: string
  isRateLimit?: boolean
  isCreditsExhausted?: boolean
  isConnectionError?: boolean
 } {
-  // Detect MCP server connection failures
-  if (
-    (message.includes('Failed to fetch') || message.includes('fetch failed')) &&
-    message.includes('127.0.0.1')
-  ) {
+  const isBrowserosProvider = providerType === 'browseros'
+
+  // All chat requests go through the local BrowserOS agent server, so any
+  // fetch failure is always a local connection issue.
+  if (message.includes('Failed to fetch') || message.includes('fetch failed')) {
    return {
      text: 'Unable to connect to BrowserOS agent. Follow below instructions.',
      url: 'https://docs.browseros.com/troubleshooting/connection-issues',
@@ -45,10 +44,12 @@ function parseErrorMessage(message: string): {
    }
  }

-  // Detect credit exhaustion from gateway
+  // Detect credit exhaustion from gateway (BrowserOS provider only)
  if (
-    message.includes('CREDITS_EXHAUSTED') ||
-    message.includes('Daily credits exhausted')
+    isBrowserosProvider &&
+    (message.includes('CREDITS_EXHAUSTED') ||
+      message.includes('Credits exhausted') ||
+      message.includes('Daily credits exhausted'))
  ) {
    return {
      text: 'Daily credits exhausted. Credits reset at midnight UTC.',
@@ -58,8 +59,11 @@ function parseErrorMessage(message: string): {
    }
  }

-  // Detect BrowserOS rate limit (unique pattern, no provider uses this)
-  if (message.includes('BrowserOS LLM daily limit reached')) {
+  // Detect BrowserOS rate limit (BrowserOS provider only)
+  if (
+    isBrowserosProvider &&
+    message.includes('BrowserOS LLM daily limit reached')
+  ) {
    return {
      text: 'Add your own API key for unlimited usage.',
      url: 'https://dub.sh/browseros-usage-limit',
@@ -83,9 +87,13 @@ function parseErrorMessage(message: string): {
  return { text: text || 'An unexpected error occurred', url }
 }

-export const ChatError: FC<ChatErrorProps> = ({ error, onRetry }) => {
+export const ChatError: FC<ChatErrorProps> = ({
+  error,
+  onRetry,
+  providerType,
+}) => {
  const { text, url, isRateLimit, isCreditsExhausted, isConnectionError } =
-    parseErrorMessage(error.message)
+    parseErrorMessage(error.message, providerType)

  // --- Commented out for Kimi partnership launch (restore after) ---
  // const surveyUrl = useMemo(
@@ -151,31 +159,15 @@ export const ChatError: FC<ChatErrorProps> = ({ error, onRetry }) => {
          View Usage & Billing
        </a>
      )}
-      {isRateLimit && !isCreditsExhausted && (
-        <div className="flex flex-col items-center gap-1">
-          <p className="text-muted-foreground text-xs">
-            {/* biome-ignore lint/a11y/useValidAnchor: link with click tracking */}
-            <a
-              href="https://docs.browseros.com/features/bring-your-own-llm#kimi-k2-5-%E2%80%94-in-partnership-with-moonshot-ai"
-              target="_blank"
-              rel="noopener noreferrer"
-              className="underline hover:text-foreground"
-              onClick={() => track(KIMI_RATE_LIMIT_DOCS_CLICKED_EVENT)}
-            >
-              Learn how to get a Kimi API key
-            </a>
-            {' or '}
-            <a
-              href="https://platform.moonshot.ai"
-              target="_blank"
-              rel="noopener noreferrer"
-              className="underline hover:text-foreground"
-              onClick={() => track(KIMI_RATE_LIMIT_PLATFORM_CLICKED_EVENT)}
-            >
-              get your API key
-            </a>
-          </p>
-        </div>
+      {isRateLimit && providerType === 'browseros' && (
+        <a
+          href="/app.html#/settings/ai"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="inline-flex items-center gap-1.5 rounded-md border border-[var(--accent-orange)] bg-[var(--accent-orange)]/10 px-3 py-1.5 font-medium text-[var(--accent-orange)] text-xs transition-colors hover:bg-[var(--accent-orange)]/20"
+        >
+          Add your own provider for unlimited usage
+        </a>
      )}
      {onRetry && (
        <Button
--- a/packages/browseros-agent/apps/agent/lib/analytics/identify.ts
+++ b/packages/browseros-agent/apps/agent/lib/analytics/identify.ts
@@ -0,0 +1,23 @@
+import { sentry } from '../sentry/sentry'
+import { posthog } from './posthog'
+
+/**
+ * Attach the given user to every analytics and error tracking service
+ * (Sentry and PostHog). Intended to run on login and whenever a stored
+ * session is restored.
+ */
+export function identify(user: { id: string; email?: string; name?: string }) {
+  const { id, email, name } = user
+
+  sentry.setUser({ id, email })
+  posthog.identify(id, { email, name })
+}
+
+/**
+ * Clear user identity across all services: unsets the Sentry user and resets
+ * PostHog. Call this when the user logs out.
+ */
+export function resetIdentity() {
+  sentry.setUser(null)
+  posthog.reset()
+}
--- a/packages/browseros-agent/apps/agent/lib/auth/AuthProvider.tsx
+++ b/packages/browseros-agent/apps/agent/lib/auth/AuthProvider.tsx
@@ -1,5 +1,6 @@
 import type { FC, PropsWithChildren } from 'react'
 import { useEffect } from 'react'
+import { identify, resetIdentity } from '@/lib/analytics/identify'
 import { useSession } from './auth-client'
 import { useSessionInfo } from './sessionStorage'

@@ -14,6 +15,16 @@ export const AuthProvider: FC<PropsWithChildren> = ({ children }) => {
        session: data?.session,
        user: data?.user,
      })
+
+      if (data?.user?.id) {
+        identify({
+          id: data.user.id,
+          email: data.user.email,
+          name: data.user.name || undefined,
+        })
+      } else {
+        resetIdentity()
+      }
    }
  }, [data, isPending])

--- a/packages/browseros-agent/apps/agent/lib/constants/analyticsEvents.ts
+++ b/packages/browseros-agent/apps/agent/lib/constants/analyticsEvents.ts
@@ -67,6 +67,10 @@ export const QWEN_CODE_OAUTH_DISCONNECTED_EVENT =
 /** @public */
 export const HUB_PROVIDER_ADDED_EVENT = 'settings.hub_provider.added'

+/** @public */
+export const MCP_PROMO_BANNER_CLICKED_EVENT =
+  'settings.mcp_promo_banner.clicked'
+
 /** @public */
 export const MCP_EXTERNAL_ACCESS_ENABLED_EVENT =
  'settings.mcp_external_access.enabled'
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/models-dev-data.json
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/models-dev-data.json
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/models-dev.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/models-dev.ts
@@ -0,0 +1,47 @@
+import data from './models-dev-data.json'
+
+/** One model entry of a provider in `models-dev-data.json`. */
+export interface ModelsDevModel {
+  id: string
+  name: string
+  contextWindow: number
+  maxOutput: number
+  supportsImages: boolean
+  supportsReasoning: boolean
+  supportsToolCall: boolean
+  inputCost?: number
+  outputCost?: number
+}
+
+/** One provider entry in `models-dev-data.json`, keyed there by provider id. */
+export interface ModelsDevProvider {
+  name: string
+  api?: string
+  doc: string
+  models: ModelsDevModel[]
+}
+
+const modelsDevData: Record<string, ModelsDevProvider> = data as Record<
+  string,
+  ModelsDevProvider
+>
+
+/**
+ * Look up a provider by id.
+ *
+ * Only own keys of the dataset count, so ids that collide with inherited
+ * object members (`constructor`, `toString`, ...) yield `undefined` rather
+ * than a non-provider value.
+ */
+export function getModelsDevProvider(
+  providerId: string,
+): ModelsDevProvider | undefined {
+  return Object.hasOwn(modelsDevData, providerId)
+    ? modelsDevData[providerId]
+    : undefined
+}
+
+/** Models listed for `providerId`, or an empty array when it is unknown. */
+export function getModelsDevModels(providerId: string): ModelsDevModel[] {
+  return getModelsDevProvider(providerId)?.models ?? []
+}
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/providerTemplates.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/providerTemplates.ts
@@ -1,3 +1,4 @@
+import { getModelsDevProvider } from './models-dev'
 import type { ProviderType } from './types'

 /**
@@ -15,6 +16,30 @@ export interface ProviderTemplate {
  apiKeyUrl?: string
 }

+/** Build a template whose name, base URL and model traits come from models-dev data. */
+function enrichTemplate(
+  providerId: ProviderType,
+  overrides: {
+    defaultModelId: string
+    defaultBaseUrl?: string
+    apiKeyUrl?: string
+    setupGuideUrl?: string
+  },
+): ProviderTemplate {
+  const catalog = getModelsDevProvider(providerId)
+  const match = catalog?.models.find((entry) => entry.id === overrides.defaultModelId)
+  return {
+    id: providerId,
+    name: catalog?.name ?? providerId,
+    defaultBaseUrl: overrides.defaultBaseUrl ?? catalog?.api ?? '',
+    defaultModelId: overrides.defaultModelId,
+    supportsImages: match?.supportsImages ?? true,
+    contextWindow: match?.contextWindow ?? 128000,
+    ...(overrides.apiKeyUrl && { apiKeyUrl: overrides.apiKeyUrl }),
+    ...(overrides.setupGuideUrl && { setupGuideUrl: overrides.setupGuideUrl }),
+  }
+}
+
 /**
 * Available provider templates for quick setup
 * @public
@@ -57,17 +82,12 @@ export const providerTemplates: ProviderTemplate[] = [
    apiKeyUrl: 'https://platform.moonshot.ai/console/api-keys',
    setupGuideUrl: 'https://platform.moonshot.ai/console/api-keys',
  },
-  {
-    id: 'openai',
-    name: 'OpenAI',
-    defaultBaseUrl: 'https://api.openai.com/v1',
-    defaultModelId: 'gpt-4',
-    supportsImages: true,
-    contextWindow: 128000,
+  enrichTemplate('openai', {
+    defaultModelId: 'gpt-5',
    apiKeyUrl: 'https://platform.openai.com/api-keys',
    setupGuideUrl:
      'https://docs.browseros.com/features/bring-your-own-llm#openai',
-  },
+  }),
  {
    id: 'openai-compatible',
    name: 'OpenAI Compatible',
@@ -76,28 +96,18 @@ export const providerTemplates: ProviderTemplate[] = [
    supportsImages: true,
    contextWindow: 128000,
  },
-  {
-    id: 'anthropic',
-    name: 'Anthropic',
-    defaultBaseUrl: 'https://api.anthropic.com/v1',
-    defaultModelId: 'claude-3-5-sonnet-20241022',
-    supportsImages: true,
-    contextWindow: 200000,
+  enrichTemplate('anthropic', {
+    defaultModelId: 'claude-sonnet-4-6',
    apiKeyUrl: 'https://console.anthropic.com/settings/keys',
    setupGuideUrl:
      'https://docs.browseros.com/features/bring-your-own-llm#claude',
-  },
-  {
-    id: 'google',
-    name: 'Gemini',
-    defaultBaseUrl: 'https://generativelanguage.googleapis.com/v1beta',
-    defaultModelId: 'gemini-1.5-pro',
-    supportsImages: true,
-    contextWindow: 1000000,
+  }),
+  enrichTemplate('google', {
+    defaultModelId: 'gemini-2.5-flash',
    apiKeyUrl: 'https://aistudio.google.com/app/apikey',
    setupGuideUrl:
      'https://docs.browseros.com/features/bring-your-own-llm#gemini',
-  },
+  }),
  {
    id: 'ollama',
    name: 'Ollama',
@@ -108,47 +118,28 @@ export const providerTemplates: ProviderTemplate[] = [
    setupGuideUrl:
      'https://docs.browseros.com/features/bring-your-own-llm#ollama',
  },
-  {
-    id: 'openrouter',
-    name: 'OpenRouter',
-    defaultBaseUrl: 'https://openrouter.ai/api/v1',
-    defaultModelId: 'openai/gpt-4-turbo',
-    supportsImages: true,
-    contextWindow: 128000,
+  enrichTemplate('openrouter', {
+    defaultModelId: 'anthropic/claude-sonnet-4.5',
    apiKeyUrl: 'https://openrouter.ai/keys',
    setupGuideUrl:
      'https://docs.browseros.com/features/bring-your-own-llm#openrouter',
-  },
-  {
-    id: 'lmstudio',
-    name: 'LM Studio',
+  }),
+  enrichTemplate('lmstudio', {
+    defaultModelId: 'openai/gpt-oss-20b',
    defaultBaseUrl: 'http://localhost:1234/v1',
-    defaultModelId: 'local-model',
-    supportsImages: false,
-    contextWindow: 32000,
    setupGuideUrl:
      'https://docs.browseros.com/features/bring-your-own-llm#lmstudio',
-  },
-  {
-    id: 'azure',
-    name: 'Azure',
-    defaultBaseUrl: '',
+  }),
+  enrichTemplate('azure', {
    defaultModelId: '',
-    supportsImages: true,
-    contextWindow: 128000,
    apiKeyUrl:
      'https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/OpenAI',
-  },
-  {
-    id: 'bedrock',
-    name: 'AWS Bedrock',
-    defaultBaseUrl: '',
-    defaultModelId: '',
-    supportsImages: true,
-    contextWindow: 200000,
+  }),
+  enrichTemplate('bedrock', {
+    defaultModelId: 'anthropic.claude-sonnet-4-6',
    setupGuideUrl:
      'https://docs.aws.amazon.com/bedrock/latest/userguide/getting-started.html',
-  },
+  }),
 ]

 /**
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/useOAuthProviderFlow.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/useOAuthProviderFlow.ts
@@ -1,4 +1,4 @@
-import { useEffect, useRef } from 'react'
+import { useEffect, useRef, useState } from 'react'
 import { toast } from 'sonner'
 import { track } from '@/lib/metrics/track'
 import {
@@ -20,10 +20,18 @@ export interface OAuthProviderFlowConfig {
  clientAuth?: ClientAuthConfig
 }

+export interface PendingDeviceCode {
+  userCode: string
+  providerName: string
+  verificationUri: string
+}
+
 interface OAuthProviderFlowReturn {
  status: { authenticated: boolean; email?: string } | null
  disconnect: () => Promise<void>
  startOAuthFlow: (agentServerUrl: string | undefined) => Promise<void>
+  pendingDeviceCode: PendingDeviceCode | null
+  clearDeviceCode: () => void
 }

 export function useOAuthProviderFlow(
@@ -35,6 +43,8 @@ export function useOAuthProviderFlow(
    config.providerType,
  )
  const flowStartedRef = useRef(false)
+  const [pendingDeviceCode, setPendingDeviceCode] =
+    useState<PendingDeviceCode | null>(null)

  // Auto-create provider when OAuth completes
  // biome-ignore lint/correctness/useExhaustiveDependencies: intentional — only trigger on auth status change
@@ -57,6 +67,7 @@ export function useOAuthProviderFlow(
        createdAt: now,
        updatedAt: now,
      })
+      setPendingDeviceCode(null)
      track(config.completedEvent, { email: status.email })
      toast.success(`${config.displayName} Connected`, {
        description: status.email
@@ -104,9 +115,10 @@ export function useOAuthProviderFlow(
      deviceData.verification_uri_complete ?? deviceData.verification_uri
    window.open(verificationUri, '_blank')
    track(config.startedEvent)
-    toast.info(`Enter code: ${deviceData.user_code}`, {
-      description: `Paste this code on the ${config.displayName} page that just opened.`,
-      duration: 60_000,
+    setPendingDeviceCode({
+      userCode: deviceData.user_code,
+      providerName: config.displayName,
+      verificationUri,
    })

    startTokenPolling(auth, deviceData, codeVerifier, async (token) => {
@@ -142,9 +154,10 @@ export function useOAuthProviderFlow(
      window.open(data.verificationUri, '_blank')
      startPolling()
      track(config.startedEvent)
-      toast.info(`Enter code: ${data.userCode}`, {
-        description: `Paste this code on the ${config.displayName} page that just opened.`,
-        duration: 60_000,
+      setPendingDeviceCode({
+        userCode: data.userCode,
+        providerName: config.displayName,
+        verificationUri: data.verificationUri,
      })
      return
    }
@@ -163,5 +176,7 @@ export function useOAuthProviderFlow(
    status,
    disconnect,
    startOAuthFlow,
+    pendingDeviceCode,
+    clearDeviceCode: () => setPendingDeviceCode(null),
  }
 }
--- a/packages/browseros-agent/apps/agent/lib/schedules/scheduleStorage.ts
+++ b/packages/browseros-agent/apps/agent/lib/schedules/scheduleStorage.ts
@@ -22,6 +22,13 @@ export const scheduledJobRunStorage = storage.defineItem<ScheduledJobRun[]>(
  },
 )

+export const pendingDeletionStorage = storage.defineItem<string[]>(
+  'local:scheduledJobsPendingDeletion',
+  {
+    fallback: [],
+  },
+)
+
 export function useScheduledJobs() {
  const [jobs, setJobs] = useState<ScheduledJob[]>([])

@@ -54,6 +61,11 @@ export function useScheduledJobs() {
  const removeJob = async (id: string) => {
    await chrome.alarms.clear(getAlarmName(id))

+    const pending = (await pendingDeletionStorage.getValue()) ?? []
+    if (!pending.includes(id)) {
+      await pendingDeletionStorage.setValue([...pending, id])
+    }
+
    const currentJobs = (await scheduledJobStorage.getValue()) ?? []
    await scheduledJobStorage.setValue(currentJobs.filter((j) => j.id !== id))

--- a/packages/browseros-agent/apps/agent/lib/schedules/syncSchedulesToBackend.ts
+++ b/packages/browseros-agent/apps/agent/lib/schedules/syncSchedulesToBackend.ts
@@ -5,10 +5,11 @@ import { sentry } from '@/lib/sentry/sentry'
 import { createAlarmFromJob } from './createAlarmFromJob'
 import {
  CreateScheduledJobDocument,
+  DeleteScheduledJobDocument,
  GetScheduledJobsByProfileIdDocument,
  UpdateScheduledJobDocument,
 } from './graphql/syncSchedulesDocument'
-import { scheduledJobStorage } from './scheduleStorage'
+import { pendingDeletionStorage, scheduledJobStorage } from './scheduleStorage'
 import type { ScheduledJob } from './scheduleTypes'

 type RemoteScheduledJob = {
@@ -99,6 +100,32 @@ export async function syncSchedulesToBackend(
    }
  }

+  const pendingDeletions = new Set(
+    (await pendingDeletionStorage.getValue()) ?? [],
+  )
+  const resolvedDeletions = new Set<string>()
+
+  for (const rowId of pendingDeletions) {
+    if (remoteJobs.has(rowId)) {
+      try {
+        await execute(DeleteScheduledJobDocument, { rowId })
+        remoteJobs.delete(rowId)
+        resolvedDeletions.add(rowId)
+      } catch (error) {
+        sentry.captureException(error, {
+          extra: { jobId: rowId, context: 'sync-pending-deletion' },
+        })
+      }
+    } else {
+      resolvedDeletions.add(rowId)
+    }
+  }
+
+  const latestPending = (await pendingDeletionStorage.getValue()) ?? []
+  await pendingDeletionStorage.setValue(
+    latestPending.filter((id) => !resolvedDeletions.has(id)),
+  )
+
  const localJobsMap = new Map(localJobs.map((j) => [j.id, j]))
  const jobsToAddLocally: ScheduledJob[] = []
  const jobsToUpdateLocally: ScheduledJob[] = []
--- a/packages/browseros-agent/apps/agent/lib/sentry/sanitize.ts
+++ b/packages/browseros-agent/apps/agent/lib/sentry/sanitize.ts
@@ -0,0 +1,110 @@
+/**
+ * Sanitize Sentry event data by redacting values at keys that match known
+ * sensitive patterns. Used in `beforeSend` to prevent credentials from
+ * leaking into error reports.
+ */
+
+const REDACTED = '[REDACTED]'
+const CIRCULAR = '[Circular]'
+
+/** Lower-case fragments; a key containing any of them has its value redacted. */
+const SENSITIVE_KEY_PATTERNS = [
+  'apikey',
+  'api_key',
+  'accesskeyid',
+  'secretaccesskey',
+  'sessiontoken',
+  'authorization',
+  'token',
+  'password',
+  'secret',
+  'credential',
+]
+
+/** The subset of Sentry event fields this module reads and rewrites. */
+interface SanitizableEvent {
+  breadcrumbs?: unknown
+  contexts?: unknown
+  extra?: unknown
+  request?: unknown
+  exception?: {
+    values?: Array<{ stacktrace?: { frames?: Array<{ vars?: unknown }> } }>
+  }
+}
+
+/** Case-insensitive substring match of `key` against SENSITIVE_KEY_PATTERNS. */
+function isSensitiveKey(key: string): boolean {
+  const lower = key.toLowerCase()
+  return SENSITIVE_KEY_PATTERNS.some((p) => lower.includes(p))
+}
+
+/**
+ * Deep-copy `obj`, replacing the value of every sensitive key with
+ * `[REDACTED]`. Primitives, functions and `null`/`undefined` are returned
+ * unchanged; arrays and objects are rebuilt from their own enumerable entries.
+ *
+ * @param obj - Value to sanitize.
+ * @param ancestors - Objects currently being walked; a value found here is a
+ *   cycle and is replaced with `[Circular]` instead of recursing forever.
+ */
+function sanitize<T>(obj: T, ancestors: WeakSet<object> = new WeakSet()): T {
+  if (obj === null || typeof obj !== 'object') return obj
+  if (ancestors.has(obj)) return CIRCULAR as T
+
+  ancestors.add(obj)
+  let result: unknown
+  if (Array.isArray(obj)) {
+    // Arrow wrapper: `map` would otherwise pass the index as `ancestors`
+    result = obj.map((item) => sanitize(item, ancestors))
+  } else {
+    const copy: Record<string, unknown> = {}
+    for (const [key, value] of Object.entries(obj)) {
+      copy[key] = isSensitiveKey(key) ? REDACTED : sanitize(value, ancestors)
+    }
+    result = copy
+  }
+  // Only ancestors count as cycles; a sibling may legitimately share this object
+  ancestors.delete(obj)
+  return result as T
+}
+
+/**
+ * Redact sensitive values in a Sentry event's breadcrumb data, contexts,
+ * extra, request and stack-frame variables.
+ *
+ * The event is modified in place and the same object is returned.
+ */
+export function sanitizeEvent<E>(event: E): E {
+  // Structural cast instead of `any`: the concrete event type varies by SDK
+  const e = event as SanitizableEvent
+
+  if (Array.isArray(e.breadcrumbs)) {
+    e.breadcrumbs = e.breadcrumbs.map((b: Record<string, unknown>) => ({
+      ...b,
+      data: b.data ? sanitize(b.data) : b.data,
+    }))
+  }
+
+  if (e.contexts) {
+    e.contexts = sanitize(e.contexts)
+  }
+
+  if (e.extra) {
+    e.extra = sanitize(e.extra)
+  }
+
+  // Request headers/body can carry credentials (e.g. an Authorization header)
+  if (e.request) {
+    e.request = sanitize(e.request)
+  }
+
+  for (const value of e.exception?.values ?? []) {
+    for (const frame of value.stacktrace?.frames ?? []) {
+      if (frame.vars) {
+        frame.vars = sanitize(frame.vars)
+      }
+    }
+  }
+
+  return event
+}
--- a/packages/browseros-agent/apps/agent/lib/sentry/sentry.ts
+++ b/packages/browseros-agent/apps/agent/lib/sentry/sentry.ts
@@ -1,6 +1,21 @@
 import * as Sentry from '@sentry/react'
 import { getBrowserOSAdapter } from '../browseros/adapter'
 import { env } from '../env'
+import { sanitizeEvent } from './sanitize'
+
+/** Errors that are expected during normal operation and should not be reported */
+const SUPPRESSED_ERRORS = ['The browser is shutting down', 'No current window']
+
+/** Entry point of the current page (/sidepanel.html -> sidepanel), else 'unknown'. */
+function getExtensionPage(): string {
+  try {
+    const { pathname } = new URL(location.href)
+    const page = pathname.replace(/^\//, '').replace(/\.html$/, '')
+    return page || 'unknown'
+  } catch {
+    return 'unknown'
+  }
+}

 if (env.VITE_PUBLIC_SENTRY_DSN) {
  Sentry.init({
@@ -10,6 +25,29 @@ if (env.VITE_PUBLIC_SENTRY_DSN) {
    sendDefaultPii: true,
    environment: env.PROD ? 'production' : 'development',
    release: chrome.runtime.getManifest().version,
+
+    beforeSend(event) {
+      const message = event.exception?.values?.[0]?.value ?? ''
+      if (SUPPRESSED_ERRORS.some((s) => message.includes(s))) {
+        return null
+      }
+
+      event.tags = {
+        ...event.tags,
+        extensionPage: getExtensionPage(),
+      }
+
+      return sanitizeEvent(event)
+    },
+
+    integrations: [
+      Sentry.breadcrumbsIntegration({
+        console: true,
+        dom: true,
+        fetch: true,
+        xhr: true,
+      }),
+    ],
  })

  ;(async () => {
--- a/packages/browseros-agent/apps/agent/wxt.config.ts
+++ b/packages/browseros-agent/apps/agent/wxt.config.ts
@@ -54,12 +54,18 @@ export default defineConfig({
    },
    permissions: [
      'topSites',
+      'storage',
+      'unlimitedStorage',
+      'scripting',
      'tabs',
      'tabGroups',
-      'storage',
      'sidePanel',
+      'bookmarks',
+      'history',
      'browserOS',
      'alarms',
+      'webNavigation',
+      'downloads',
    ],
    host_permissions: [
      'http://127.0.0.1/*',
--- a/packages/browseros-agent/apps/cli/.env.production.example
+++ b/packages/browseros-agent/apps/cli/.env.production.example
@@ -0,0 +1,7 @@
+# Production upload env for CLI installer scripts
+
+R2_ACCOUNT_ID=
+R2_ACCESS_KEY_ID=
+R2_SECRET_ACCESS_KEY=
+R2_BUCKET=browseros
+R2_UPLOAD_PREFIX=cli
--- a/packages/browseros-agent/apps/cli/.gitignore
+++ b/packages/browseros-agent/apps/cli/.gitignore
@@ -1 +1,2 @@
 browseros-cli
+dist
--- a/packages/browseros-agent/apps/cli/.goreleaser.yml
+++ b/packages/browseros-agent/apps/cli/.goreleaser.yml
@@ -0,0 +1,50 @@
+version: 2  # GoReleaser configuration schema version
+
+project_name: browseros-cli
+
+monorepo:
+  tag_prefix: browseros-cli-  # NOTE(review): `monorepo` looks like a GoReleaser Pro option; confirm
+
+builds:
+  - main: .
+    binary: browseros-cli
+    env:
+      - CGO_ENABLED=0  # build without cgo
+    flags:
+      - -trimpath
+    ldflags:
+      - -s -w -X main.version={{ .Version }}  # strip symbol/debug info; set main.version
+    targets:
+      - darwin_amd64
+      - darwin_arm64
+      - linux_amd64
+      - linux_arm64
+      - windows_amd64
+      - windows_arm64
+
+archives:
+  - format: tar.gz  # NOTE(review): newer GoReleaser v2 releases prefer `formats`; confirm
+    format_overrides:
+      - goos: windows
+        format: zip
+    name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    files:
+      - "none*"  # presumably matches no files, so archives hold only the binary; confirm
+
+checksum:
+  name_template: checksums.txt
+
+changelog:
+  sort: asc
+  filters:
+    exclude:  # regexes; matching commit messages are left out of the changelog
+      - "^docs:"
+      - "^test:"
+      - "^ci:"
+
+release:
+  github:
+    owner: browseros-ai
+    name: BrowserOS
+  prerelease: auto  # presumably derived from the tag's prerelease suffix; confirm
+  name_template: "browseros-cli v{{ .Version }}"
--- a/packages/browseros-agent/apps/cli/CHANGELOG.md
+++ b/packages/browseros-agent/apps/cli/CHANGELOG.md
@@ -0,0 +1 @@
+# BrowserOS CLI
--- a/packages/browseros-agent/apps/cli/Makefile
+++ b/packages/browseros-agent/apps/cli/Makefile
@@ -18,3 +18,9 @@ vet:

 test:
 	go test -tags integration -v -timeout 120s ./...
+
+release-dry:
+	goreleaser release --snapshot --clean
+
+release:
+	goreleaser release --clean
--- a/packages/browseros-agent/apps/cli/cmd/init.go
+++ b/packages/browseros-agent/apps/cli/cmd/init.go
@@ -17,8 +17,10 @@ import (
 )

 func init() {
+	var autoDiscover bool
+
 	cmd := &cobra.Command{
-		Use:   "init",
+		Use:   "init [url]",
 		Short: "Configure the BrowserOS server connection",
 		Long: `Set up the CLI by providing the MCP server URL from BrowserOS.

@@ -26,33 +28,59 @@ Open BrowserOS → Settings → BrowserOS MCP to find your Server URL.
 The URL looks like: http://127.0.0.1:9004/mcp

 The port varies per installation, so this step is required on first use.
-Run again if your port changes.`,
+Run again if your port changes.
+
+Three modes:
+  browseros-cli init <url>    Non-interactive, use the provided URL
+  browseros-cli init --auto   Auto-discover from ~/.browseros/server.json
+  browseros-cli init          Interactive prompt`,
 		Annotations: map[string]string{"group": "Setup:"},
-		Args:        cobra.NoArgs,
+		Args:        cobra.MaximumNArgs(1),
 		Run: func(cmd *cobra.Command, args []string) {
 			bold := color.New(color.Bold)
 			green := color.New(color.FgGreen)
 			dim := color.New(color.Faint)

-			fmt.Println()
-			bold.Println("BrowserOS CLI Setup")
-			fmt.Println()
-			fmt.Println("Open BrowserOS → Settings → BrowserOS MCP")
-			fmt.Println("Copy the Server URL shown there.")
-			fmt.Println()
-			dim.Println("It looks like: http://127.0.0.1:9004/mcp")
-			fmt.Println()
+			var input string

-			reader := bufio.NewReader(os.Stdin)
-			fmt.Print("Server URL: ")
-			input, err := reader.ReadString('\n')
-			if err != nil {
-				output.Error("failed to read input", 1)
-			}
-			input = strings.TrimSpace(input)
+			switch {
+			case len(args) == 1:
+				// Non-interactive: URL provided as argument
+				input = args[0]

-			if input == "" {
-				output.Error("no URL provided", 1)
+			case autoDiscover:
+				// Auto-discover: server.json → config → probe common ports
+				discovered := probeRunningServer()
+				if discovered == "" {
+					output.Error("auto-discovery failed: no running BrowserOS found.\n\n"+
+						"  If not running:    browseros-cli launch\n"+
+						"  If not installed:  browseros-cli install", 1)
+				}
+				input = discovered
+				fmt.Printf("Auto-discovered server at %s\n", input)
+
+			default:
+				// Interactive prompt (original behavior)
+				fmt.Println()
+				bold.Println("BrowserOS CLI Setup")
+				fmt.Println()
+				fmt.Println("Open BrowserOS → Settings → BrowserOS MCP")
+				fmt.Println("Copy the Server URL shown there.")
+				fmt.Println()
+				dim.Println("It looks like: http://127.0.0.1:9004/mcp")
+				fmt.Println()
+
+				reader := bufio.NewReader(os.Stdin)
+				fmt.Print("Server URL: ")
+				line, err := reader.ReadString('\n')
+				if err != nil {
+					output.Error("failed to read input", 1)
+				}
+				input = strings.TrimSpace(line)
+
+				if input == "" {
+					output.Error("no URL provided", 1)
+				}
 			}

 			baseURL := normalizeServerURL(input)
@@ -88,5 +116,6 @@ Run again if your port changes.`,
 		},
 	}

+	cmd.Flags().BoolVar(&autoDiscover, "auto", false, "Auto-discover server URL from ~/.browseros/server.json")
 	rootCmd.AddCommand(cmd)
 }
--- a/packages/browseros-agent/apps/cli/cmd/install.go
+++ b/packages/browseros-agent/apps/cli/cmd/install.go
@@ -0,0 +1,247 @@
+package cmd
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"time"
+
+	"browseros-cli/output"
+
+	"github.com/fatih/color"
+	"github.com/spf13/cobra"
+)
+
+// init registers the "install" command. The command downloads the BrowserOS
+// installer for the current platform into --dir and then hands the file to
+// runPostInstall for the platform-specific follow-up.
+//
+// NOTE(review): like the rest of this package, the error paths rely on
+// output.Error / output.Errorf not returning — confirm.
+func init() {
+	cmd := &cobra.Command{
+		Use:   "install",
+		Short: "Download and install BrowserOS for the current platform",
+		Long: `Download BrowserOS for your platform and start the installation.
+
+macOS:   Downloads .dmg, mounts it, and copies BrowserOS to /Applications
+Windows: Downloads installer .exe and launches it
+Linux:   Downloads AppImage (or .deb with --deb flag)
+
+After installation:
+  browseros-cli launch        # start BrowserOS
+  browseros-cli init --auto   # configure the CLI`,
+		Annotations: map[string]string{"group": "Setup:"},
+		Args:        cobra.NoArgs,
+		Run: func(cmd *cobra.Command, args []string) {
+			dir, _ := cmd.Flags().GetString("dir")
+			deb, _ := cmd.Flags().GetBool("deb")
+
+			if deb && runtime.GOOS != "linux" {
+				output.Error("--deb is only available on Linux", 1)
+			}
+
+			downloadURL, filename := resolveDownload(deb)
+
+			// Allow --dir to name a directory that does not exist yet.
+			if err := os.MkdirAll(dir, 0755); err != nil {
+				output.Errorf(1, "create directory: %v", err)
+			}
+			destPath := filepath.Join(dir, filename)
+
+			bold := color.New(color.Bold)
+			green := color.New(color.FgGreen)
+			dim := color.New(color.Faint)
+
+			bold.Printf("Downloading BrowserOS for %s...\n", platformDisplayName())
+			dim.Printf("  %s\n", downloadURL)
+			fmt.Println()
+
+			// The timeout covers the whole transfer, body included.
+			client := &http.Client{Timeout: 10 * time.Minute}
+			resp, err := client.Get(downloadURL)
+			if err != nil {
+				output.Errorf(1, "download failed: %v", err)
+			}
+			defer resp.Body.Close()
+
+			if resp.StatusCode != http.StatusOK {
+				output.Errorf(1, "download failed: HTTP %d", resp.StatusCode)
+			}
+
+			file, err := os.Create(destPath)
+			if err != nil {
+				output.Errorf(1, "create file: %v", err)
+			}
+
+			written, copyErr := io.Copy(file, resp.Body)
+			// Close before inspecting either error: a failed close can mean
+			// the data never reached the disk, which must not be reported as
+			// a successful download.
+			closeErr := file.Close()
+			if copyErr != nil {
+				os.Remove(destPath)
+				output.Errorf(1, "download interrupted: %v", copyErr)
+			}
+			if closeErr != nil {
+				os.Remove(destPath)
+				output.Errorf(1, "write file: %v", closeErr)
+			}
+
+			green.Printf("Downloaded %s (%.1f MB)\n", filename, float64(written)/(1024*1024))
+			fmt.Println()
+
+			runPostInstall(destPath, deb, dim)
+
+			fmt.Println()
+			bold.Println("Next steps:")
+			dim.Println("  browseros-cli launch        # start BrowserOS")
+			dim.Println("  browseros-cli init --auto   # configure the CLI")
+		},
+	}
+
+	cmd.Flags().String("dir", ".", "Directory to download the installer to")
+	cmd.Flags().Bool("deb", false, "Download .deb package instead of AppImage (Linux only)")
+
+	rootCmd.AddCommand(cmd)
+}
+
+func resolveDownload(deb bool) (url, filename string) {
+	switch runtime.GOOS {
+	case "darwin":
+		return "https://files.browseros.com/download/BrowserOS.dmg", "BrowserOS.dmg"
+	case "windows":
+		return "https://files.browseros.com/download/BrowserOS_installer.exe", "BrowserOS_installer.exe"
+	case "linux":
+		if deb {
+			return "https://cdn.browseros.com/download/BrowserOS.deb", "BrowserOS.deb"
+		}
+		return "https://files.browseros.com/download/BrowserOS.AppImage", "BrowserOS.AppImage"
+	default:
+		output.Errorf(1, "unsupported platform: %s/%s\n  Download manually from https://browseros.com", runtime.GOOS, runtime.GOARCH)
+		return "", ""
+	}
+}
+
+// platformDisplayName returns a human-friendly name for the current
+// runtime.GOOS, falling back to the raw GOOS value for platforms without one.
+func platformDisplayName() string {
+	friendly := map[string]string{
+		"darwin":  "macOS",
+		"windows": "Windows",
+		"linux":   "Linux",
+	}
+	if label, ok := friendly[runtime.GOOS]; ok {
+		return label
+	}
+	return runtime.GOOS
+}
+
+// runPostInstall performs the platform-specific step after the installer at
+// path has been downloaded.
+//
+//   - darwin:  delegates to installMacOS.
+//   - linux:   prints the dpkg command for a .deb, or marks the AppImage
+//     executable and prints how to run it.
+//   - windows: launches the installer through "cmd /c start".
+//
+// deb selects the .deb branch on Linux; dim is the style used for hints.
+// Other platforms are a no-op.
+func runPostInstall(path string, deb bool, dim *color.Color) {
+	switch runtime.GOOS {
+	case "darwin":
+		installMacOS(path, dim)
+
+	case "linux":
+		if deb {
+			dim.Println("Install the .deb package:")
+			fmt.Printf("  sudo dpkg -i %s\n", path)
+			return
+		}
+
+		// Show the real location of the file. A bare filename needs "./" so
+		// the shell does not look it up on PATH; any path that already
+		// contains a directory can be used as is.
+		runnable := path
+		if !filepath.IsAbs(path) && filepath.Dir(path) == "." {
+			runnable = "./" + filepath.Base(path)
+		}
+
+		if err := os.Chmod(path, 0755); err != nil {
+			dim.Printf("Could not mark the AppImage as executable: %v\n", err)
+			dim.Printf("  Run: chmod +x %s\n", runnable)
+			return
+		}
+		dim.Printf("AppImage is ready to run: %s\n", runnable)
+
+	case "windows":
+		fmt.Println("Launching installer...")
+		// The empty string is the window title argument that "start" expects
+		// before the path.
+		if err := exec.Command("cmd", "/c", "start", "", path).Run(); err != nil {
+			dim.Printf("Could not launch installer automatically. Run: %s\n", path)
+		} else {
+			dim.Println("Follow the installer prompts to complete setup.")
+		}
+	}
+}
+
+// installMacOS mounts the DMG at dmgPath, copies BrowserOS.app from it into
+// /Applications, detaches the image and deletes the DMG.
+//
+// Every failure is reported as a hint printed with dim; the function never
+// aborts the process. When a hint refers to the mounted volume, the volume is
+// left attached so the suggested command still works.
+func installMacOS(dmgPath string, dim *color.Color) {
+	fmt.Println("Mounting disk image...")
+	// Do not pass -quiet here: it makes hdiutil close stdout, and the mount
+	// point is only reported on stdout. Output() captures it instead of
+	// printing it.
+	mountOut, err := exec.Command("hdiutil", "attach", dmgPath, "-nobrowse").Output()
+	if err != nil {
+		dim.Println("Could not mount DMG automatically.")
+		dim.Printf("  Open it manually: open %s\n", dmgPath)
+		return
+	}
+
+	// hdiutil prints one tab-separated row per attached device. Rows for
+	// mounted volumes end with the mount path, so keep the last trailing
+	// field that is an absolute path but not a /dev node.
+	mountPoint := ""
+	for _, line := range splitLines(string(mountOut)) {
+		fields := splitTabs(line)
+		if len(fields) == 0 {
+			continue
+		}
+		last := fields[len(fields)-1]
+		isDevNode := len(last) >= 5 && last[:5] == "/dev/"
+		if filepath.IsAbs(last) && !isDevNode {
+			mountPoint = last
+		}
+	}
+
+	if mountPoint == "" {
+		dim.Println("DMG mounted but could not determine mount point.")
+		dim.Printf("  Open it manually: open %s\n", dmgPath)
+		return
+	}
+
+	detach := func() {
+		exec.Command("hdiutil", "detach", mountPoint, "-quiet").Run()
+	}
+
+	// Look for BrowserOS.app in the mounted volume
+	appSrc := filepath.Join(mountPoint, "BrowserOS.app")
+	if _, err := os.Stat(appSrc); err != nil {
+		// Keep the volume attached: the hint asks the user to inspect it.
+		dim.Printf("DMG mounted at %s but BrowserOS.app not found inside.\n", mountPoint)
+		dim.Printf("  Check the volume manually: open %s\n", mountPoint)
+		return
+	}
+
+	appDest := "/Applications/BrowserOS.app"
+	staging := appDest + ".installing"
+	fmt.Printf("Installing to %s...\n", appDest)
+
+	// Keep the volume attached on failure: the suggested command copies from it.
+	copyFailed := func() {
+		os.RemoveAll(staging)
+		dim.Printf("Could not copy to /Applications (may need sudo).\n")
+		dim.Printf("  Try: sudo cp -R \"%s\" /Applications/\n", appSrc)
+		dim.Printf("  Then: hdiutil detach \"%s\"\n", mountPoint)
+	}
+
+	// Copy next to the destination first, so a failed copy leaves an existing
+	// installation untouched. cp -R is kept from the original implementation,
+	// chosen there to preserve code signatures and symlinks.
+	os.RemoveAll(staging)
+	if err := exec.Command("cp", "-R", appSrc, staging).Run(); err != nil {
+		copyFailed()
+		return
+	}
+
+	// Swap the fresh copy in. If the old bundle cannot be removed, a rename
+	// onto it would fail, so stop here.
+	if err := os.RemoveAll(appDest); err != nil {
+		copyFailed()
+		return
+	}
+	if err := os.Rename(staging, appDest); err != nil {
+		copyFailed()
+		return
+	}
+
+	// Unmount
+	detach()
+
+	// Clean up DMG
+	os.Remove(dmgPath)
+
+	fmt.Println("BrowserOS installed to /Applications/BrowserOS.app")
+}
+
+func splitLines(s string) []string {
+	var lines []string
+	for _, line := range filepath.SplitList(s) {
+		lines = append(lines, line)
+	}
+	// filepath.SplitList uses : on unix, not newlines — use manual split
+	result := []string{}
+	start := 0
+	for i := 0; i < len(s); i++ {
+		if s[i] == '\n' {
+			line := s[start:i]
+			if len(line) > 0 {
+				result = append(result, line)
+			}
+			start = i + 1
+		}
+	}
+	if start < len(s) {
+		result = append(result, s[start:])
+	}
+	return result
+}
+
+// splitTabs splits s on tab characters and returns the non-empty fields in
+// order. The result is never nil.
+func splitTabs(s string) []string {
+	fields := []string{}
+	begin := 0
+	// Treat the end of the string as one final separator so the last field
+	// is flushed by the same code path as the others.
+	for pos := 0; pos <= len(s); pos++ {
+		if pos < len(s) && s[pos] != '\t' {
+			continue
+		}
+		if pos > begin {
+			fields = append(fields, s[begin:pos])
+		}
+		begin = pos + 1
+	}
+	return fields
+}
--- a/packages/browseros-agent/apps/cli/cmd/launch.go
+++ b/packages/browseros-agent/apps/cli/cmd/launch.go
@@ -0,0 +1,287 @@
+package cmd
+
+import (
+	"fmt"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"time"
+
+	"browseros-cli/output"
+
+	"github.com/fatih/color"
+	"github.com/spf13/cobra"
+)
+
+// macOS bundle identifier — verified from BrowserOS.app/Contents/Info.plist
+const browserOSBundleID = "com.browseros.BrowserOS"
+
+// init registers the "launch" command: report an already running server,
+// otherwise start BrowserOS and wait up to --wait seconds for its server.
+func init() {
+	launchCmd := &cobra.Command{
+		Use:   "launch",
+		Short: "Launch the BrowserOS application",
+		Long: `Find and launch the BrowserOS application.
+
+Uses platform-native detection to find BrowserOS, launches it,
+and waits for the server to become ready.
+
+If BrowserOS is already running, reports the server URL.`,
+		Annotations: map[string]string{"group": "Setup:"},
+		Args:        cobra.NoArgs,
+		Run: func(cmd *cobra.Command, args []string) {
+			waitSecs, _ := cmd.Flags().GetInt("wait")
+			okStyle := color.New(color.FgGreen)
+			hintStyle := color.New(color.Faint)
+
+			// Nothing to launch when a server already answers.
+			if running := probeRunningServer(); running != "" {
+				okStyle.Printf("BrowserOS is already running at %s\n", running)
+				return
+			}
+
+			if installed := isBrowserOSInstalled(); !installed {
+				output.Error("BrowserOS is not installed.\n\n"+
+					"  To install:  browseros-cli install", 1)
+			}
+
+			fmt.Println("Launching BrowserOS...")
+			launchErr := startBrowserOS()
+			if launchErr != nil {
+				output.Errorf(1, "failed to launch: %v", launchErr)
+			}
+
+			// waitForServer prints progress dots on this line; the Println
+			// afterwards ends it.
+			fmt.Print("Waiting for server")
+			serverURL, ready := waitForServer(time.Duration(waitSecs) * time.Second)
+			fmt.Println()
+
+			if !ready {
+				timeoutMsg := fmt.Sprintf("BrowserOS launched but server didn't respond within %d seconds.\n"+
+					"  Check if BrowserOS is fully loaded, then retry.", waitSecs)
+				output.Error(timeoutMsg, 1)
+			}
+
+			okStyle.Printf("BrowserOS is ready at %s\n", serverURL)
+			fmt.Println()
+			hintStyle.Println("Next: browseros-cli init --auto")
+		},
+	}
+
+	launchCmd.Flags().Int("wait", 30, "Seconds to wait for server to start")
+	rootCmd.AddCommand(launchCmd)
+}
+
+// ---------------------------------------------------------------------------
+// Server probing
+// ---------------------------------------------------------------------------
+
+// probeRunningServer returns the base URL of the first candidate whose
+// /health endpoint answers with HTTP 200, or "" when none does. Candidates
+// are tried in this order: the URL from loadBrowserosServerURL, the URL from
+// defaultServerURL, then 127.0.0.1 on ports 9100, 9200 and 9300.
+func probeRunningServer() string {
+	healthy := func(baseURL string) bool {
+		// Short timeout: a dead port must not stall the caller.
+		probe := &http.Client{Timeout: 2 * time.Second}
+		resp, err := probe.Get(baseURL + "/health")
+		if err != nil {
+			return false
+		}
+		resp.Body.Close()
+		return resp.StatusCode == 200
+	}
+
+	// Configured sources first, resolved lazily and in priority order.
+	sources := []func() string{loadBrowserosServerURL, defaultServerURL}
+	for _, resolve := range sources {
+		if candidate := resolve(); candidate != "" && healthy(candidate) {
+			return candidate
+		}
+	}
+
+	// Last resort: well-known local ports.
+	for _, port := range []int{9100, 9200, 9300} {
+		if candidate := fmt.Sprintf("http://127.0.0.1:%d", port); healthy(candidate) {
+			return candidate
+		}
+	}
+
+	return ""
+}
+
+// ---------------------------------------------------------------------------
+// Platform-native installation detection
+// ---------------------------------------------------------------------------
+
+// isBrowserOSInstalled reports whether BrowserOS appears to be installed.
+//
+// macOS:   runs `open -Ra "BrowserOS"` and treats a zero exit as installed
+// Linux:   looks for `browseros` on PATH, then browseros.desktop in the
+//          system and per-user application directories, then an AppImage
+//          via findLinuxAppImage
+// Windows: checks the path from windowsBrowserOSExe, then queries the
+//          BrowserOS uninstall registry key under HKCU and HKLM
+//
+// Any other platform yields false.
+func isBrowserOSInstalled() bool {
+	fileExists := func(path string) bool {
+		_, err := os.Stat(path)
+		return err == nil
+	}
+
+	if runtime.GOOS == "darwin" {
+		// -R reveals instead of opening, so nothing is launched.
+		return exec.Command("open", "-Ra", "BrowserOS").Run() == nil
+	}
+
+	if runtime.GOOS == "linux" {
+		if _, err := exec.LookPath("browseros"); err == nil {
+			return true
+		}
+		desktopDirs := []string{
+			"/usr/share/applications",
+			filepath.Join(userHomeDir(), ".local/share/applications"),
+		}
+		for _, appsDir := range desktopDirs {
+			if fileExists(filepath.Join(appsDir, "browseros.desktop")) {
+				return true
+			}
+		}
+		return findLinuxAppImage() != ""
+	}
+
+	if runtime.GOOS == "windows" {
+		if exePath := windowsBrowserOSExe(); exePath != "" && fileExists(exePath) {
+			return true
+		}
+		// Fallback: the uninstall entry, per-user hive first.
+		for _, hive := range []string{"HKCU", "HKLM"} {
+			uninstallKey := hive + `\Software\Microsoft\Windows\CurrentVersion\Uninstall\BrowserOS`
+			if exec.Command("reg", "query", uninstallKey, "/v", "DisplayName").Run() == nil {
+				return true
+			}
+		}
+		return false
+	}
+
+	return false
+}
+
+// ---------------------------------------------------------------------------
+// Platform-native launch
+// ---------------------------------------------------------------------------
+
+// startBrowserOS starts the BrowserOS application for the current platform.
+//
+// macOS:   `open -b` with the browserOSBundleID bundle identifier
+// Linux:   the `browseros` binary from PATH, else an AppImage found by
+//          findLinuxAppImage, else `gtk-launch browseros`
+// Windows: the executable at the path from windowsBrowserOSExe
+//
+// It returns an error when no launch method applies or the platform is not
+// supported.
+func startBrowserOS() error {
+	goos := runtime.GOOS
+
+	if goos == "darwin" {
+		return exec.Command("open", "-b", browserOSBundleID).Run()
+	}
+
+	if goos == "linux" {
+		if binary, err := exec.LookPath("browseros"); err == nil {
+			return startDetached(binary)
+		}
+		if image := findLinuxAppImage(); image != "" {
+			return startDetached(image)
+		}
+		// gtk-launch starts an application by its .desktop name; xdg-open
+		// would open by MIME type instead.
+		if _, err := exec.LookPath("gtk-launch"); err == nil {
+			return exec.Command("gtk-launch", "browseros").Run()
+		}
+		return fmt.Errorf("BrowserOS found but could not determine how to launch it")
+	}
+
+	if goos == "windows" {
+		exePath := windowsBrowserOSExe()
+		if exePath != "" {
+			if _, statErr := os.Stat(exePath); statErr == nil {
+				return startDetached(exePath)
+			}
+		}
+		return fmt.Errorf("BrowserOS.exe not found at expected location")
+	}
+
+	return fmt.Errorf("unsupported platform: %s", runtime.GOOS)
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+// startDetached starts path with args in the background and returns without
+// waiting for it to exit. The child does not inherit this process's stdio:
+// with nil Stdin/Stdout/Stderr, os/exec connects it to the null device.
+//
+// It returns the error from starting the process, if any.
+func startDetached(path string, args ...string) error {
+	cmd := exec.Command(path, args...)
+	cmd.Stdout = nil
+	cmd.Stderr = nil
+	cmd.Stdin = nil
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+	// Wait is never called for this child, so release the resources tied to
+	// its process handle now. Best effort: the process is already running,
+	// so a failure here must not be reported as a failed launch.
+	_ = cmd.Process.Release()
+	return nil
+}
+
+// windowsBrowserOSExe builds the expected BrowserOS.exe location from the
+// LOCALAPPDATA environment variable:
+// %LOCALAPPDATA%\BrowserOS\Application\BrowserOS.exe.
+// It returns "" when LOCALAPPDATA is unset or empty. The file itself is not
+// checked for existence.
+func windowsBrowserOSExe() string {
+	if base, ok := os.LookupEnv("LOCALAPPDATA"); ok && base != "" {
+		return filepath.Join(base, "BrowserOS", "Application", "BrowserOS.exe")
+	}
+	return ""
+}
+
+// findLinuxAppImage searches common locations for a BrowserOS AppImage.
+func findLinuxAppImage() string {
+	home := userHomeDir()
+	if home == "" {
+		return ""
+	}
+	for _, dir := range []string{
+		home,
+		filepath.Join(home, "Applications"),
+		filepath.Join(home, "Downloads"),
+		"/opt",
+	} {
+		entries, err := os.ReadDir(dir)
+		if err != nil {
+			continue
+		}
+		for _, e := range entries {
+			name := e.Name()
+			if strings.HasPrefix(name, "BrowserOS") && strings.HasSuffix(name, ".AppImage") {
+				return filepath.Join(dir, name)
+			}
+		}
+	}
+	return ""
+}
+
+// userHomeDir wraps os.UserHomeDir, turning a lookup error into "".
+func userHomeDir() string {
+	if dir, err := os.UserHomeDir(); err == nil {
+		return dir
+	}
+	return ""
+}
+
+// waitForServer polls about once per second until the URL returned by
+// loadBrowserosServerURL answers /health with HTTP 200, or until maxWait has
+// elapsed. It prints one "." per unsuccessful attempt.
+//
+// It returns the server URL and true on success, or "" and false on timeout.
+func waitForServer(maxWait time.Duration) (string, bool) {
+	httpClient := &http.Client{Timeout: 2 * time.Second}
+	healthy := func(base string) bool {
+		resp, err := httpClient.Get(base + "/health")
+		if err != nil {
+			return false
+		}
+		resp.Body.Close()
+		return resp.StatusCode == 200
+	}
+
+	// The post statement sleeps between attempts; the deadline is re-checked
+	// before each new attempt.
+	for stopAt := time.Now().Add(maxWait); time.Now().Before(stopAt); time.Sleep(1 * time.Second) {
+		// Re-read the URL on every attempt; the original comment says
+		// BrowserOS writes server.json on startup with the actual port.
+		if candidate := loadBrowserosServerURL(); candidate != "" && healthy(candidate) {
+			return candidate, true
+		}
+		fmt.Print(".")
+	}
+	return "", false
+}
--- a/packages/browseros-agent/apps/cli/cmd/root.go
+++ b/packages/browseros-agent/apps/cli/cmd/root.go
@@ -167,10 +167,17 @@ func envBool(key string) bool {
 }

 func defaultServerURL() string {
+	// 1. Explicit env var always wins
 	if env := normalizeServerURL(os.Getenv("BROWSEROS_URL")); env != "" {
 		return env
 	}

+	// 2. Live discovery file from running BrowserOS (most current)
+	if url := loadBrowserosServerURL(); url != "" {
+		return url
+	}
+
+	// 3. Saved config (may be stale if port changed)
 	cfg, err := config.Load()
 	if err == nil {
 		if url := normalizeServerURL(cfg.ServerURL); url != "" {
@@ -178,10 +185,6 @@ func defaultServerURL() string {
 		}
 	}

-	if url := loadBrowserosServerURL(); url != "" {
-		return url
-	}
-
 	return ""
 }

@@ -225,6 +228,9 @@ func validateServerURL(raw string) (string, error) {
 	}

 	return "", fmt.Errorf(
-		"BrowserOS server URL is not configured.\n  Open BrowserOS -> Settings -> BrowserOS MCP and copy the Server URL.\n  Then run: browseros-cli init",
+		"BrowserOS server URL is not configured.\n\n" +
+			"  If BrowserOS is running:  browseros-cli init --auto\n" +
+			"  If BrowserOS is closed:   browseros-cli launch\n" +
+			"  If not installed:         browseros-cli install",
 	)
 }
--- a/packages/browseros-agent/apps/cli/mcp/client.go
+++ b/packages/browseros-agent/apps/cli/mcp/client.go
@@ -44,7 +44,10 @@ func (c *Client) connect(ctx context.Context) (*sdkmcp.ClientSession, error) {

 	session, err := sdkClient.Connect(ctx, transport, nil)
 	if err != nil {
-		return nil, fmt.Errorf("cannot connect to BrowserOS at %s: %w\n  Is the server running? Try: browseros-cli init", c.BaseURL, err)
+		return nil, fmt.Errorf("cannot connect to BrowserOS at %s: %w\n\n"+
+			"  If BrowserOS is running on a different port:  browseros-cli init --auto\n"+
+			"  If BrowserOS is not running:                  browseros-cli launch\n"+
+			"  If not installed:                             browseros-cli install", c.BaseURL, err)
 	}
 	return session, nil
 }
@@ -184,7 +187,10 @@ func (c *Client) Status() (map[string]any, error) {
 func (c *Client) restGET(path string) (map[string]any, error) {
 	resp, err := c.HTTPClient.Get(c.BaseURL + path)
 	if err != nil {
-		return nil, fmt.Errorf("cannot connect to BrowserOS at %s: %w\n  Try: browseros-cli init", c.BaseURL, err)
+		return nil, fmt.Errorf("cannot connect to BrowserOS at %s: %w\n\n"+
+			"  If BrowserOS is running on a different port:  browseros-cli init --auto\n"+
+			"  If BrowserOS is not running:                  browseros-cli launch\n"+
+			"  If not installed:                             browseros-cli install", c.BaseURL, err)
 	}
 	defer resp.Body.Close()

--- a/packages/browseros-agent/apps/cli/scripts/install.ps1
+++ b/packages/browseros-agent/apps/cli/scripts/install.ps1
@@ -0,0 +1,147 @@
+#
+# Install browseros-cli for Windows — downloads the latest release binary.
+#
+# Usage (PowerShell — save and run):
+#   Invoke-WebRequest -Uri "https://cdn.browseros.com/cli/install.ps1" -OutFile install.ps1
+#   .\install.ps1
+#   .\install.ps1 -Version "0.1.0" -Dir "C:\tools\browseros"
+#
+# Usage (one-liner, uses env vars for options):
+#   & { $env:BROWSEROS_VERSION="0.1.0"; irm https://cdn.browseros.com/cli/install.ps1 | iex }
+#
+
+param(
+    [string]$Version = "",
+    [string]$Dir = ""
+)
+
+$ErrorActionPreference = "Stop"
+
+# TLS 1.2 — required for GitHub, older PS 5.1 defaults to TLS 1.0.
+# OR it into the current set instead of replacing the set, so newer
+# protocols that are already enabled (e.g. TLS 1.3) stay available.
+[Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12
+
+$Repo = "browseros-ai/BrowserOS"
+$Binary = "browseros-cli"
+
+# When piped via irm | iex, param() is ignored — fall back to env vars
+if (-not $Version) { $Version = $env:BROWSEROS_VERSION }
+if (-not $Dir) { $Dir = if ($env:BROWSEROS_DIR) { $env:BROWSEROS_DIR } else { "$env:LOCALAPPDATA\browseros-cli\bin" } }
+
+# ── Resolve latest version ───────────────────────────────────────────────────
+
+if (-not $Version) {
+    Write-Host "Fetching latest version..."
+    $releases = Invoke-RestMethod "https://api.github.com/repos/$Repo/releases?per_page=100"
+
+    # Take the first release in the API's order that carries a browseros-cli
+    # tag and is not a release candidate.
+    $tag = $null
+    foreach ($release in $releases) {
+        $candidate = $release.tag_name
+        if ($candidate -match "^browseros-cli-v" -and $candidate -notmatch "-rc") {
+            $tag = $candidate
+            break
+        }
+    }
+
+    if (-not $tag) {
+        Write-Error "Could not determine latest version. Try: -Version 0.1.0"
+        exit 1
+    }
+    $Version = $tag -replace "^browseros-cli-v", ""
+}
+
+Write-Host "Installing browseros-cli v$Version..."
+
+# ── Detect architecture ──────────────────────────────────────────────────────
+
+# $env:PROCESSOR_ARCHITECTURE lies under x64 emulation on ARM64 Windows.
+# Use .NET RuntimeInformation when available, fall back to PROCESSOR_ARCHITEW6432.
+$Arch = "amd64"
+try {
+    # Preferred: ask .NET for the OS architecture.
+    $isArm64 = [System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq [System.Runtime.InteropServices.Architecture]::Arm64
+} catch {
+    # RuntimeInformation is unavailable: fall back to the environment.
+    $isArm64 = ($env:PROCESSOR_ARCHITEW6432, $env:PROCESSOR_ARCHITECTURE) -contains "ARM64"
+}
+if ($isArm64) { $Arch = "arm64" }
+
+if (-not [Environment]::Is64BitOperatingSystem) {
+    Write-Error "32-bit Windows is not supported."
+    exit 1
+}
+
+# ── Download and extract ─────────────────────────────────────────────────────
+
+# Release assets are named <binary>_<version>_windows_<arch>.zip and live
+# under the browseros-cli-v<version> tag, next to a shared checksums.txt.
+$Tag = "browseros-cli-v$Version"
+$Filename = "${Binary}_${Version}_windows_${Arch}.zip"
+$Url = "https://github.com/$Repo/releases/download/$Tag/$Filename"
+$ChecksumUrl = "https://github.com/$Repo/releases/download/$Tag/checksums.txt"
+# Work in a uniquely named temp directory; the finally block removes it.
+$TmpDir = Join-Path ([System.IO.Path]::GetTempPath()) ("browseros-cli-install-" + [System.IO.Path]::GetRandomFileName())
+
+try {
+    New-Item -ItemType Directory -Path $TmpDir | Out-Null
+
+    $ZipPath = Join-Path $TmpDir $Filename
+
+    Write-Host "Downloading $Url..."
+    Invoke-WebRequest -Uri $Url -OutFile $ZipPath -UseBasicParsing
+
+    # Checksum verification is best effort: a missing checksums.txt only
+    # produces a warning, while a mismatch aborts the install.
+    $ChecksumPath = Join-Path $TmpDir "checksums.txt"
+    $ChecksumAvailable = $true
+    try {
+        Invoke-WebRequest -Uri $ChecksumUrl -OutFile $ChecksumPath -UseBasicParsing
+    } catch {
+        $ChecksumAvailable = $false
+        Write-Warning "Could not fetch checksums.txt; skipping checksum verification. $($_.Exception.Message)"
+    }
+
+    if ($ChecksumAvailable) {
+        # Each line is "<hash><whitespace><filename>"; find the line for our archive.
+        $ExpectedChecksum = $null
+        foreach ($line in Get-Content $ChecksumPath) {
+            $parts = $line -split '\s+', 2
+            if ($parts.Length -eq 2 -and $parts[1] -eq $Filename) {
+                $ExpectedChecksum = $parts[0].ToLowerInvariant()
+                break
+            }
+        }
+
+        if ($ExpectedChecksum) {
+            # Compare both hashes in lowercase.
+            $ActualChecksum = (Get-FileHash -Path $ZipPath -Algorithm SHA256).Hash.ToLowerInvariant()
+            if ($ActualChecksum -ne $ExpectedChecksum) {
+                Write-Error "Checksum mismatch (expected $ExpectedChecksum, got $ActualChecksum)"
+                exit 1
+            }
+            Write-Host "Checksum verified."
+        } else {
+            Write-Warning "Checksum not found in checksums.txt; skipping checksum verification."
+        }
+    }
+
+    Expand-Archive -Path $ZipPath -DestinationPath $TmpDir -Force
+
+    # Search recursively so the binary is found even inside a subfolder of the archive.
+    $Exe = Get-ChildItem -Path $TmpDir -Filter "$Binary.exe" -File -Recurse | Select-Object -First 1
+    if (-not $Exe) {
+        Write-Error "Binary not found in archive."
+        exit 1
+    }
+
+    # ── Install ──────────────────────────────────────────────────────────────
+
+    if (-not (Test-Path $Dir)) {
+        New-Item -ItemType Directory -Path $Dir -Force | Out-Null
+    }
+
+    # -Force overwrites a previously installed binary.
+    Move-Item -Force $Exe.FullName (Join-Path $Dir "$Binary.exe")
+
+    Write-Host "Installed $Binary.exe to $Dir"
+} finally {
+    # Clean up on success and on failure; errors during cleanup are ignored.
+    if (Test-Path $TmpDir) { Remove-Item -Recurse -Force $TmpDir -ErrorAction SilentlyContinue }
+}
+
+# ── PATH ─────────────────────────────────────────────────────────────────────
+
+# Add the install directory to the user PATH unless it is already listed.
+# Entries are compared without trailing backslashes so "C:\x" and "C:\x\"
+# count as the same directory.
+$UserPath = [Environment]::GetEnvironmentVariable("Path", "User")
+$PathEntries = @("$UserPath" -split ";" | Where-Object { $_ -ne "" } | ForEach-Object { $_.TrimEnd('\') })
+if ($Dir.TrimEnd('\') -notin $PathEntries) {
+    Write-Host ""
+    Write-Host "Adding $Dir to your user PATH..."
+    # Avoid a dangling ";" when the user PATH was empty or unset.
+    $NewUserPath = if ($UserPath) { "$Dir;$UserPath" } else { $Dir }
+    [Environment]::SetEnvironmentVariable("Path", $NewUserPath, "User")
+    # Also update the current session so the binary is usable right away.
+    $env:Path = "$Dir;$env:Path"
+    Write-Host "Done. Restart your terminal for PATH changes to take effect."
+}
+
+Write-Host ""
+Write-Host "Run 'browseros-cli --help' to get started."
--- a/packages/browseros-agent/apps/cli/scripts/install.sh
+++ b/packages/browseros-agent/apps/cli/scripts/install.sh
@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+#
+# Install browseros-cli — downloads the latest release binary for your platform.
+#
+# Usage:
+#   curl -fsSL https://cdn.browseros.com/cli/install.sh | bash
+#
+#   # Or with options:
+#   curl -fsSL https://cdn.browseros.com/cli/install.sh | bash -s -- --version 0.1.0 --dir /usr/local/bin
+
+set -euo pipefail
+
+REPO="browseros-ai/BrowserOS"
+BINARY="browseros-cli"
+INSTALL_DIR="${HOME}/.browseros/bin"
+
+# ── Parse arguments ──────────────────────────────────────────────────────────
+
+VERSION=""
+CUSTOM_DIR=""
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --version)
+      [[ $# -lt 2 ]] && { echo "Error: --version requires a value" >&2; exit 1; }
+      VERSION="$2"; shift 2 ;;
+    --dir)
+      [[ $# -lt 2 ]] && { echo "Error: --dir requires a value" >&2; exit 1; }
+      CUSTOM_DIR="$2"; shift 2 ;;
+    --help)
+      echo "Usage: install.sh [--version VERSION] [--dir INSTALL_DIR]"
+      echo ""
+      echo "  --version   Install a specific version (default: latest)"
+      echo "  --dir       Install directory (default: ~/.browseros/bin)"
+      exit 0
+      ;;
+    *) echo "Unknown option: $1" >&2; exit 1 ;;
+  esac
+done
+
+[[ -n "$CUSTOM_DIR" ]] && INSTALL_DIR="$CUSTOM_DIR"
+
+# ── Resolve latest version ───────────────────────────────────────────────────
+
+if [[ -z "$VERSION" ]]; then
+  # List the most recent releases (up to 100) and keep the first
+  # "browseros-cli-v*" tag that is not a release candidate.
+  #
+  # awk reads its whole input, so no upstream command is killed by SIGPIPE
+  # (which pipefail would turn into a failure). The trailing `|| true` stops
+  # `set -e` from aborting silently when curl fails or nothing matches, so the
+  # explicit error below is always reached.
+  VERSION=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases?per_page=100" \
+    | grep -o '"tag_name": *"browseros-cli-v[^"]*"' \
+    | grep -v -- "-rc" \
+    | awk 'NR == 1 { sub(/.*browseros-cli-v/, ""); sub(/"/, ""); print }' \
+    || true)
+
+  if [[ -z "$VERSION" ]]; then
+    echo "Error: could not determine latest version." >&2
+    echo "  Try: install.sh --version 0.1.0" >&2
+    exit 1
+  fi
+fi
+
+echo "Installing browseros-cli v${VERSION}..."
+
+# ── Detect platform ──────────────────────────────────────────────────────────
+
+OS=$(uname -s | tr '[:upper:]' '[:lower:]')
+ARCH=$(uname -m)
+
+case "$OS" in
+  darwin) OS="darwin" ;;
+  linux)  OS="linux" ;;
+  *)      echo "Error: unsupported OS: $OS" >&2; exit 1 ;;
+esac
+
+case "$ARCH" in
+  x86_64|amd64) ARCH="amd64" ;;
+  arm64|aarch64) ARCH="arm64" ;;
+  *)             echo "Error: unsupported architecture: $ARCH" >&2; exit 1 ;;
+esac
+
+# ── Download and extract ─────────────────────────────────────────────────────
+
+# Release assets are named <binary>_<version>_<os>_<arch>.tar.gz and live
+# under the browseros-cli-v<version> tag, next to a shared checksums.txt.
+FILENAME="${BINARY}_${VERSION}_${OS}_${ARCH}.tar.gz"
+TAG="browseros-cli-v${VERSION}"
+URL="https://github.com/${REPO}/releases/download/${TAG}/${FILENAME}"
+CHECKSUM_URL="https://github.com/${REPO}/releases/download/${TAG}/checksums.txt"
+
+# Work in a private temp directory that is removed on any exit.
+TMPDIR_DL=$(mktemp -d)
+trap 'rm -rf "$TMPDIR_DL"' EXIT
+
+echo "Downloading ${URL}..."
+# -f makes curl fail on HTTP errors; under `set -e` that aborts the script.
+curl -fSL --progress-bar -o "${TMPDIR_DL}/${FILENAME}" "$URL"
+
+# Verify checksum if sha256sum/shasum is available.
+# Verification is best effort: a missing checksums.txt, a missing entry or a
+# missing hashing tool only produces a warning; a mismatch aborts.
+if curl -fsSL -o "${TMPDIR_DL}/checksums.txt" "$CHECKSUM_URL" 2>/dev/null; then
+  # Take the hash from the first line whose second column is our file name.
+  expected=$(awk -v filename="$FILENAME" '$2 == filename { print $1; exit }' "${TMPDIR_DL}/checksums.txt")
+  if [[ -n "$expected" ]]; then
+    if command -v sha256sum >/dev/null 2>&1; then
+      actual=$(sha256sum "${TMPDIR_DL}/${FILENAME}" | awk '{print $1}')
+    elif command -v shasum >/dev/null 2>&1; then
+      actual=$(shasum -a 256 "${TMPDIR_DL}/${FILENAME}" | awk '{print $1}')
+    else
+      # An empty $actual marks "could not compute" and skips the comparison.
+      actual=""
+      echo "Warning: no sha256sum/shasum found; skipping checksum verification." >&2
+    fi
+    if [[ -n "$actual" && "$actual" != "$expected" ]]; then
+      echo "Error: checksum mismatch (expected ${expected}, got ${actual})" >&2
+      exit 1
+    fi
+    [[ -n "$actual" ]] && echo "Checksum verified."
+  else
+    echo "Warning: checksum not found in checksums.txt; skipping verification." >&2
+  fi
+else
+  echo "Warning: could not fetch checksums.txt; skipping checksum verification." >&2
+fi
+
+tar -xzf "${TMPDIR_DL}/${FILENAME}" -C "$TMPDIR_DL"
+
+# Expect the binary at the archive root; otherwise search the extracted tree.
+BINARY_PATH="${TMPDIR_DL}/${BINARY}"
+if [[ ! -f "$BINARY_PATH" ]]; then
+  BINARY_PATH=$(find "$TMPDIR_DL" -type f -name "$BINARY" -print -quit)
+fi
+
+if [[ -z "$BINARY_PATH" || ! -f "$BINARY_PATH" ]]; then
+  echo "Error: binary not found in archive." >&2
+  exit 1
+fi
+
+# ── Install ──────────────────────────────────────────────────────────────────
+
+mkdir -p "$INSTALL_DIR"
+mv "$BINARY_PATH" "${INSTALL_DIR}/${BINARY}"
+chmod +x "${INSTALL_DIR}/${BINARY}"
+
+echo "Installed ${BINARY} to ${INSTALL_DIR}/${BINARY}"
+
+# ── PATH hint ────────────────────────────────────────────────────────────────
+
+if ! echo "$PATH" | tr ':' '\n' | grep -qx "$INSTALL_DIR"; then
+  echo ""
+  echo "Add browseros-cli to your PATH:"
+  echo ""
+
+  SHELL_NAME=$(basename "${SHELL:-/bin/bash}")
+  case "$SHELL_NAME" in
+    zsh)  echo "  echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.zshrc && source ~/.zshrc" ;;
+    fish) echo "  fish_add_path ${INSTALL_DIR}" ;;
+    *)    echo "  echo 'export PATH=\"${INSTALL_DIR}:\$PATH\"' >> ~/.bashrc && source ~/.bashrc" ;;
+  esac
+fi
+
+echo ""
+echo "Run 'browseros-cli --help' to get started."
--- a/packages/browseros-agent/apps/eval/.gitignore
+++ b/packages/browseros-agent/apps/eval/.gitignore
@@ -1,2 +1,3 @@
 data/raw/
 results/
+extensions/
--- a/packages/browseros-agent/apps/eval/configs/browseros-agent-weekly.json
+++ b/packages/browseros-agent/apps/eval/configs/browseros-agent-weekly.json
@@ -7,19 +7,23 @@
    "baseUrl": "https://api.fireworks.ai/inference/v1",
    "supportsImages": true
  },
-  "dataset": "../data/webvoyager_e2e_test.jsonl",
-  "output_dir": "../results/webvoyager-test",
-  "num_workers": 3,
+  "dataset": "../data/webbench-2of4-50.jsonl",
+  "num_workers": 10,
+  "restart_server_per_task": true,
  "browseros": {
    "server_url": "http://127.0.0.1:9110",
    "base_cdp_port": 9010,
    "base_server_port": 9110,
    "base_extension_port": 9310,
    "load_extensions": false,
-    "headless": true
+    "headless": false
  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
  "grader_api_key_env": "OPENROUTER_API_KEY",
  "grader_base_url": "https://openrouter.ai/api/v1",
  "grader_model": "openai/gpt-4.1",
-  "timeout_ms": 1200000
+  "timeout_ms": 1800000
 }
--- a/packages/browseros-agent/apps/eval/configs/orchestrator-executor-test.json
+++ b/packages/browseros-agent/apps/eval/configs/orchestrator-executor-test.json
@@ -2,24 +2,21 @@
  "agent": {
    "type": "orchestrator-executor",
    "orchestrator": {
-      "type": "single",
      "provider": "openai-compatible",
      "model": "accounts/fireworks/models/kimi-k2p5",
      "apiKey": "FIREWORKS_API_KEY",
-      "baseUrl": "https://api.fireworks.ai/inference/v1",
-      "supportsImages": true
+      "baseUrl": "https://api.fireworks.ai/inference/v1"
    },
    "executor": {
      "provider": "openai-compatible",
      "model": "accounts/fireworks/models/kimi-k2p5",
      "apiKey": "FIREWORKS_API_KEY",
-      "baseUrl": "https://api.fireworks.ai/inference/v1",
-      "supportsImages": true
+      "baseUrl": "https://api.fireworks.ai/inference/v1"
    }
  },
-  "dataset": "../data/webvoyager_e2e_test.jsonl",
-  "output_dir": "../results/orchestrator-executor-webvoyager-test",
-  "num_workers": 3,
+  "dataset": "../data/webbench-2of4-50.jsonl",
+  "num_workers": 10,
+  "restart_server_per_task": true,
  "browseros": {
    "server_url": "http://127.0.0.1:9110",
    "base_cdp_port": 9010,
@@ -28,8 +25,12 @@
    "load_extensions": false,
    "headless": false
  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
  "grader_api_key_env": "OPENROUTER_API_KEY",
  "grader_base_url": "https://openrouter.ai/api/v1",
  "grader_model": "openai/gpt-4.1",
-  "timeout_ms": 1200000
+  "timeout_ms": 1800000
 }
--- a/packages/browseros-agent/apps/eval/configs/orchestrator-executor-clado-test.json
+++ b/packages/browseros-agent/apps/eval/configs/orchestrator-executor-clado-test.json
@@ -14,19 +14,23 @@
      "baseUrl": "https://clado-ai--clado-browseros-action-actionmodel-generate.modal.run"
    }
  },
-  "dataset": "../data/webvoyager_e2e_test.jsonl",
-  "output_dir": "../results/orchestrator-executor-clado-webvoyager-test",
-  "num_workers": 3,
+  "dataset": "../data/webbench-2of4-50.jsonl",
+  "num_workers": 10,
+  "restart_server_per_task": true,
  "browseros": {
    "server_url": "http://127.0.0.1:9110",
    "base_cdp_port": 9010,
    "base_server_port": 9110,
    "base_extension_port": 9310,
    "load_extensions": false,
-    "headless": true
+    "headless": false
  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
  "grader_api_key_env": "OPENROUTER_API_KEY",
  "grader_base_url": "https://openrouter.ai/api/v1",
  "grader_model": "openai/gpt-4.1",
-  "timeout_ms": 1200000
+  "timeout_ms": 1800000
 }
--- a/packages/browseros-agent/apps/eval/configs/debug-test.json
+++ b/packages/browseros-agent/apps/eval/configs/debug-test.json
@@ -1,23 +0,0 @@
-{
-  "agent": {
-    "type": "orchestrator-executor",
-    "orchestrator": {
-      "provider": "openrouter",
-      "model": "openai/gpt-4o",
-      "apiKey": "OPENROUTER_API_KEY",
-      "maxTurns": 3
-    },
-    "executor": {
-      "provider": "openrouter",
-      "model": "openai/gpt-4o",
-      "apiKey": "OPENROUTER_API_KEY"
-    }
-  },
-  "dataset": "../data/webvoyager_e2e_test.jsonl",
-  "output_dir": "../results/debug-test",
-  "num_workers": 1,
-  "browseros": {
-    "server_url": "http://127.0.0.1:9110"
-  },
-  "timeout_ms": 90000
-}
--- a/packages/browseros-agent/apps/eval/configs/fireworks-minimax-m2.json
+++ b/packages/browseros-agent/apps/eval/configs/fireworks-minimax-m2.json
@@ -1,21 +0,0 @@
-{
-  "agent": {
-    "type": "single",
-    "provider": "openai-compatible",
-    "model": "accounts/fireworks/models/kimi-k2p5",
-    "apiKey": "FIREWORKS_API_KEY",
-    "baseUrl": "https://api.fireworks.ai/inference/v1",
-    "supportsImages": true
-  },
-  "dataset": "../data/test-set.jsonl",
-  "output_dir": "../results/fireworks-minimax-k2p5-test-set",
-  "num_workers": 1,
-  "restart_server_per_task": true,
-  "browseros": {
-    "server_url": "http://127.0.0.1:9110"
-  },
-  "grader_api_key_env": "OPENROUTER_API_KEY",
-  "grader_base_url": "https://openrouter.ai/api/v1",
-  "grader_model": "openai/o4-mini-high",
-  "timeout_ms": 3600000
-}
--- a/packages/browseros-agent/apps/eval/configs/mind2web-test.json
+++ b/packages/browseros-agent/apps/eval/configs/mind2web-test.json
@@ -1,18 +0,0 @@
-{
-  "agent": {
-    "type": "single",
-    "provider": "openrouter",
-    "model": "openai/gpt-4.1",
-    "apiKey": "OPENROUTER_API_KEY"
-  },
-  "dataset": "../data/mind2web_e2e_test.jsonl",
-  "output_dir": "../results/mind2web-test",
-  "num_workers": 5,
-  "browseros": {
-    "server_url": "http://127.0.0.1:9110"
-  },
-  "grader_api_key_env": "OPENROUTER_API_KEY",
-  "grader_base_url": "https://openrouter.ai/api/v1",
-  "grader_model": "openai/gpt-4.1",
-  "timeout_ms": 300000
-}
--- a/packages/browseros-agent/apps/eval/configs/test_gemini-computer-use.json
+++ b/packages/browseros-agent/apps/eval/configs/test_gemini-computer-use.json
@@ -9,12 +9,20 @@
    "turnLimit": 100
  },
  "dataset": "../data/test-set.jsonl",
-  "output_dir": "../results/gemini-computer-use-test-set2",
  "num_workers": 1,
  "restart_server_per_task": true,
  "browseros": {
-    "server_url": "http://127.0.0.1:9110"
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": false
  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
  "grader_api_key_env": "OPENROUTER_API_KEY",
  "grader_base_url": "https://openrouter.ai/api/v1",
  "grader_model": "openai/gpt-4.1",
--- a/packages/browseros-agent/apps/eval/configs/test_mind2web.json
+++ b/packages/browseros-agent/apps/eval/configs/test_mind2web.json
@@ -6,11 +6,20 @@
    "apiKey": "OPENROUTER_API_KEY"
  },
  "dataset": "../data/mind2web.jsonl",
-  "output_dir": "../results/mind2web-full",
  "num_workers": 5,
+  "restart_server_per_task": true,
  "browseros": {
-    "server_url": "http://127.0.0.1:9110"
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": false
  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
  "grader_api_key_env": "OPENROUTER_API_KEY",
  "grader_base_url": "https://openrouter.ai/api/v1",
  "grader_model": "openai/gpt-4.1",
--- a/packages/browseros-agent/apps/eval/configs/test_webvoyager.json
+++ b/packages/browseros-agent/apps/eval/configs/test_webvoyager.json
@@ -8,16 +8,20 @@
    "supportsImages": true
  },
  "dataset": "../data/webvoyager.jsonl",
-  "output_dir": "../results/webvoyager-cdp-server",
  "num_workers": 3,
+  "restart_server_per_task": true,
  "browseros": {
    "server_url": "http://127.0.0.1:9110",
    "base_cdp_port": 9010,
    "base_server_port": 9110,
    "base_extension_port": 9310,
    "load_extensions": false,
-    "headless": true
+    "headless": false
  },
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
  "grader_api_key_env": "OPENROUTER_API_KEY",
  "grader_base_url": "https://openrouter.ai/api/v1",
  "grader_model": "openai/gpt-4.1",
--- a/packages/browseros-agent/apps/eval/configs/test_yutori-navigator.json
+++ b/packages/browseros-agent/apps/eval/configs/test_yutori-navigator.json
@@ -9,14 +9,22 @@
    "turnLimit": 100
  },
  "dataset": "../data/test-set.jsonl",
-  "output_dir": "../results/yutori-navigator",
  "num_workers": 1,
  "restart_server_per_task": true,
  "browseros": {
-    "server_url": "http://127.0.0.1:9110"
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": false
  },
-  "timeout_ms": 1200000,
+  "captcha": {
+    "api_key_env": "NOPECHA_API_KEY"
+  },
+  "graders": ["performance_grader"],
  "grader_api_key_env": "OPENROUTER_API_KEY",
  "grader_base_url": "https://openrouter.ai/api/v1",
-  "grader_model": "openai/gpt-4.1"
+  "grader_model": "openai/gpt-4.1",
+  "timeout_ms": 1200000
 }
--- a/packages/browseros-agent/apps/eval/configs/tool-loop-test.json
+++ b/packages/browseros-agent/apps/eval/configs/tool-loop-test.json
@@ -1,25 +0,0 @@
-{
-  "agent": {
-    "type": "single",
-    "provider": "openai-compatible",
-    "model": "accounts/fireworks/models/kimi-k2p5",
-    "apiKey": "FIREWORKS_API_KEY",
-    "baseUrl": "https://api.fireworks.ai/inference/v1",
-    "supportsImages": true
-  },
-  "dataset": "../data/webvoyager_e2e_test.jsonl",
-  "output_dir": "../results/tool-loop-webvoyager-test",
-  "num_workers": 3,
-  "browseros": {
-    "server_url": "http://127.0.0.1:9110",
-    "base_cdp_port": 9010,
-    "base_server_port": 9110,
-    "base_extension_port": 9310,
-    "load_extensions": false,
-    "headless": true
-  },
-  "grader_api_key_env": "OPENROUTER_API_KEY",
-  "grader_base_url": "https://openrouter.ai/api/v1",
-  "grader_model": "openai/gpt-4.1",
-  "timeout_ms": 1200000
-}
--- a/packages/browseros-agent/apps/eval/package.json
+++ b/packages/browseros-agent/apps/eval/package.json
@@ -9,12 +9,13 @@
  },
  "dependencies": {
    "@anthropic-ai/claude-agent-sdk": "^0.2.63",
+    "@aws-sdk/client-s3": "^3.1014.0",
    "@browseros/server": "workspace:*",
    "@browseros/shared": "workspace:*",
    "@google/gemini-cli-core": "^0.16.0",
-    "ai": "^6.0.94",
    "@google/genai": "1.30.0",
    "@modelcontextprotocol/sdk": "^1.25.2",
+    "ai": "^6.0.94",
    "hono": "^4.6.0",
    "openai": "^4.0.0",
    "sharp": "^0.34.5",
--- a/packages/browseros-agent/apps/eval/scripts/test-clado-api.ts
+++ b/packages/browseros-agent/apps/eval/scripts/test-clado-api.ts
@@ -0,0 +1,220 @@
+/**
+ * Test script for Clado API endpoints (grounding + action models)
+ *
+ * Usage:
+ *   bun apps/eval/scripts/test-clado-api.ts [screenshot-path]
+ *
+ * If no screenshot provided, captures one from a running BrowserOS server.
+ */
+
+import { readFile } from 'node:fs/promises'
+import { resolve } from 'node:path'
+
+const ACTION_URL =
+  'https://clado-ai--clado-browseros-action-actionmodel-generate.modal.run'
+const ACTION_HEALTH_URL =
+  'https://clado-ai--clado-browseros-action-actionmodel-health.modal.run'
+const GROUNDING_URL =
+  'https://clado-ai--clado-browseros-grounding-groundingmodel-generate.modal.run'
+const GROUNDING_HEALTH_URL =
+  'https://clado-ai--clado-browseros-grounding-groundingmodel-health.modal.run'
+
+/**
+ * Probe a health endpoint and log its status, latency and a body preview.
+ *
+ * @param name - Label printed in the log header.
+ * @param url - Health endpoint to GET (aborted after 30 s).
+ * @returns `true` when the response status is 2xx; `false` on any other
+ *   status or when the request throws (network error, timeout).
+ */
+async function checkHealth(name: string, url: string): Promise<boolean> {
+  console.log(`\n--- ${name} health check ---`)
+  console.log(`  URL: ${url}`)
+  const startedAt = performance.now()
+  const secondsSinceStart = (): string =>
+    ((performance.now() - startedAt) / 1000).toFixed(2)
+  try {
+    const response = await fetch(url, { signal: AbortSignal.timeout(30_000) })
+    // Latency is measured up to the response headers, before the body is read.
+    const elapsed = secondsSinceStart()
+    const text = await response.text()
+    console.log(`  Status: ${response.status} (${elapsed}s)`)
+    console.log(`  Body: ${text.slice(0, 200)}`)
+    return response.ok
+  } catch (err) {
+    const elapsed = secondsSinceStart()
+    const reason = err instanceof Error ? err.message : err
+    console.log(`  FAILED (${elapsed}s): ${reason}`)
+    return false
+  }
+}
+
+/**
+ * POST a payload to a generate endpoint and log the parsed response.
+ *
+ * @param name - Label printed in the log header.
+ * @param url - Endpoint that receives the JSON-encoded payload.
+ * @param payload - Request body. `instruction` and `image_base64` are read
+ *   for logging; `history` is logged only when truthy.
+ * @returns The parsed JSON response, or `null` on a non-2xx status or when
+ *   the request/parse throws (including the 120 s timeout).
+ */
+async function testGenerate(
+  name: string,
+  url: string,
+  payload: Record<string, unknown>,
+): Promise<Record<string, unknown> | null> {
+  console.log(`\n--- ${name} generate ---`)
+  console.log(`  URL: ${url}`)
+  console.log(`  Instruction: ${payload.instruction}`)
+  // Runs outside the try block: a payload without image_base64 throws here.
+  console.log(
+    `  Image size: ${((payload.image_base64 as string).length / 1024).toFixed(0)} KB (base64)`,
+  )
+  if (payload.history) console.log(`  History: ${payload.history}`)
+
+  const start = performance.now()
+  try {
+    const resp = await fetch(url, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(payload),
+      signal: AbortSignal.timeout(120_000),
+    })
+    const elapsed = ((performance.now() - start) / 1000).toFixed(2)
+
+    if (!resp.ok) {
+      const body = await resp.text()
+      console.log(`  FAILED: HTTP ${resp.status} (${elapsed}s)`)
+      console.log(`  Body: ${body.slice(0, 400)}`)
+      return null
+    }
+
+    const result = (await resp.json()) as Record<string, unknown>
+    console.log(`  Status: ${resp.status} (${elapsed}s)`)
+    console.log(`  Action: ${result.action}`)
+    // Explicit null/undefined check so an x coordinate of 0 is still printed.
+    if (result.x !== null && result.x !== undefined)
+      console.log(`  Coordinates: (${result.x}, ${result.y})`)
+    if (result.text)
+      console.log(`  Text: ${(result.text as string).slice(0, 100)}`)
+    if (result.key) console.log(`  Key: ${result.key}`)
+    if (result.inference_time_seconds)
+      console.log(`  Inference: ${result.inference_time_seconds}s`)
+
+    // Show thinking if present
+    const raw = result.raw_response as string | undefined
+    if (raw) {
+      // Non-greedy match: only the first <thinking>…</thinking> span is shown.
+      const thinkMatch = raw.match(/<thinking>([\s\S]*?)<\/thinking>/)
+      if (thinkMatch) {
+        const thinking = thinkMatch[1].trim()
+        console.log(
+          `  Thinking: ${thinking.slice(0, 200)}${thinking.length > 200 ? '...' : ''}`,
+        )
+      }
+    }
+
+    return result
+  } catch (err) {
+    const elapsed = ((performance.now() - start) / 1000).toFixed(2)
+    console.log(
+      `  FAILED (${elapsed}s): ${err instanceof Error ? err.message : err}`,
+    )
+    return null
+  }
+}
+
+/**
+ * Obtain a base64-encoded screenshot.
+ *
+ * With `path`, the file is read from disk and base64-encoded. Without it, an
+ * MCP client connects to `${BROWSEROS_URL}/mcp` (default
+ * http://127.0.0.1:9110) and calls the `take_screenshot` tool.
+ *
+ * @param path - Optional screenshot file path, resolved to an absolute path.
+ * @returns The image as a base64 string.
+ * @throws When the file cannot be read, the MCP call fails, or the tool
+ *   response contains no image data.
+ */
+async function loadScreenshot(path?: string): Promise<string> {
+  if (path) {
+    const resolved = resolve(path)
+    console.log(`Loading screenshot: ${resolved}`)
+    const data = await readFile(resolved)
+    return data.toString('base64')
+  }
+
+  // Try to capture from a running BrowserOS server
+  const serverUrl = process.env.BROWSEROS_URL || 'http://127.0.0.1:9110'
+  console.log(
+    `No screenshot path provided. Trying to capture from ${serverUrl}...`,
+  )
+
+  // Imported lazily, so the MCP SDK is only loaded on this capture path.
+  const { Client } = await import('@modelcontextprotocol/sdk/client/index.js')
+  const { StreamableHTTPClientTransport } = await import(
+    '@modelcontextprotocol/sdk/client/streamableHttp.js'
+  )
+
+  const client = new Client({ name: 'clado-test', version: '1.0.0' })
+  const transport = new StreamableHTTPClientTransport(
+    new URL(`${serverUrl}/mcp`),
+    { requestInit: { headers: { 'X-BrowserOS-Source': 'sdk-internal' } } },
+  )
+
+  try {
+    await client.connect(transport)
+    const result = (await client.callTool({
+      name: 'take_screenshot',
+      arguments: { format: 'png', page: 1 },
+    })) as { content: Array<{ type: string; data?: string }> }
+
+    const imageContent = result.content?.find((c) => c.type === 'image')
+    if (!imageContent?.data)
+      throw new Error('No image data in screenshot response')
+
+    console.log(
+      `Captured screenshot (${(imageContent.data.length / 1024).toFixed(0)} KB base64)`,
+    )
+    return imageContent.data
+  } finally {
+    // Best-effort cleanup: errors while closing the transport are ignored.
+    try {
+      await transport.close()
+    } catch {}
+  }
+}
+
+/**
+ * Script entry point: health-check both endpoints, load a screenshot, then
+ * exercise each healthy endpoint with a fixed instruction.
+ *
+ * Exits with code 1 when both health checks fail or the screenshot cannot be
+ * loaded. The optional screenshot path is read from the first CLI argument.
+ */
+async function main() {
+  const screenshotPath = process.argv[2]
+
+  console.log('=== Clado API Test ===\n')
+
+  // Health checks (parallel)
+  const [actionHealthy, groundingHealthy] = await Promise.all([
+    checkHealth('Action Model', ACTION_HEALTH_URL),
+    checkHealth('Grounding Model', GROUNDING_HEALTH_URL),
+  ])
+
+  // Continue as long as at least one endpoint is healthy.
+  if (!actionHealthy && !groundingHealthy) {
+    console.log('\nBoth endpoints are down. Exiting.')
+    process.exit(1)
+  }
+
+  // Load screenshot
+  let imageBase64: string
+  try {
+    imageBase64 = await loadScreenshot(screenshotPath)
+  } catch (err) {
+    console.log(
+      `\nFailed to load screenshot: ${err instanceof Error ? err.message : err}`,
+    )
+    console.log(
+      'Provide a screenshot path: bun apps/eval/scripts/test-clado-api.ts path/to/screenshot.png',
+    )
+    process.exit(1)
+  }
+
+  const instruction = 'Click on the search button or search bar'
+
+  // Test grounding model
+  if (groundingHealthy) {
+    await testGenerate('Grounding Model', GROUNDING_URL, {
+      instruction,
+      image_base64: imageBase64,
+    })
+  } else {
+    console.log('\nSkipping grounding model (unhealthy)')
+  }
+
+  // Test action model (no history)
+  if (actionHealthy) {
+    const result = await testGenerate('Action Model (step 1)', ACTION_URL, {
+      instruction,
+      image_base64: imageBase64,
+      history: 'None',
+    })
+
+    // Test action model with history (simulate multi-turn)
+    // Only runs when step 1 returned a click; the same screenshot is reused.
+    if (result && result.action === 'click') {
+      await testGenerate('Action Model (step 2, with history)', ACTION_URL, {
+        instruction: 'Type "hello world" in the search bar',
+        image_base64: imageBase64,
+        history: `click(${result.x}, ${result.y})`,
+      })
+    }
+  } else {
+    console.log('\nSkipping action model (unhealthy)')
+  }
+
+  console.log('\n=== Done ===')
+}
+
+// Log any unexpected rejection from main and exit with a failing status.
+main().catch((err) => {
+  console.error('Fatal:', err)
+  process.exit(1)
+})
--- a/packages/browseros-agent/apps/eval/scripts/upload-run.ts
+++ b/packages/browseros-agent/apps/eval/scripts/upload-run.ts
@@ -0,0 +1,349 @@
+/**
+ * Upload eval runs to R2.
+ *
+ * Two modes:
+ *   bun scripts/upload-run.ts results/browseros-agent-weekly/2026-03-21-1730
+ *       → uploads that specific run
+ *
+ *   bun scripts/upload-run.ts results/browseros-agent-weekly
+ *       → finds all timestamped subfolders, uploads any not yet in R2
+ *
+ * Env vars: EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID, EVAL_R2_SECRET_ACCESS_KEY
+ *           EVAL_R2_BUCKET (default: browseros-eval)
+ *           EVAL_R2_CDN_BASE_URL (default: https://eval.browseros.com)
+ */
+
+import { readdir, readFile, stat } from 'node:fs/promises'
+import { basename, dirname, extname, join } from 'node:path'
+import {
+  GetObjectCommand,
+  PutObjectCommand,
+  S3Client,
+} from '@aws-sdk/client-s3'
+
+// Number of pool workers used when uploading a run's files.
+const CONCURRENCY = 20
+
+// Content-Type by file extension; extensions not listed here are uploaded
+// as application/octet-stream.
+const CONTENT_TYPES: Record<string, string> = {
+  '.json': 'application/json',
+  '.jsonl': 'application/x-ndjson',
+  '.png': 'image/png',
+}
+
+// Connection settings resolved from the EVAL_R2_* environment variables.
+interface R2Config {
+  accountId: string
+  accessKeyId: string
+  secretAccessKey: string
+  bucket: string
+  // Base URL used to print the viewer link; stored without trailing slashes.
+  cdnBaseUrl: string
+}
+
+/**
+ * Build the R2 configuration from environment variables.
+ *
+ * Logs an error and exits with code 1 when any of the three credential
+ * variables is unset or empty. Bucket and CDN base URL fall back to their
+ * defaults; trailing slashes are stripped from the CDN base URL.
+ */
+function loadConfig(): R2Config {
+  const {
+    EVAL_R2_ACCOUNT_ID: accountId,
+    EVAL_R2_ACCESS_KEY_ID: accessKeyId,
+    EVAL_R2_SECRET_ACCESS_KEY: secretAccessKey,
+  } = process.env
+
+  if (!(accountId && accessKeyId && secretAccessKey)) {
+    console.error(
+      'Missing required env vars: EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID, EVAL_R2_SECRET_ACCESS_KEY',
+    )
+    process.exit(1)
+  }
+
+  const bucket = process.env.EVAL_R2_BUCKET || 'browseros-eval'
+  const rawCdnBaseUrl =
+    process.env.EVAL_R2_CDN_BASE_URL || 'https://eval.browseros.com'
+  const cdnBaseUrl = rawCdnBaseUrl.replace(/\/+$/, '')
+
+  return { accountId, accessKeyId, secretAccessKey, bucket, cdnBaseUrl }
+}
+
+/** Create an S3 client that targets the account's R2 storage endpoint. */
+function createClient(config: R2Config): S3Client {
+  const { accountId, accessKeyId, secretAccessKey } = config
+  const endpoint = `https://${accountId}.r2.cloudflarestorage.com`
+  return new S3Client({
+    region: 'auto',
+    endpoint,
+    credentials: { accessKeyId, secretAccessKey },
+  })
+}
+
+/** Store `body` in `bucket` under `key` with the given Content-Type. */
+async function upload(
+  client: S3Client,
+  bucket: string,
+  key: string,
+  body: Buffer,
+  contentType: string,
+) {
+  const putCommand = new PutObjectCommand({
+    Bucket: bucket,
+    Key: key,
+    Body: body,
+    ContentType: contentType,
+  })
+  await client.send(putCommand)
+}
+
+/**
+ * Recursively list every non-directory entry under `dir`.
+ * Paths are returned in directory-listing order, with each subdirectory's
+ * contents expanded in place.
+ */
+async function collectFiles(dir: string): Promise<string[]> {
+  const found: string[] = []
+  for (const entry of await readdir(dir, { withFileTypes: true })) {
+    const entryPath = join(dir, entry.name)
+    if (!entry.isDirectory()) {
+      found.push(entryPath)
+      continue
+    }
+    const nested = await collectFiles(entryPath)
+    found.push(...nested)
+  }
+  return found
+}
+
+/**
+ * Run `fn` over `items` with a bounded number of concurrent workers.
+ *
+ * @param items - Work items; each is passed to `fn` exactly once.
+ * @param concurrency - Maximum number of items processed at the same time.
+ *   Values below 1 are treated as 1 so the pool can never silently skip
+ *   every item.
+ * @param fn - Async handler. A rejection rejects the returned promise.
+ */
+async function runPool<T>(
+  items: T[],
+  concurrency: number,
+  fn: (item: T) => Promise<void>,
+) {
+  // Never start more workers than there are items, but always at least one.
+  const workerCount = Math.max(1, Math.min(concurrency, items.length))
+  let next = 0
+  const workers = Array.from({ length: workerCount }, async () => {
+    // `next` is shared by all workers; there is no await between the bounds
+    // check and the increment, so each index is claimed by one worker only.
+    while (next < items.length) {
+      const item = items[next++]
+      await fn(item)
+    }
+  })
+  await Promise.all(workers)
+}
+
+// Check if a run has already been uploaded to R2.
+// A run counts as uploaded when runs/<runId>/manifest.json exists. Only a
+// "not found" response means "not uploaded"; any other failure (credentials,
+// network, throttling) is rethrown so it is not mistaken for a missing run
+// and answered with a full re-upload.
+async function isUploaded(
+  client: S3Client,
+  bucket: string,
+  runId: string,
+): Promise<boolean> {
+  try {
+    await client.send(
+      new GetObjectCommand({
+        Bucket: bucket,
+        Key: `runs/${runId}/manifest.json`,
+      }),
+    )
+    return true
+  } catch (err) {
+    const { name, $metadata } = (err ?? {}) as {
+      name?: string
+      $metadata?: { httpStatusCode?: number }
+    }
+    const notFound =
+      name === 'NoSuchKey' ||
+      name === 'NotFound' ||
+      $metadata?.httpStatusCode === 404
+    if (notFound) return false
+    throw err
+  }
+}
+
+// A run dir has at least one immediate subdirectory containing a
+// metadata.json file; anything else is treated as a config dir (one that
+// holds timestamped subdirs like 2026-03-21-1730/).
+async function isRunDir(dir: string): Promise<boolean> {
+  for (const entry of await readdir(dir, { withFileTypes: true })) {
+    if (!entry.isDirectory()) continue
+    const candidate = join(dir, entry.name, 'metadata.json')
+    const info = await stat(candidate).catch(() => null)
+    if (info?.isFile()) return true
+  }
+  return false
+}
+
+/**
+ * Upload one run directory to R2 under `runs/<runId>/`.
+ *
+ * Every file of each task subdirectory that has a readable metadata.json is
+ * uploaded, followed by a generated manifest.json and the dashboard
+ * viewer.html (written to the bucket root).
+ *
+ * @param runDir - Local directory containing one subdirectory per task.
+ * @param runId - Identifier used as the key prefix and in the viewer link.
+ * @param r2Config - Bucket name and CDN base URL.
+ * @param client - S3 client used for every upload.
+ */
+async function uploadSingleRun(
+  runDir: string,
+  runId: string,
+  r2Config: R2Config,
+  client: S3Client,
+): Promise<void> {
+  const taskDirs = await readdir(runDir, { withFileTypes: true })
+  const taskEntries = taskDirs.filter((d) => d.isDirectory())
+
+  if (taskEntries.length === 0) {
+    console.warn(`  No task subdirectories in ${runId}, skipping`)
+    return
+  }
+
+  const manifestTasks: Record<string, unknown>[] = []
+  const jobs: { key: string; filePath: string; contentType: string }[] = []
+
+  // Extract agent config from first task
+  let agentConfig: Record<string, unknown> | undefined
+  let dataset: string | undefined
+
+  for (const taskDir of taskEntries) {
+    const taskId = taskDir.name
+    const taskPath = join(runDir, taskId)
+    const metaPath = join(taskPath, 'metadata.json')
+
+    let meta: Record<string, unknown> = {}
+    try {
+      meta = JSON.parse(await readFile(metaPath, 'utf-8'))
+    } catch {
+      // Task dirs without a readable, parseable metadata.json are skipped.
+      continue
+    }
+
+    // First task that carries each value wins.
+    if (!agentConfig && meta.agent_config)
+      agentConfig = meta.agent_config as Record<string, unknown>
+    if (!dataset && meta.dataset) dataset = meta.dataset as string
+
+    const files = await collectFiles(taskPath)
+    let screenshotCount = 0
+
+    for (const file of files) {
+      // Path relative to the task dir (the +1 drops the path separator).
+      const relative = file.slice(taskPath.length + 1)
+      const ext = extname(file)
+      if (relative.startsWith('screenshots/') && ext === '.png')
+        screenshotCount++
+
+      jobs.push({
+        key: `runs/${runId}/${taskId}/${relative}`,
+        filePath: file,
+        contentType: CONTENT_TYPES[ext] || 'application/octet-stream',
+      })
+    }
+
+    manifestTasks.push({
+      queryId: meta.query_id || taskId,
+      query: meta.query || '',
+      startUrl: meta.start_url || '',
+      status:
+        meta.termination_reason === 'completed'
+          ? 'completed'
+          : meta.termination_reason || 'unknown',
+      durationMs: meta.total_duration_ms || 0,
+      // Prefer the count recorded in metadata; fall back to the files found.
+      screenshotCount: (meta.screenshot_count as number) || screenshotCount,
+      graderResults: meta.grader_results || {},
+    })
+  }
+
+  // Reached when no task dir had usable metadata, whatever its status.
+  if (manifestTasks.length === 0) {
+    console.warn(`  No completed tasks in ${runId}, skipping`)
+    return
+  }
+
+  console.log(
+    `  Uploading ${jobs.length} files across ${manifestTasks.length} tasks...`,
+  )
+
+  let uploaded = 0
+  await runPool(jobs, CONCURRENCY, async (job) => {
+    const body = await readFile(job.filePath)
+    await upload(client, r2Config.bucket, job.key, body, job.contentType)
+    uploaded++
+    if (uploaded % 50 === 0 || uploaded === jobs.length) {
+      console.log(`    ${uploaded}/${jobs.length}`)
+    }
+  })
+
+  // Read summary.json if it exists
+  let summaryData: Record<string, unknown> | undefined
+  try {
+    summaryData = JSON.parse(
+      await readFile(join(runDir, 'summary.json'), 'utf-8'),
+    )
+  } catch {}
+
+  // Upload manifest
+  // Written only after all task files, so its presence implies they uploaded.
+  const manifest = {
+    runId,
+    uploadedAt: new Date().toISOString(),
+    agentConfig,
+    dataset,
+    summary: summaryData
+      ? {
+          passRate: summaryData.passRate,
+          avgDurationMs: summaryData.avgDurationMs,
+        }
+      : undefined,
+    tasks: manifestTasks,
+  }
+  const manifestBody = Buffer.from(JSON.stringify(manifest, null, 2))
+  await upload(
+    client,
+    r2Config.bucket,
+    `runs/${runId}/manifest.json`,
+    manifestBody,
+    'application/json',
+  )
+
+  // Upload viewer.html to bucket root
+  const viewerPath = join(
+    import.meta.dir,
+    '..',
+    'src',
+    'dashboard',
+    'viewer.html',
+  )
+  const viewerBody = await readFile(viewerPath)
+  await upload(client, r2Config.bucket, 'viewer.html', viewerBody, 'text/html')
+
+  // The +2 accounts for manifest.json and viewer.html.
+  console.log(`  Uploaded ${uploaded + 2} files`)
+  console.log(`  ${r2Config.cdnBaseUrl}/viewer.html?run=${runId}`)
+}
+
+/**
+ * Script entry point. The first CLI argument is either a single run
+ * directory or a config directory holding several runs.
+ *
+ * Exits with code 1 on a missing argument, a path that is not a directory,
+ * or a config directory with no subdirectories.
+ */
+async function main() {
+  const inputDir = process.argv[2]
+  if (!inputDir) {
+    console.error(
+      'Usage:\n' +
+        '  bun scripts/upload-run.ts results/config-name/2026-03-21-1730  (specific run)\n' +
+        '  bun scripts/upload-run.ts results/config-name                   (all un-uploaded runs)',
+    )
+    process.exit(1)
+  }
+
+  const dirStat = await stat(inputDir).catch(() => null)
+  if (!dirStat?.isDirectory()) {
+    console.error(`Not a directory: ${inputDir}`)
+    process.exit(1)
+  }
+
+  const r2Config = loadConfig()
+  const client = createClient(r2Config)
+
+  if (await isRunDir(inputDir)) {
+    // Single run: results/config-name/2026-03-21-1730
+    // runId is "<parent dir name>-<run dir name>"; no already-uploaded check.
+    const timestamp = basename(inputDir)
+    const configName = basename(dirname(inputDir))
+    const runId = `${configName}-${timestamp}`
+    console.log(`Uploading run: ${runId}`)
+    await uploadSingleRun(inputDir, runId, r2Config, client)
+  } else {
+    // Config dir: results/config-name/ — upload all un-uploaded runs
+    const configName = basename(inputDir)
+    const entries = await readdir(inputDir, { withFileTypes: true })
+    const runDirs = entries
+      .filter((e) => e.isDirectory())
+      .map((e) => e.name)
+      .sort()
+
+    if (runDirs.length === 0) {
+      console.error('No run subdirectories found')
+      process.exit(1)
+    }
+
+    console.log(
+      `Found ${runDirs.length} runs for config "${configName}", checking R2...`,
+    )
+
+    // Runs are processed one at a time, in sorted name order.
+    let uploadedCount = 0
+    for (const dir of runDirs) {
+      const runId = `${configName}-${dir}`
+      const alreadyUploaded = await isUploaded(client, r2Config.bucket, runId)
+      if (alreadyUploaded) {
+        console.log(`  ${runId}: already uploaded, skipping`)
+        continue
+      }
+
+      console.log(`  ${runId}: uploading...`)
+      await uploadSingleRun(join(inputDir, dir), runId, r2Config, client)
+      // Also counts runs that uploadSingleRun skipped (it returns nothing).
+      uploadedCount++
+    }
+
+    console.log(
+      `\nDone. Uploaded ${uploadedCount} new run(s), ${runDirs.length - uploadedCount} already in R2.`,
+    )
+  }
+}
+
+// Surface any failure from main (fs, R2, JSON) with a clear message and a
+// non-zero exit status instead of leaving the rejection unhandled.
+main().catch((err) => {
+  console.error('Fatal:', err)
+  process.exit(1)
+})
--- a/packages/browseros-agent/apps/eval/scripts/weekly-report.ts
+++ b/packages/browseros-agent/apps/eval/scripts/weekly-report.ts
@@ -0,0 +1,591 @@
+/**
+ * Weekly Report Generator
+ *
+ * Reads all uploaded eval runs from R2, builds cumulative score history,
+ * and generates an HTML dashboard with:
+ *   - Config selector dropdown (groups runs by config/runId pattern)
+ *   - Config details card (architecture, model, dataset, grader)
+ *   - Interactive trend chart (filtered by selected config)
+ *   - Stat cards (latest, trend, best, duration)
+ *   - Searchable table of all runs
+ *
+ * Usage:
+ *   bun apps/eval/scripts/weekly-report.ts [local-output-path]
+ *
+ * Env vars required:
+ *   EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID, EVAL_R2_SECRET_ACCESS_KEY
+ *   EVAL_R2_BUCKET (default: browseros-eval)
+ */
+
+import { writeFile } from 'node:fs/promises'
+import {
+  GetObjectCommand,
+  ListObjectsV2Command,
+  PutObjectCommand,
+  S3Client,
+} from '@aws-sdk/client-s3'
+
+// One task entry inside a run manifest.
+interface ManifestTask {
+  queryId: string
+  query: string
+  // Compared against 'completed', 'failed' and 'timeout' when counting.
+  status: string
+  durationMs: number
+  screenshotCount: number
+  // Keyed by grader name (see PASS_FAIL_GRADER_ORDER).
+  graderResults: Record<string, { pass: boolean; score: number }>
+}
+
+// Shape of a runs/<runId>/manifest.json object as read by this script.
+interface Manifest {
+  runId: string
+  uploadedAt: string
+  agentConfig?: { type?: string; model?: string }
+  dataset?: string
+  summary?: { passRate?: number; avgDurationMs?: number }
+  tasks: ManifestTask[]
+}
+
+// Per-run aggregate derived from a Manifest.
+interface RunSummary {
+  runId: string
+  // runId with any trailing date-time suffix removed.
+  configName: string
+  date: string
+  // Mean grader score multiplied by 100; 0 when no task was scored.
+  avgScore: number
+  total: number
+  completed: number
+  failed: number
+  timeout: number
+  // Mean over tasks with a positive duration; 0 when there are none.
+  avgDurationMs: number
+  model: string
+  dataset: string
+  agentType: string
+}
+
+// Grader lookup priority: for each task, the first grader in this list that
+// has a result supplies the task's score.
+const PASS_FAIL_GRADER_ORDER = [
+  'performance_grader',
+  'webvoyager_grader',
+  'fara_combined',
+  'fara_grader',
+]
+
+/**
+ * Return the value of environment variable `name`.
+ * Logs an error and exits with code 1 when it is unset or empty.
+ */
+function requireEnv(name: string): string {
+  const value = process.env[name]
+  if (value) return value
+
+  console.error(`Missing required env var: ${name}`)
+  process.exit(1)
+}
+
+// Credentials are mandatory (requireEnv exits when one is missing); the
+// bucket name falls back to a default.
+const accountId = requireEnv('EVAL_R2_ACCOUNT_ID')
+const accessKeyId = requireEnv('EVAL_R2_ACCESS_KEY_ID')
+const secretAccessKey = requireEnv('EVAL_R2_SECRET_ACCESS_KEY')
+const bucket = process.env.EVAL_R2_BUCKET || 'browseros-eval'
+
+// S3 client pointed at the account's R2 storage endpoint.
+const client = new S3Client({
+  region: 'auto',
+  endpoint: `https://${accountId}.r2.cloudflarestorage.com`,
+  credentials: { accessKeyId, secretAccessKey },
+})
+
+// Step 1: List all manifest.json files in runs/
+console.log('Scanning R2 for eval runs...')
+
+const manifests: Manifest[] = []
+let continuationToken: string | undefined
+
+// Page through the listing until no continuation token is returned.
+do {
+  const listRes = await client.send(
+    new ListObjectsV2Command({
+      Bucket: bucket,
+      Prefix: 'runs/',
+      ContinuationToken: continuationToken,
+    }),
+  )
+
+  // Keep only the manifest.json keys from this page of the listing.
+  const manifestKeys =
+    listRes.Contents?.filter((obj) => obj.Key?.endsWith('/manifest.json')).map(
+      (obj) => obj.Key as string,
+    ) ?? []
+
+  // Manifests are fetched one at a time; an unreadable or unparseable one is
+  // logged and skipped rather than aborting the report.
+  for (const key of manifestKeys) {
+    try {
+      const res = await client.send(
+        new GetObjectCommand({ Bucket: bucket, Key: key }),
+      )
+      const body = await res.Body?.transformToString()
+      if (body) manifests.push(JSON.parse(body))
+    } catch {
+      console.warn(`  Failed to read ${key}, skipping`)
+    }
+  }
+
+  continuationToken = listRes.NextContinuationToken
+} while (continuationToken)
+
+console.log(`Found ${manifests.length} runs`)
+
+// An empty bucket is not an error: exit successfully without a report.
+if (manifests.length === 0) {
+  console.log('No runs found. Nothing to report.')
+  process.exit(0)
+}
+
+// Step 2: Build run summaries
+// One RunSummary per manifest, sorted ascending by the `date` string.
+const runs: RunSummary[] = manifests
+  .map((m) => {
+    const total = m.tasks.length
+    const completed = m.tasks.filter((t) => t.status === 'completed').length
+    const failed = m.tasks.filter((t) => t.status === 'failed').length
+    const timeout = m.tasks.filter((t) => t.status === 'timeout').length
+
+    // Each task contributes at most one score: the first grader in
+    // PASS_FAIL_GRADER_ORDER that has a result. Ungraded tasks are excluded.
+    let scoredCount = 0
+    let scoreSum = 0
+    for (const task of m.tasks) {
+      if (!task.graderResults) continue
+      for (const name of PASS_FAIL_GRADER_ORDER) {
+        if (task.graderResults[name]) {
+          scoredCount++
+          scoreSum += task.graderResults[name].score ?? 0
+          break
+        }
+      }
+    }
+
+    // Mean score scaled by 100; 0 when nothing was scored.
+    const avgScore = scoredCount > 0 ? (scoreSum / scoredCount) * 100 : 0
+    // Tasks with a non-positive duration are left out of the average.
+    const durations = m.tasks
+      .filter((t) => t.durationMs > 0)
+      .map((t) => t.durationMs)
+    const avgDurationMs =
+      durations.length > 0
+        ? durations.reduce((a, b) => a + b, 0) / durations.length
+        : 0
+
+    // "<part before T> <first 5 chars after T>" of uploadedAt; otherwise the
+    // first 15 characters of the runId.
+    const date = m.uploadedAt
+      ? `${m.uploadedAt.split('T')[0]} ${m.uploadedAt.split('T')[1]?.slice(0, 5) || ''}`
+      : m.runId.slice(0, 15)
+
+    const model = m.agentConfig?.model || 'unknown'
+    const dataset = m.dataset || m.runId
+    const agentType = m.agentConfig?.type || 'unknown'
+
+    // extractConfigName is a function declaration further down (hoisted).
+    const configName = extractConfigName(m.runId)
+    return {
+      runId: m.runId,
+      configName,
+      date,
+      avgScore,
+      total,
+      completed,
+      failed,
+      timeout,
+      avgDurationMs,
+      model,
+      dataset,
+      agentType,
+    }
+  })
+  .sort((a, b) => a.date.localeCompare(b.date))
+
+// Step 3: Identify unique config groups
+// runId can be "ci-weekly" (old) or "ci-weekly-2026-03-21-1730" (timestamped);
+// extractConfigName (defined below) strips the date-time suffix to get the
+// config name that runs are grouped by.
+/**
+ * Escape text for interpolation into HTML content and attribute values.
+ *
+ * `&` is replaced first so the entities produced by the later replacements
+ * are not escaped again. Single quotes are escaped as well, so the result is
+ * also safe inside single-quoted attribute values.
+ */
+function escHtml(s: string): string {
+  return s
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;')
+}
+
+/**
+ * Derive the config name from a run id by dropping a trailing
+ * "-YYYY-MM-DD-HHMM" suffix, e.g.
+ * "browseros-agent-weekly-2026-03-21-1730" → "browseros-agent-weekly".
+ * Ids without that suffix (old format, e.g. "ci-weekly") are returned as-is.
+ */
+function extractConfigName(runId: string): string {
+  const timestampSuffix = /-\d{4}-\d{2}-\d{2}-\d{4}$/
+  return runId.replace(timestampSuffix, '')
+}
+
+// Distinct config names in first-seen order. defaultConfig is "ci-weekly"
+// when such a group exists, otherwise the first group.
+const configGroups = Array.from(new Set(runs.map((run) => run.configName)))
+const defaultConfig =
+  configGroups.indexOf('ci-weekly') !== -1 ? 'ci-weekly' : configGroups[0]
+
+// Step 4: Generate HTML report
+const html = `<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>BrowserOS Eval Dashboard</title>
+  <style>
+    * { margin: 0; padding: 0; box-sizing: border-box; }
+    body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif; background: #0d1117; color: #e6edf3; padding: 2rem; max-width: 1400px; margin: 0 auto; }
+
+    /* Header */
+    .page-header { display: flex; align-items: center; gap: 16px; margin-bottom: 2rem; flex-wrap: wrap; }
+    .page-header h1 { font-size: 1.5rem; }
+    .page-header h1 span { color: #58a6ff; }
+    .page-header .gen-date { color: #6e7681; font-size: 12px; margin-left: auto; }
+
+    /* Config selector */
+    .config-bar { display: flex; align-items: center; gap: 16px; margin-bottom: 1.5rem; flex-wrap: wrap; }
+    .config-bar label { font-size: 13px; color: #8b949e; font-weight: 600; }
+    .config-bar select { background: #161b22; border: 1px solid #30363d; color: #e6edf3; padding: 8px 12px; border-radius: 6px; font-size: 13px; font-family: 'SF Mono', Consolas, monospace; cursor: pointer; min-width: 200px; }
+    .config-bar select:focus { outline: none; border-color: #58a6ff; }
+
+    /* Config details card */
+    .config-details { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px 20px; margin-bottom: 1.5rem; display: flex; gap: 32px; flex-wrap: wrap; }
+    .config-detail { display: flex; flex-direction: column; gap: 2px; }
+    .config-detail .cd-label { font-size: 10px; font-weight: 600; color: #6e7681; text-transform: uppercase; letter-spacing: 0.04em; }
+    .config-detail .cd-value { font-size: 13px; color: #e6edf3; font-family: 'SF Mono', Consolas, monospace; }
+
+    /* Stat cards */
+    .stats { display: flex; gap: 1rem; margin-bottom: 1.5rem; flex-wrap: wrap; }
+    .stat-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 1.25rem; flex: 1; min-width: 140px; }
+    .stat-label { color: #8b949e; font-size: 0.8rem; margin-bottom: 0.25rem; }
+    .stat-value { font-size: 1.4rem; font-weight: 600; }
+    .stat-value.big { font-size: 2.5rem; font-weight: 700; }
+    .pass { color: #3fb950; }
+    .fail { color: #f85149; }
+    .neutral { color: #f0883e; }
+    .trend-up { color: #3fb950; }
+    .trend-down { color: #f85149; }
+    .trend-flat { color: #8b949e; }
+
+    /* Chart */
+    .chart-container { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 1.5rem; margin-bottom: 2rem; position: relative; }
+    canvas { width: 100%; height: 300px; }
+    #tooltip { display: none; position: absolute; background: #1c2128; border: 1px solid #30363d; border-radius: 6px; padding: 8px 12px; pointer-events: none; font-size: 12px; z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.4); }
+
+    /* Section headers */
+    .section-header { display: flex; align-items: center; gap: 12px; margin-bottom: 1rem; }
+    .section-header h2 { font-size: 1rem; font-weight: 600; }
+    .section-header .search-input { margin-left: auto; background: #0d1117; border: 1px solid #30363d; color: #e6edf3; padding: 6px 12px; border-radius: 6px; font-size: 12px; font-family: inherit; width: 220px; }
+    .section-header .search-input:focus { outline: none; border-color: #58a6ff; }
+    .section-header .search-input::placeholder { color: #484f58; }
+
+    /* Table */
+    table { width: 100%; border-collapse: collapse; background: #161b22; border: 1px solid #30363d; border-radius: 8px; overflow: hidden; }
+    th, td { padding: 0.65rem 1rem; text-align: left; border-bottom: 1px solid #21262d; }
+    th { background: #1c2128; color: #8b949e; font-weight: 600; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.03em; }
+    td { font-size: 0.85rem; }
+    td.mono { font-family: 'SF Mono', Consolas, monospace; font-size: 0.8rem; }
+    a.view-link { color: #58a6ff; text-decoration: none; font-weight: 500; }
+    a.view-link:hover { text-decoration: underline; }
+    tr.hidden { display: none; }
+  </style>
+</head>
+<body>
+
+<div class="page-header">
+  <h1>BrowserOS <span>Eval Dashboard</span></h1>
+  <span class="gen-date">Generated ${new Date().toISOString().split('T')[0]}</span>
+</div>
+
+<!-- Config selector -->
+<div class="config-bar">
+  <label>Config:</label>
+  <select id="config-select">
+    ${configGroups.map((c) => `<option value="${escHtml(c)}"${c === defaultConfig ? ' selected' : ''}>${escHtml(c)}</option>`).join('\n    ')}
+  </select>
+</div>
+
+<!-- Config details (populated by JS) -->
+<div class="config-details" id="config-details"></div>
+
+<!-- Stat cards (populated by JS) -->
+<div class="stats" id="stat-cards"></div>
+
+<!-- Chart -->
+<div class="chart-container">
+  <canvas id="chart"></canvas>
+  <div id="tooltip">
+    <div id="tt-date" style="color:#8b949e;margin-bottom:2px;"></div>
+    <div id="tt-score" style="font-size:16px;font-weight:700;"></div>
+    <div id="tt-detail" style="color:#8b949e;margin-top:2px;font-size:11px;"></div>
+  </div>
+</div>
+
+<!-- Recent runs table -->
+<div class="section-header">
+  <h2>All Runs</h2>
+  <input type="text" class="search-input" id="table-search" placeholder="Search runs..." autocomplete="off" spellcheck="false" />
+</div>
+<table id="runs-table">
+  <thead>
+    <tr>
+      <th>Date</th>
+      <th>Config</th>
+      <th>Model</th>
+      <th>Dataset</th>
+      <th>Architecture</th>
+      <th>Score</th>
+      <th>Tasks</th>
+      <th>Timeout</th>
+      <th>Avg Duration</th>
+      <th>View</th>
+    </tr>
+  </thead>
+  <tbody>
+    ${runs
+      .slice()
+      .reverse()
+      .map((r) => {
+        // Rows are emitted in reverse order of the `runs` array (copied first so
+        // the original array is not mutated). Each row links to the viewer page
+        // with the run id passed as a URL-encoded query parameter.
+        const viewerUrl = `viewer.html?run=${encodeURIComponent(r.runId)}`
+        // Short display label for the known agent types; unknown types are shown
+        // verbatim and an empty type falls back to a dash.
+        const archLabel =
+          r.agentType === 'orchestrator-executor'
+            ? 'Orch-Exec'
+            : r.agentType === 'single'
+              ? 'Tool Loop'
+              : r.agentType === 'gemini-computer-use'
+                ? 'Gemini CU'
+                : r.agentType || '—'
+        return `<tr data-config="${escHtml(r.runId)}" data-search="${escHtml(`${r.date} ${r.runId} ${r.model} ${r.dataset} ${archLabel}`)}">
+      <td>${escHtml(r.date)}</td>
+      <td class="mono">${escHtml(r.runId)}</td>
+      <td class="mono" style="max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;" title="${escHtml(r.model)}">${escHtml(r.model)}</td>
+      <td>${escHtml(r.dataset)}</td>
+      <td>${escHtml(archLabel)}</td>
+      <td class="${r.avgScore >= 75 ? 'pass' : r.avgScore >= 40 ? 'neutral' : 'fail'}">${r.avgScore.toFixed(1)}%</td>
+      <td>${r.total}</td>
+      <td class="${r.timeout > 0 ? 'neutral' : ''}">${r.timeout}</td>
+      <td>${(r.avgDurationMs / 1000).toFixed(0)}s</td>
+      <td><a href="${viewerUrl}" class="view-link">View &rarr;</a></td>
+    </tr>`
+      })
+      .join('\n')}
+  </tbody>
+</table>
+
+<script>
+(function() {
+  'use strict';
+
+  var allRuns = ${JSON.stringify(runs)};
+  var configSelect = document.getElementById('config-select');
+  var canvas = document.getElementById('chart');
+  var ctx = canvas.getContext('2d');
+  var tooltip = document.getElementById('tooltip');
+  var dpr = window.devicePixelRatio || 1;
+  var dotPositions = [];
+
+  function getFilteredRuns() {
+    var cfg = configSelect.value;
+    return allRuns.filter(function(r) { return r.configName === cfg; });
+  }
+
+  function updateDashboard() {
+    var runs = getFilteredRuns();
+    renderConfigDetails(runs);
+    renderStatCards(runs);
+    drawChart(runs);
+  }
+
+  // Config details card
+  function renderConfigDetails(runs) {
+    var el = document.getElementById('config-details');
+    if (runs.length === 0) { el.innerHTML = '<span style="color:#6e7681;">No runs found for this config.</span>'; return; }
+    var latest = runs[runs.length - 1];
+    var archLabel = latest.agentType === 'orchestrator-executor' ? 'Orchestrator-Executor'
+      : latest.agentType === 'single' ? 'Single Agent (Tool Loop)'
+      : latest.agentType === 'gemini-computer-use' ? 'Gemini Computer Use'
+      : latest.agentType || 'Unknown';
+    var scoreColor = latest.avgScore >= 75 ? '#3fb950' : latest.avgScore >= 40 ? '#f0883e' : '#f85149';
+    el.innerHTML =
+      '<div class="config-detail"><span class="cd-label">Architecture</span><span class="cd-value">' + archLabel + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Model</span><span class="cd-value">' + (latest.model || 'unknown') + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Dataset</span><span class="cd-value">' + (latest.dataset || 'unknown') + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Latest Score</span><span class="cd-value" style="color:' + scoreColor + ';">' + latest.avgScore.toFixed(1) + '%</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Tasks</span><span class="cd-value">' + latest.total + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Runs</span><span class="cd-value">' + runs.length + '</span></div>';
+  }
+
+  // Stat cards
+  function renderStatCards(runs) {
+    var el = document.getElementById('stat-cards');
+    if (runs.length === 0) { el.innerHTML = ''; return; }
+    var latest = runs[runs.length - 1];
+    var prev = runs.length >= 2 ? runs[runs.length - 2] : null;
+    var best = Math.max.apply(null, runs.map(function(r) { return r.avgScore; }));
+    var delta = prev ? latest.avgScore - prev.avgScore : 0;
+    var sign = delta > 0 ? '+' : '';
+    var trendCls = delta > 0 ? 'trend-up' : delta < 0 ? 'trend-down' : 'trend-flat';
+    var latestColor = latest.avgScore >= 75 ? 'pass' : latest.avgScore >= 40 ? 'neutral' : 'fail';
+
+    el.innerHTML =
+      '<div class="stat-card"><div class="stat-label">Latest Score</div><div class="stat-value big ' + latestColor + '">' + latest.avgScore.toFixed(1) + '%</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Trend</div><div class="stat-value ' + trendCls + '">' + (prev ? sign + delta.toFixed(1) + ' pp' : 'N/A') + '</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Best Score</div><div class="stat-value pass">' + best.toFixed(1) + '%</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Avg Duration</div><div class="stat-value">' + (latest.avgDurationMs / 1000).toFixed(0) + 's</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Runs</div><div class="stat-value">' + runs.length + '</div></div>';
+  }
+
+  // Chart
+  // Draws the score-over-time line chart for the given runs onto the canvas and
+  // records each dot's position in dotPositions for the mouse handlers.
+  function drawChart(runs) {
+    // Size the backing store to the CSS size times the device pixel ratio, then
+    // scale so the drawing code below can work in CSS pixels.
+    // NOTE(review): this relies on the width/height assignment resetting the
+    // context transform, otherwise the scale would accumulate across redraws - confirm.
+    var rect = canvas.getBoundingClientRect();
+    canvas.width = rect.width * dpr;
+    canvas.height = rect.height * dpr;
+    ctx.scale(dpr, dpr);
+
+    var W = rect.width, H = rect.height;
+    var pad = { top: 20, right: 20, bottom: 50, left: 50 };
+    var plotW = W - pad.left - pad.right;
+    var plotH = H - pad.top - pad.bottom;
+    dotPositions = [];
+
+    ctx.clearRect(0, 0, W, H);
+
+    // Empty state: centered message instead of axes.
+    if (runs.length === 0) {
+      ctx.fillStyle = '#8b949e';
+      ctx.font = '14px sans-serif';
+      ctx.textAlign = 'center';
+      ctx.fillText('No data for this config', W / 2, H / 2);
+      return;
+    }
+
+    // Y range: the score range rounded outwards to multiples of 10, widened by
+    // 10 on each side and clamped to [0, 100].
+    var scores = runs.map(function(r) { return r.avgScore; });
+    var minY = Math.max(0, Math.floor(Math.min.apply(null, scores) / 10) * 10 - 10);
+    var maxY = Math.min(100, Math.ceil(Math.max.apply(null, scores) / 10) * 10 + 10);
+    // Guard against a zero-height range, which would divide by zero below.
+    if (minY === maxY) { minY = Math.max(0, minY - 10); maxY = Math.min(100, maxY + 10); }
+
+    // Grid
+    // One horizontal line and percentage label every 10 points.
+    ctx.strokeStyle = '#21262d';
+    ctx.lineWidth = 1;
+    for (var y = minY; y <= maxY; y += 10) {
+      var py = pad.top + plotH - ((y - minY) / (maxY - minY)) * plotH;
+      ctx.beginPath(); ctx.moveTo(pad.left, py); ctx.lineTo(pad.left + plotW, py); ctx.stroke();
+      ctx.fillStyle = '#8b949e'; ctx.font = '11px sans-serif'; ctx.textAlign = 'right';
+      ctx.fillText(y + '%', pad.left - 8, py + 4);
+    }
+
+    // X labels
+    // Runs are spaced evenly by index (not by time); a single run is centered.
+    // Labels are rotated by 30 degrees.
+    ctx.fillStyle = '#8b949e'; ctx.font = '11px sans-serif'; ctx.textAlign = 'center';
+    runs.forEach(function(r, i) {
+      var px = pad.left + (runs.length === 1 ? plotW / 2 : (i / (runs.length - 1)) * plotW);
+      ctx.save(); ctx.translate(px, pad.top + plotH + 15); ctx.rotate(-Math.PI / 6);
+      ctx.fillText(r.date, 0, 0); ctx.restore();
+    });
+
+    // Line
+    ctx.strokeStyle = '#58a6ff'; ctx.lineWidth = 2; ctx.beginPath();
+    runs.forEach(function(r, i) {
+      var px = pad.left + (runs.length === 1 ? plotW / 2 : (i / (runs.length - 1)) * plotW);
+      var py2 = pad.top + plotH - ((r.avgScore - minY) / (maxY - minY)) * plotH;
+      if (i === 0) ctx.moveTo(px, py2); else ctx.lineTo(px, py2);
+    });
+    ctx.stroke();
+
+    // Dots
+    // Colored by the same score thresholds as the table (>= 75, >= 40, below).
+    runs.forEach(function(r, i) {
+      var px = pad.left + (runs.length === 1 ? plotW / 2 : (i / (runs.length - 1)) * plotW);
+      var py2 = pad.top + plotH - ((r.avgScore - minY) / (maxY - minY)) * plotH;
+      dotPositions.push({ x: px, y: py2, run: r });
+      ctx.beginPath(); ctx.arc(px, py2, 4, 0, Math.PI * 2);
+      ctx.fillStyle = r.avgScore >= 75 ? '#3fb950' : r.avgScore >= 40 ? '#f0883e' : '#f85149';
+      ctx.fill(); ctx.strokeStyle = '#0d1117'; ctx.lineWidth = 2; ctx.stroke();
+    });
+  }
+
+  // Tooltip
+  // On pointer movement, finds the dot nearest to the pointer. When it is closer
+  // than 40px the tooltip is filled in and positioned next to it and the dot is
+  // highlighted; otherwise the tooltip is hidden.
+  canvas.addEventListener('mousemove', function(e) {
+    var rect = canvas.getBoundingClientRect();
+    var mx = e.clientX - rect.left, my = e.clientY - rect.top;
+    var closest = null, closestDist = Infinity;
+    dotPositions.forEach(function(dot) {
+      var d = Math.sqrt(Math.pow(mx - dot.x, 2) + Math.pow(my - dot.y, 2));
+      if (d < closestDist) { closestDist = d; closest = dot; }
+    });
+
+    if (closest && closestDist < 40) {
+      var r = closest.run;
+      document.getElementById('tt-date').textContent = r.date;
+      document.getElementById('tt-score').textContent = r.avgScore.toFixed(1) + '%';
+      document.getElementById('tt-score').style.color = r.avgScore >= 75 ? '#3fb950' : r.avgScore >= 40 ? '#f0883e' : '#f85149';
+      document.getElementById('tt-detail').textContent = 'score ' + r.avgScore.toFixed(1) + '% \\u00B7 ' + r.total + ' tasks \\u00B7 ' + (r.avgDurationMs / 1000).toFixed(0) + 's avg \\u00B7 ' + r.model;
+      tooltip.style.display = 'block';
+
+      // Default position is up and to the right of the dot; flip to the left
+      // near the right edge and below the dot near the top edge.
+      var tx = closest.x + 12, ty = closest.y - 50;
+      if (tx + 200 > rect.width) tx = closest.x - 210;
+      if (ty < 0) ty = closest.y + 12;
+      tooltip.style.left = tx + 'px'; tooltip.style.top = ty + 'px';
+
+      // Highlight dot
+      // The chart is fully redrawn first so a previous highlight is erased.
+      drawChart(getFilteredRuns());
+      ctx.beginPath(); ctx.arc(closest.x, closest.y, 7, 0, Math.PI * 2);
+      ctx.fillStyle = 'rgba(88, 166, 255, 0.3)'; ctx.fill();
+      ctx.beginPath(); ctx.arc(closest.x, closest.y, 5, 0, Math.PI * 2);
+      ctx.fillStyle = r.avgScore >= 75 ? '#3fb950' : r.avgScore >= 40 ? '#f0883e' : '#f85149'; ctx.fill();
+      ctx.strokeStyle = '#e6edf3'; ctx.lineWidth = 2; ctx.stroke();
+      canvas.style.cursor = 'pointer';
+    } else {
+      tooltip.style.display = 'none';
+      canvas.style.cursor = 'default';
+    }
+  });
+
+  // When the pointer leaves the canvas, hide the tooltip and redraw the chart
+  // so any highlighted dot returns to its normal appearance.
+  canvas.addEventListener('mouseleave', function() {
+    tooltip.style.display = 'none';
+    drawChart(getFilteredRuns());
+  });
+
+  canvas.addEventListener('click', function(e) {
+    var rect = canvas.getBoundingClientRect();
+    var mx = e.clientX - rect.left, my = e.clientY - rect.top;
+    dotPositions.forEach(function(dot) {
+      if (Math.sqrt(Math.pow(mx - dot.x, 2) + Math.pow(my - dot.y, 2)) < 20) {
+        window.open('viewer.html?run=' + encodeURIComponent(dot.run.runId), '_blank');
+      }
+    });
+  });
+
+  // Config selector change
+  // Hides any visible tooltip (it belongs to the previous config's chart) and
+  // re-renders all config-dependent sections.
+  configSelect.addEventListener('change', function() {
+    tooltip.style.display = 'none';
+    updateDashboard();
+  });
+
+  // Table search
+  document.getElementById('table-search').addEventListener('input', function(e) {
+    var q = e.target.value.toLowerCase();
+    var rows = document.querySelectorAll('#runs-table tbody tr');
+    rows.forEach(function(row) {
+      var searchText = row.getAttribute('data-search') || '';
+      row.classList.toggle('hidden', q && searchText.toLowerCase().indexOf(q) === -1);
+    });
+  });
+
+  // Resize
+  window.addEventListener('resize', function() { tooltip.style.display = 'none'; drawChart(getFilteredRuns()); });
+
+  // Init
+  updateDashboard();
+})();
+</script>
+
+</body>
+</html>`
+
+// Step 5: Save locally and upload to R2
+// The local copy goes to the path given as the first CLI argument, or to
+// /tmp/eval-report.html when none is given.
+const localPath = process.argv[2] || '/tmp/eval-report.html'
+await writeFile(localPath, html)
+console.log(`Report saved locally: ${localPath}`)
+
+// Upload the same HTML under the fixed key "report.html", overwriting any
+// previous report; caches may keep it for up to 300 seconds.
+await client.send(
+  new PutObjectCommand({
+    Bucket: bucket,
+    Key: 'report.html',
+    Body: html,
+    ContentType: 'text/html',
+    CacheControl: 'public, max-age=300',
+  }),
+)
+
+// Public base URL used only for the log line below; trailing slashes are
+// stripped so the joined URL does not contain "//".
+const cdnBaseUrl = (
+  process.env.EVAL_R2_CDN_BASE_URL || 'https://eval.browseros.com'
+).replace(/\/+$/, '')
+
+console.log(`Report uploaded to R2: ${bucket}/report.html`)
+console.log(`  View at: ${cdnBaseUrl}/report.html`)
+
+// Print summary
+// One line for each of the last ten runs: the date, the rounded score padded to
+// three characters, and a bar with one block character per 5 score points.
+console.log('\nScore trend:')
+runs.slice(-10).forEach((entry) => {
+  const blocks = Math.round(entry.avgScore / 5)
+  const label = entry.avgScore.toFixed(0).padStart(3)
+  console.log(`  ${entry.date}  ${label}% ${'\u2588'.repeat(blocks)}`)
+})
--- a/packages/browseros-agent/apps/eval/src/agents/orchestrator-executor/index.ts
+++ b/packages/browseros-agent/apps/eval/src/agents/orchestrator-executor/index.ts
@@ -11,6 +11,7 @@
 import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
 import { Browser } from '@browseros/server/browser'
 import { CdpBackend } from '@browseros/server/browser/backends/cdp'
+import { CaptchaWaiter } from '../../capture/captcha-waiter'
 import { DEFAULT_TIMEOUT_MS } from '../../constants'
 import type {
  EvalConfig,
@@ -161,6 +162,13 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
    const browser = new Browser(cdp, CONTROLLER_STUB)
    capture.screenshot.setBrowser(browser)

+    const captchaWaiter = config.captcha
+      ? new CaptchaWaiter({
+          waitTimeoutMs: config.captcha.wait_timeout_ms,
+          pollIntervalMs: config.captcha.poll_interval_ms,
+        })
+      : null
+
    try {
      // Build capture callbacks (same pattern as single-agent.ts)
      const callbacks: ExecutorCallbacks = {
@@ -172,6 +180,12 @@ export class OrchestratorExecutorEvaluator implements AgentEvaluator {
        },
        onToolCallFinish: async () => {
          try {
+            if (captchaWaiter) {
+              await captchaWaiter.waitIfCaptchaPresent(
+                browser,
+                capture.getActivePageId(),
+              )
+            }
            const screenshotNum = await capture.screenshot.capture(
              capture.getActivePageId(),
            )
--- a/packages/browseros-agent/apps/eval/src/agents/single-agent.ts
+++ b/packages/browseros-agent/apps/eval/src/agents/single-agent.ts
@@ -1,9 +1,13 @@
 import { randomUUID } from 'node:crypto'
-import { AiSdkAgent } from '@browseros/server/agent/tool-loop'
+import {
+  AiSdkAgent,
+  formatUserMessage,
+} from '@browseros/server/agent/tool-loop'
 import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
 import { Browser } from '@browseros/server/browser'
 import { CdpBackend } from '@browseros/server/browser/backends/cdp'
 import { registry } from '@browseros/server/tools/registry'
+import { CaptchaWaiter } from '../capture/captcha-waiter'
 import { DEFAULT_TIMEOUT_MS } from '../constants'
 import type { EvalConfig, TaskMetadata } from '../types'
 import { resolveProviderConfig } from '../utils/resolve-provider-config'
@@ -75,6 +79,13 @@ export class SingleAgentEvaluator implements AgentEvaluator {
        }
      : undefined

+    const captchaWaiter = config.captcha
+      ? new CaptchaWaiter({
+          waitTimeoutMs: config.captcha.wait_timeout_ms,
+          pollIntervalMs: config.captcha.poll_interval_ms,
+        })
+      : null
+
    let agent: AiSdkAgent | null = null

    try {
@@ -91,8 +102,11 @@ export class SingleAgentEvaluator implements AgentEvaluator {
        capture,
        async (signal) => {
          if (!agent) throw new Error('Agent was not initialized')
+          // Format prompt with browser context so the agent knows what page it's on
+          // (same formatting as chat-service.ts → formatUserMessage)
+          const prompt = formatUserMessage(task.query, browserContext)
          const result = await agent.toolLoopAgent.generate({
-            prompt: task.query,
+            prompt,
            abortSignal: signal,

            experimental_onToolCallStart: ({ toolCall }) => {
@@ -106,6 +120,12 @@ export class SingleAgentEvaluator implements AgentEvaluator {

            experimental_onToolCallFinish: async () => {
              try {
+                if (captchaWaiter) {
+                  await captchaWaiter.waitIfCaptchaPresent(
+                    browser,
+                    capture.getActivePageId(),
+                  )
+                }
                const screenshotNum = await capture.screenshot.capture(
                  capture.getActivePageId(),
                )
--- a/packages/browseros-agent/apps/eval/src/capture/captcha-waiter.ts
+++ b/packages/browseros-agent/apps/eval/src/capture/captcha-waiter.ts
@@ -0,0 +1,115 @@
+import type { Browser } from '@browseros/server/browser'
+
+/** Outcome of a single {@link CaptchaWaiter.waitIfCaptchaPresent} call. */
+export interface CaptchaWaitResult {
+  /** True when the first detection found a captcha; false when none was found or detection threw. */
+  detected: boolean
+  /** Captcha kind reported by the first detection, or 'none' when nothing was detected. */
+  type: 'recaptcha' | 'hcaptcha' | 'turnstile' | 'none'
+  /** True when a detection reported a non-empty captcha response field. */
+  solved: boolean
+  /** Milliseconds elapsed between the start of the call and its return. */
+  waitDurationMs: number
+}
+
+/** Timing settings for {@link CaptchaWaiter}. */
+interface CaptchaWaiterConfig {
+  /** Polling continues only while the elapsed time is below this many milliseconds. */
+  waitTimeoutMs: number
+  /** Milliseconds to sleep before each re-detection while polling. */
+  pollIntervalMs: number
+}
+
+// JavaScript source evaluated inside the page to look for a captcha widget.
+// It checks, in order, for an iframe whose src contains "recaptcha", "hcaptcha"
+// or "challenges.cloudflare.com" (Turnstile) and returns { type, solved }, where
+// solved means the matching response field exists and has a non-empty value.
+// Returns { type: 'none', solved: false } when no such iframe is found.
+const DETECTION_SCRIPT = `(() => {
+  const recaptcha = document.querySelector('iframe[src*="recaptcha"]')
+  if (recaptcha) {
+    const response = document.getElementById('g-recaptcha-response')
+    return { type: 'recaptcha', solved: !!(response && response.value) }
+  }
+  const hcaptcha = document.querySelector('iframe[src*="hcaptcha"]')
+  if (hcaptcha) {
+    const response = document.querySelector('[name="h-captcha-response"]')
+    return { type: 'hcaptcha', solved: !!(response && response.value) }
+  }
+  const turnstile = document.querySelector('iframe[src*="challenges.cloudflare.com"]')
+  if (turnstile) {
+    const response = document.querySelector('[name="cf-turnstile-response"]')
+    return { type: 'turnstile', solved: !!(response && response.value) }
+  }
+  return { type: 'none', solved: false }
+})()`
+
+/**
+ * Detects a captcha on a page and, when one is present and unsolved, polls the
+ * page until it is solved, it disappears, or the configured timeout elapses.
+ */
+export class CaptchaWaiter {
+  private readonly config: CaptchaWaiterConfig
+
+  constructor(config: CaptchaWaiterConfig) {
+    this.config = config
+  }
+
+  /**
+   * Checks the page for a captcha and waits for it to be resolved if needed.
+   *
+   * Returns immediately when no captcha is found or it is already solved.
+   * Otherwise re-checks every `pollIntervalMs` until the captcha reports solved,
+   * is no longer present, or `waitTimeoutMs` has elapsed. Each sleep is capped
+   * at the time remaining so the wait does not overrun the timeout by up to a
+   * full poll interval.
+   *
+   * Never throws: any error during detection yields a "not detected" result so
+   * that captcha handling cannot break the caller.
+   *
+   * @param browser - Browser used to evaluate the detection script.
+   * @param pageId - Id of the page to inspect.
+   * @returns What was detected, whether it was solved, and how long the call took.
+   */
+  async waitIfCaptchaPresent(
+    browser: Browser,
+    pageId: number,
+  ): Promise<CaptchaWaitResult> {
+    const start = Date.now()
+    const outcome = (
+      detected: boolean,
+      type: CaptchaWaitResult['type'],
+      solved: boolean,
+    ): CaptchaWaitResult => ({
+      detected,
+      type,
+      solved,
+      waitDurationMs: Date.now() - start,
+    })
+
+    try {
+      const initial = await this.detect(browser, pageId)
+      if (initial.type === 'none') return outcome(false, 'none', false)
+      if (initial.solved) return outcome(true, initial.type, true)
+
+      // Poll until solved or timeout
+      while (Date.now() - start < this.config.waitTimeoutMs) {
+        const remainingMs = this.config.waitTimeoutMs - (Date.now() - start)
+        await sleep(Math.min(this.config.pollIntervalMs, remainingMs))
+        const check = await this.detect(browser, pageId)
+        // A captcha that vanished (type 'none') also ends the wait; it is
+        // reported with the detector's solved flag, which is false in that case.
+        if (check.solved || check.type === 'none') {
+          return outcome(true, initial.type, check.solved)
+        }
+      }
+
+      return outcome(true, initial.type, false)
+    } catch {
+      // Best-effort: treat a failed detection as "no captcha".
+      return outcome(false, 'none', false)
+    }
+  }
+
+  /**
+   * Runs the detection script in the page once.
+   *
+   * An evaluation error or an empty result is reported as no captcha.
+   */
+  private async detect(
+    browser: Browser,
+    pageId: number,
+  ): Promise<{ type: CaptchaWaitResult['type']; solved: boolean }> {
+    const result = await browser.evaluate(pageId, DETECTION_SCRIPT)
+    if (result.error || !result.value) {
+      return { type: 'none', solved: false }
+    }
+    const val = result.value as { type: string; solved: boolean }
+    return {
+      type: (val.type as CaptchaWaitResult['type']) ?? 'none',
+      solved: val.solved ?? false,
+    }
+  }
+}
+
+/** Returns a promise that resolves once a timer of `ms` milliseconds fires. */
+function sleep(ms: number): Promise<void> {
+  return new Promise<void>((done) => {
+    setTimeout(done, ms)
+  })
+}
--- a/packages/browseros-agent/apps/eval/src/capture/index.ts
+++ b/packages/browseros-agent/apps/eval/src/capture/index.ts
@@ -1,4 +1,5 @@
 export { callMcpTool } from '../utils/mcp-client'
+export { CaptchaWaiter } from './captcha-waiter'
 export { CaptureContext } from './context'
 export { MessageLogger } from './message-logger'
 export { ScreenshotCapture } from './screenshot'
--- a/packages/browseros-agent/apps/eval/src/constants.ts
+++ b/packages/browseros-agent/apps/eval/src/constants.ts
@@ -2,7 +2,7 @@
 * Eval-specific constants shared across agents, runners, and capture modules.
 */

-export const DEFAULT_TIMEOUT_MS = 15 * 60 * 1000 // 15 minutes
+export const DEFAULT_TIMEOUT_MS = 30 * 60 * 1000 // 30 minutes
 export const SCREENSHOT_TIMEOUT_MS = 65_000 // 65s — ensures we get extension's error (60s)
 export const MAX_ACTIONS_PER_DELEGATION = 15
 export const CLADO_REQUEST_TIMEOUT_MS = 120_000
--- a/packages/browseros-agent/apps/eval/src/dashboard/index.html
+++ b/packages/browseros-agent/apps/eval/src/dashboard/index.html
@@ -354,7 +354,7 @@
          </div>
          <div class="config-field">
            <label>Timeout (ms)</label>
-            <input type="number" id="cfg-timeout" value="600000" min="30000" max="3600000">
+            <input type="number" id="cfg-timeout" value="1800000" min="30000" max="3600000">
          </div>
        </div>
        <div class="config-row" style="gap: 16px;">
@@ -454,6 +454,17 @@
        <button class="btn-run" id="btn-run" onclick="submitConfig()">Run Eval</button>
      </div>

+      <!-- Load previous run -->
+      <div class="config-actions">
+        <div class="load-config">
+          <label>Load run:</label>
+          <select id="cfg-run-select">
+            <option value="">-- select --</option>
+          </select>
+        </div>
+        <button class="btn-secondary" onclick="loadPreviousRun()">Load Run</button>
+      </div>
+
    </div>
  </div>
 </div>
@@ -529,6 +540,7 @@ async function init() {

  // Load saved configs into dropdown
  loadConfigList();
+  loadRunList();

  if (mode.configMode) {
    // Config mode — show panel expanded
@@ -656,6 +668,53 @@ async function loadConfigList() {
  } catch {}
 }

+async function loadRunList() {
+  try {
+    const res = await fetch('/api/runs');
+    const runs = await res.json();
+    const sel = document.getElementById('cfg-run-select');
+    sel.innerHTML = '<option value="">-- select --</option>';
+    runs.forEach(r => {
+      const opt = document.createElement('option');
+      opt.value = r;
+      opt.textContent = r;
+      sel.appendChild(opt);
+    });
+  } catch {}
+}
+
+async function loadPreviousRun() {
+  const runName = document.getElementById('cfg-run-select').value;
+  if (!runName) return;
+  const errEl = document.getElementById('config-error');
+  errEl.textContent = '';
+  try {
+    const res = await fetch('/api/load-run', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ runName }),
+    });
+    const result = await res.json();
+    if (!res.ok) {
+      errEl.textContent = result.error || 'Failed to load run';
+      return;
+    }
+    const stateRes = await fetch('/api/state');
+    const state = await stateRes.json();
+    document.getElementById('config-name').textContent =
+      state.configName ? `${state.configName} \u00B7 ${state.agentType}` : '';
+    tasks = state.tasks;
+    setConfigPanelOpen(false);
+    updateConfigSummary(state.configName, state.agentType);
+    selectedTaskId = null;
+    renderTaskList();
+    updateProgress();
+    if (tasks.length > 0) selectTask(tasks[0].queryId);
+  } catch (e) {
+    errEl.textContent = `Network error: ${e.message}`;
+  }
+}
+
 async function loadSavedConfig(name) {
  if (!name) return;
  try {
@@ -1055,7 +1114,8 @@ function renderTaskList() {
    if (t.graderResults) {
      const primary = getPrimaryGrader(t.graderResults);
      if (primary) {
-        graderBadge = `<span class="grade-badge ${primary.pass ? 'pass' : 'fail'}">${primary.pass ? 'PASS' : 'FAIL'}</span>`;
+        const pct = typeof primary.score === 'number' ? `${(primary.score * 100).toFixed(0)}%` : (primary.pass ? 'PASS' : 'FAIL');
+        graderBadge = `<span class="grade-badge ${primary.pass ? 'pass' : 'fail'}">${pct}</span>`;
      }
    }

@@ -1144,12 +1204,35 @@ function toggleAutoplay() {
 // ============================================================================
 // Agent Stream
 // ============================================================================
-function renderStreamForTask(taskId) {
+async function renderStreamForTask(taskId) {
  const body = document.getElementById('stream-body');
  body.innerHTML = '';
  const events = streamEvents[taskId] || [];
-  events.forEach(e => appendStreamEntry(e, false));
-  body.scrollTop = body.scrollHeight;
+  if (events.length > 0) {
+    events.forEach(e => appendStreamEntry(e, false));
+    body.scrollTop = body.scrollHeight;
+    return;
+  }
+  const task = tasks.find(t => t.queryId === taskId);
+  if (!task || task.status === 'pending' || task.status === 'running') return;
+  body.innerHTML = '<div class="empty-state">Loading events...</div>';
+  try {
+    const res = await fetch(`/api/messages/${taskId}`);
+    if (!res.ok) {
+      body.innerHTML = '<div class="empty-state">No event log available</div>';
+      return;
+    }
+    const text = await res.text();
+    const parsed = text.trim().split('\n').filter(Boolean).map(line => {
+      try { return JSON.parse(line); } catch { return null; }
+    }).filter(Boolean);
+    streamEvents[taskId] = parsed;
+    body.innerHTML = '';
+    parsed.forEach(e => appendStreamEntry(e, false));
+    body.scrollTop = body.scrollHeight;
+  } catch {
+    body.innerHTML = '<div class="empty-state">Failed to load events</div>';
+  }
 }

 function appendStreamEntry(event, scroll = true) {
--- a/packages/browseros-agent/apps/eval/src/dashboard/server.ts
+++ b/packages/browseros-agent/apps/eval/src/dashboard/server.ts
@@ -1,4 +1,4 @@
-import { mkdir, readdir, readFile } from 'node:fs/promises'
+import { mkdir, readdir, readFile, stat } from 'node:fs/promises'
 import { join, resolve } from 'node:path'
 import { Hono } from 'hono'
 import { streamSSE } from 'hono/streaming'
@@ -199,6 +199,133 @@ app.get('/api/screenshots/:taskId/:index', async (c) => {
  }
 })

+/**
+ * GET /api/messages/:taskId — serve a task's `messages.jsonl` event log.
+ *
+ * The file is read from `<dashboardState.outputDir>/<taskId>/messages.jsonl`
+ * and returned unmodified as `application/x-ndjson`.
+ *
+ * Responses: 400 when `taskId` is not a single plain path segment or the
+ * resolved file is not exactly `<outputDir>/<taskId>/messages.jsonl`;
+ * 404 when the file is missing or cannot be read; 200 with the file bytes.
+ */
+app.get('/api/messages/:taskId', async (c) => {
+  const { taskId } = c.req.param()
+  if (
+    taskId.includes('..') ||
+    taskId.includes('/') ||
+    taskId.includes('\\')
+  ) {
+    return c.json({ error: 'Invalid parameters' }, 400)
+  }
+  const root = resolve(dashboardState.outputDir)
+  const resolved = resolve(root, taskId, 'messages.jsonl')
+  // Exact-boundary containment: the file's grandparent must be the output
+  // directory itself. A plain string-prefix test would also accept sibling
+  // directories whose names merely start with the output directory's path.
+  if (resolve(resolved, '..', '..') !== root) {
+    return c.json({ error: 'Invalid path' }, 400)
+  }
+  try {
+    // Open the validated path, not a separately built one.
+    const file = Bun.file(resolved)
+    if (!(await file.exists())) return c.notFound()
+    const data = await file.arrayBuffer()
+    return c.body(data, 200, {
+      'Content-Type': 'application/x-ndjson',
+      'Cache-Control': 'no-cache',
+    })
+  } catch {
+    return c.notFound()
+  }
+})
+
+const resultsDir = join(import.meta.dir, '..', '..', 'results')
+
+/**
+ * GET /api/runs — list the run names found under `resultsDir`.
+ *
+ * A top-level directory containing at least one sub-directory named like
+ * `YYYY-MM-DD-HHMM` contributes one `<dir>/<sub>` entry per sub-directory;
+ * any other top-level directory contributes its own name. The list is
+ * returned in reverse lexicographic order, or as `[]` when `resultsDir`
+ * cannot be read.
+ */
+app.get('/api/runs', async (c) => {
+  const timestampName = /^\d{4}-\d{2}-\d{2}-\d{4}$/
+  try {
+    const runs: string[] = []
+    const topLevel = await readdir(resultsDir, { withFileTypes: true })
+    for (const parent of topLevel) {
+      if (!parent.isDirectory()) continue
+      // An unreadable directory is treated as having no children.
+      const children = await readdir(join(resultsDir, parent.name), {
+        withFileTypes: true,
+      }).catch(() => [] as import('node:fs').Dirent[])
+      const childDirs = children.filter((child) => child.isDirectory())
+      const isNested = childDirs.some((child) =>
+        timestampName.test(child.name),
+      )
+      if (!isNested) {
+        runs.push(parent.name)
+        continue
+      }
+      for (const child of childDirs) {
+        runs.push(`${parent.name}/${child.name}`)
+      }
+    }
+    runs.sort().reverse()
+    return c.json(runs)
+  } catch {
+    return c.json([])
+  }
+})
+
+/**
+ * POST /api/load-run — load a recorded run from `resultsDir` into the
+ * dashboard state.
+ *
+ * Body: `{ "runName": "<run>" }` or `{ "runName": "<parent>/<run>" }`.
+ * Each sub-directory of the run that has a parseable `metadata.json`
+ * becomes one task; sub-directories without one are skipped.
+ *
+ * Responses:
+ *  - 409 while an eval is running
+ *  - 400 for a non-JSON body, a missing/non-string/malformed run name, or a
+ *    path that resolves outside `resultsDir`
+ *  - 404 when the run directory does not exist or yields no tasks
+ *  - 500 when the run directory exists but cannot be listed
+ *  - 200 `{ status, configName, agentType, taskCount }` on success
+ */
+app.post('/api/load-run', async (c) => {
+  if (evalRunning)
+    return c.json({ error: 'Cannot load while eval is running' }, 409)
+  let body: { runName?: unknown } | null
+  try {
+    body = await c.req.json()
+  } catch {
+    return c.json({ error: 'Invalid JSON body' }, 400)
+  }
+  // Valid JSON can still be `null`, a primitive, or carry a non-string
+  // runName; answer those with 400 instead of throwing a TypeError.
+  const runName = body?.runName
+  if (typeof runName !== 'string' || !runName || runName.includes('..')) {
+    return c.json({ error: 'Invalid run name' }, 400)
+  }
+  // Only "<run>" or "<parent>/<run>" are accepted. Empty and "." segments are
+  // refused so the name cannot resolve to resultsDir itself or to an
+  // absolute path.
+  const segments = runName.split('/')
+  if (
+    segments.length > 2 ||
+    segments.some((segment) => segment === '' || segment === '.')
+  ) {
+    return c.json({ error: 'Invalid run name' }, 400)
+  }
+  const outputDir = resolve(resultsDir, runName)
+  if (!outputDir.startsWith(resolve(resultsDir))) {
+    return c.json({ error: 'Invalid path' }, 400)
+  }
+  const dirStat = await stat(outputDir).catch(() => null)
+  if (!dirStat?.isDirectory()) {
+    return c.json({ error: 'Run directory not found' }, 404)
+  }
+  // The directory can become unreadable between stat and readdir; report
+  // that instead of letting the rejection escape the handler.
+  const entries = await readdir(outputDir, { withFileTypes: true }).catch(
+    () => null,
+  )
+  if (!entries) {
+    return c.json({ error: 'Failed to read run directory' }, 500)
+  }
+  const taskDirs = entries.filter((e) => e.isDirectory())
+  const loadedTasks: DashboardTask[] = []
+  let agentType = ''
+  for (const taskDir of taskDirs) {
+    const metaPath = join(outputDir, taskDir.name, 'metadata.json')
+    try {
+      const raw = JSON.parse(await readFile(metaPath, 'utf-8'))
+      // The first task that declares an agent type decides it for the run.
+      if (!agentType && raw.agent_config?.type) {
+        agentType = raw.agent_config.type
+      }
+      const screenshotDir = join(outputDir, taskDir.name, 'screenshots')
+      let screenshotCount = raw.screenshot_count ?? 0
+      if (!screenshotCount) {
+        // Metadata has no count: fall back to counting PNG files on disk.
+        try {
+          const files = await readdir(screenshotDir)
+          screenshotCount = files.filter((f: string) =>
+            f.endsWith('.png'),
+          ).length
+        } catch {
+          // No screenshots directory; the count stays 0.
+        }
+      }
+      loadedTasks.push({
+        queryId: raw.query_id || taskDir.name,
+        query: raw.query || '',
+        startUrl: raw.start_url,
+        status:
+          raw.termination_reason === 'completed'
+            ? 'completed'
+            : raw.termination_reason === 'timeout'
+              ? 'timeout'
+              : 'failed',
+        durationMs: raw.total_duration_ms,
+        graderResults: raw.grader_results,
+        screenshotCount,
+      })
+    } catch {
+      // Best effort: a directory without readable, valid metadata.json is
+      // not treated as a task and is skipped.
+    }
+  }
+  if (loadedTasks.length === 0) {
+    return c.json({ error: 'No completed tasks found in this run' }, 404)
+  }
+  dashboardState.configName = runName
+  dashboardState.agentType = agentType
+  dashboardState.outputDir = outputDir
+  dashboardState.tasks = loadedTasks
+  return c.json({
+    status: 'loaded',
+    configName: runName,
+    agentType,
+    taskCount: loadedTasks.length,
+  })
+})
+
 // ============================================================================
 // Config & Run API
 // ============================================================================
--- a/packages/browseros-agent/apps/eval/src/dashboard/viewer.html
+++ b/packages/browseros-agent/apps/eval/src/dashboard/viewer.html
--- a/packages/browseros-agent/apps/eval/src/graders/performance/axes.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/performance/axes.ts
@@ -53,8 +53,8 @@ The raw event stream — one JSON object per line with a "type" field.
 - "tool-output-error" / "tool-input-error" — Tool call failed. Fields: toolCallId, error.
 - "text-delta" — Agent's reasoning text. Field: delta (small text chunk).

-**Event types to AVOID reading:**
- "tool-output-available" — Tool output. The "output" field contains FULL PAGE DOM CONTENT — hundreds of interactive elements, entire page text, etc. These lines are 5-50KB each. NEVER read them. The tool-input-available lines already tell you what the agent did. Screenshots show you the visual result.
+**Event types to handle carefully:**
+- "tool-output-available" — Tool output. The "output" field contains FULL PAGE DOM CONTENT — hundreds of interactive elements, entire page text, etc. These lines are 5-50KB each. NEVER read them in bulk. However, you CAN and SHOULD use Grep to search within these lines for specific keywords when screenshots alone can't verify a claim. For example, if the task asks "find the price of X" and the screenshot is unclear, grep messages.jsonl for the product name or price value to confirm the agent actually saw it in the DOM.

 ### 2. screenshots/ directory
 Numbered PNG screenshots (1.png, 2.png, ...) captured after each tool execution.
@@ -95,6 +95,13 @@ Grep for "tool-output-error" or "tool-input-error". If none found, zero errors.
 **Step 3: Sample reasoning (only if needed for reasoning_quality)**
 Grep for "text-delta" but LIMIT to the first 10 and last 10 results. Don't read all reasoning text.

+**Step 4: Verify claims from DOM content (critical for task_completion)**
+When the agent's final answer contains specific data (prices, names, dates, counts, etc.) that you can't confirm from screenshots alone, use Grep to search messages.jsonl for those specific values or keywords. This searches the tool-output-available lines which contain DOM content the agent actually saw. For example:
+- Task asks "find cheapest flight price" → grep for the dollar amount from the final answer
+- Task asks "list the top 3 articles" → grep for the article titles mentioned in the answer
+- Task asks "extract the email address" → grep for the email pattern
+This is the most reliable way to verify whether the agent actually found the data it claims, since screenshots may be blurry, truncated, or missing the relevant section.
+
 ## How to View Screenshots

 You have {screenshot_count} screenshots. View 3-5 strategically:
--- a/packages/browseros-agent/apps/eval/src/graders/performance/performance-grader.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/performance/performance-grader.ts
@@ -16,8 +16,8 @@ import {
  type PerformanceGraderOptions,
 } from './types'

-export const DEFAULT_MAX_TURNS = 15
-export const DEFAULT_MAX_BUDGET_USD = 0.5
+// Default grader limits. NOTE(review): this change raises them from
+// 15 turns / 0.5 USD to 100 turns / 100 USD — confirm the new budget ceiling
+// is intentional; how these defaults are applied is not visible here.
+export const DEFAULT_MAX_TURNS = 100
+export const DEFAULT_MAX_BUDGET_USD = 100
 export const DEFAULT_PASS_THRESHOLD = 75
 const DEFAULT_MODEL = 'claude-opus-4-5-20251101'
 const GRADER_TIMEOUT_MS = 300_000
--- a/packages/browseros-agent/apps/eval/src/runner/browseros-app-manager.ts
+++ b/packages/browseros-agent/apps/eval/src/runner/browseros-app-manager.ts
@@ -14,7 +14,13 @@
 * Each worker gets isolated ports: base + workerIndex offset.
 */

-import { existsSync, mkdtempSync, rmSync } from 'node:fs'
+import {
+  existsSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from 'node:fs'
 import { dirname, join } from 'node:path'
 import { fileURLToPath } from 'node:url'
 import { type Subprocess, spawn, spawnSync } from 'bun'
@@ -30,9 +36,15 @@ const MONOREPO_ROOT = join(
  '../../../..',
 )

-const BROWSEROS_BINARY = '/Applications/BrowserOS.app/Contents/MacOS/BrowserOS'
+// Path to the BrowserOS executable. A non-empty BROWSEROS_BINARY environment
+// variable takes precedence; otherwise the macOS application-bundle path
+// below is used.
+const BROWSEROS_BINARY =
+  process.env.BROWSEROS_BINARY ||
+  '/Applications/BrowserOS.app/Contents/MacOS/BrowserOS'

 const CONTROLLER_EXT_DIR = join(MONOREPO_ROOT, 'apps/controller-ext/dist')
+// Directory of the NopeCHA captcha extension, located at
+// `../../extensions/nopecha` relative to this module's own directory.
+const CAPTCHA_EXT_DIR = join(
+  dirname(fileURLToPath(import.meta.url)),
+  '../../extensions/nopecha',
+)

 export class BrowserOSAppManager {
  private ports: EvalPorts
@@ -143,7 +155,6 @@ export class BrowserOSAppManager {
      '--use-mock-keychain',
      '--disable-browseros-server',
      '--disable-browseros-extensions',
-      '--incognito',
      ...(this.headless ? ['--headless=new'] : []),
      '--window-size=1440,900',
      `--remote-debugging-port=${cdp}`,
@@ -152,8 +163,15 @@ export class BrowserOSAppManager {
      `--user-data-dir=${this.tempDir}`,
    ]

+    const extensions: string[] = []
    if (this.loadExtensions && existsSync(CONTROLLER_EXT_DIR)) {
-      chromeArgs.push(`--load-extension=${CONTROLLER_EXT_DIR}`)
+      extensions.push(CONTROLLER_EXT_DIR)
+    }
+    if (existsSync(CAPTCHA_EXT_DIR)) {
+      extensions.push(CAPTCHA_EXT_DIR)
+    }
+    if (extensions.length > 0) {
+      chromeArgs.push(`--load-extension=${extensions.join(',')}`)
    }

    chromeArgs.push('about:blank')
@@ -306,4 +324,22 @@ export class BrowserOSAppManager {
    })
    return (result.stdout?.toString().trim() ?? '').length > 0
  }
+
+  /**
+   * Write the given API key into the NopeCHA extension's manifest.json,
+   * under `nopecha.key`, keeping any other `nopecha` settings already there.
+   * Logs and does nothing when the manifest file is absent.
+   *
+   * The extension directory is shared, so call this once before launching
+   * any workers.
+   */
+  static patchNopechaApiKey(apiKey: string): void {
+    const manifestFile = join(CAPTCHA_EXT_DIR, 'manifest.json')
+    if (existsSync(manifestFile)) {
+      const parsed = JSON.parse(readFileSync(manifestFile, 'utf-8'))
+      parsed.nopecha = { ...parsed.nopecha, key: apiKey }
+      const serialized = JSON.stringify(parsed, null, 2)
+      writeFileSync(manifestFile, serialized)
+      console.log('[BROWSEROS] NopeCHA API key patched')
+      return
+    }
+    console.log(
+      '[BROWSEROS] NopeCHA extension not found, skipping API key patch',
+    )
+  }
 }
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				<svg fill="currentColor" fill-rule="evenodd" height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>GithubCopilot</title><path d="M19.245 5.364c1.322 1.36 1.877 3.216 2.11 5.817.622 0 1.2.135 1.592.654l.73.964c.21.278.323.61.323.955v2.62c0 .339-.173.669-.453.868C20.239 19.602 16.157 21.5 12 21.5c-4.6 0-9.205-2.583-11.547-4.258-.28-.2-.452-.53-.453-.868v-2.62c0-.345.113-.679.321-.956l.73-.963c.392-.517.974-.654 1.593-.654l.029-.297c.25-2.446.81-4.213 2.082-5.52 2.461-2.54 5.71-2.851 7.146-2.864h.198c1.436.013 4.685.323 7.146 2.864zm-7.244 4.328c-.284 0-.613.016-.962.05-.123.447-.305.85-.57 1.108-1.05 1.023-2.316 1.18-2.994 1.18-.638 0-1.306-.13-1.851-.464-.516.165-1.012.403-1.044.996a65.882 65.882 0 00-.063 2.884l-.002.48c-.002.563-.005 1.126-.013 1.69.002.326.204.63.51.765 2.482 1.102 4.83 1.657 6.99 1.657 2.156 0 4.504-.555 6.985-1.657a.854.854 0 00.51-.766c.03-1.682.006-3.372-.076-5.053-.031-.596-.528-.83-1.046-.996-.546.333-1.212.464-1.85.464-.677 0-1.942-.157-2.993-1.18-.266-.258-.447-.661-.57-1.108-.32-.032-.64-.049-.96-.05zm-2.525 4.013c.539 0 .976.426.976.95v1.753c0 .525-.437.95-.976.95a.964.964 0 01-.976-.95v-1.752c0-.525.437-.951.976-.951zm5 0c.539 0 .976.426.976.95v1.753c0 .525-.437.95-.976.95a.964.964 0 01-.976-.95v-1.752c0-.525.437-.951.976-.951zM7.635 5.087c-1.05.102-1.935.438-2.385.906-.975 1.037-.765 3.668-.21 4.224.405.394 1.17.657 1.995.657h.09c.649-.013 1.785-.176 2.73-1.11.435-.41.705-1.433.675-2.47-.03-.834-.27-1.52-.63-1.813-.39-.336-1.275-.482-2.265-.394zm6.465.394c-.36.292-.6.98-.63 1.813-.03 1.037.24 2.06.675 2.47.968.957 2.136 1.104 2.776 1.11h.044c.825 0 1.59-.263 1.995-.657.555-.556.765-3.187-.21-4.224-.45-.468-1.335-.804-2.385-.906-.99-.088-1.875.058-2.265.394zM12 7.615c-.24 0-.525.015-.84.044.03.16.045.336.06.526l-.001.159a2.94 2.94 0 01-.014.25c.225-.022.425-.027.612-.028h.366c.187 0 .387.006.612.028-.015-.146-.015-.277-.015-.409.015-.19.03-.365.06-.526a9.29 9.29 0 
00-.84-.044z"></path></svg>