feat(eval): add parallel workers support to showcase generator

Add --workers/-w flag for parallel task execution. Each worker gets its own isolated BrowserOS stack on offset ports, matching the eval ParallelExecutor pattern. Also uses env-based defaults for model/provider/baseUrl (SHOWCASE_MODEL, SHOWCASE_PROVIDER, SHOWCASE_BASE_URL) and reads the API key from OPENROUTER_API_KEY. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
fix(eval): improve crosshair visibility, fix page ID resolution, add prod tasks
2026-05-14 08:03:58 +00:00 · 2026-03-21 17:16:04 -07:00 · 2026-03-21 17:14:40 -07:00 · 2026-03-21 16:12:33 -07:00 · 2026-03-21 15:07:14 -07:00 · 2026-03-21 11:31:57 -07:00
183 changed files with 10229 additions and 4665 deletions
--- a/.github/workflows/code-quality.yml
+++ b/.github/workflows/code-quality.yml
@@ -5,7 +5,7 @@ on:
    branches:
      - main
    paths:
-      - 'packages/browseros-agent/**'
+      - "packages/browseros-agent/**"

 jobs:
  biome:
@@ -50,6 +50,9 @@ jobs:
      - name: Install dependencies
        run: bun ci

+      - name: Prepare wxt
+        run: VITE_PUBLIC_BROWSEROS_API=http://localhost:3000 bun run --cwd apps/agent wxt prepare
+
      - name: Run codegen
        run: bun run --cwd apps/agent codegen

--- a/.github/workflows/eval-weekly.yml
+++ b/.github/workflows/eval-weekly.yml
@@ -0,0 +1,89 @@
+name: Weekly Eval
+
+on:
+  schedule:
+    # Every Saturday at 06:00 UTC
+    - cron: '0 6 * * 6'
+  workflow_dispatch:  # manual runs can override the config file below
+    inputs:
+      config:
+        description: 'Eval config file (relative to apps/eval/)'
+        required: false
+        default: 'configs/browseros-agent-weekly.json'
+
+permissions:
+  contents: read  # the only token permission granted; uploads below use R2 secrets
+
+jobs:
+  eval:
+    runs-on: ubuntu-latest
+    timeout-minutes: 360  # 6 hours
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install BrowserOS
+        run: |
+          wget -q https://github.com/browseros-ai/BrowserOS/releases/download/v0.44.0.1/BrowserOS_v0.44.0.1_amd64.deb
+          sudo dpkg -i BrowserOS_v0.44.0.1_amd64.deb
+          browseros --version || echo "BrowserOS installed at $(which browseros)"  # a failing --version does not fail the step
+
+      - name: Install Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest  # NOTE(review): unpinned, unlike the BrowserOS .deb above
+
+      - name: Install dependencies
+        working-directory: packages/browseros-agent
+        run: bun install --ignore-scripts && bun run build:agent-sdk  # install, then build the agent SDK explicitly
+
+      - name: Install captcha solver extension
+        working-directory: packages/browseros-agent/apps/eval
+        run: |
+          mkdir -p extensions  # NOTE(review): the download below tracks releases/latest (unpinned)
+          curl -sL -o /tmp/nopecha.zip https://github.com/NopeCHALLC/nopecha-extension/releases/latest/download/chromium_automation.zip
+          unzip -qo /tmp/nopecha.zip -d extensions/nopecha  # -q quiet, -o overwrite without prompting
+
+      - name: Run eval
+        working-directory: packages/browseros-agent/apps/eval
+        env:
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          BROWSEROS_BINARY: /usr/bin/browseros
+          EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}  # falls back to the weekly config when no input is given
+        run: |
+          echo "Running eval with config: $EVAL_CONFIG"
+          bun run src/index.ts -c "$EVAL_CONFIG"  # config reaches the script via env, not inline interpolation
+
+      - name: Upload runs to R2
+        if: success()  # skipped when any earlier step failed
+        working-directory: packages/browseros-agent/apps/eval
+        env:
+          EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
+          EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
+          EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
+          EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
+          EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
+          EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}  # must stay in sync with the "Run eval" step
+        run: |
+          CONFIG_NAME=$(basename "$EVAL_CONFIG" .json)  # e.g. browseros-agent-weekly for the default config
+          bun scripts/upload-run.ts "results/$CONFIG_NAME"
+
+      - name: Generate trend report
+        if: success()
+        working-directory: packages/browseros-agent
+        env:
+          EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
+          EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
+          EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
+          EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
+          EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
+        run: bun apps/eval/scripts/weekly-report.ts /tmp/eval-report.html  # same path the artifact step below uploads
+
+      - name: Upload report as artifact
+        if: success()
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-report-${{ github.run_id }}  # run id keeps artifact names unique per run
+          path: /tmp/eval-report.html
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,15 +1,44 @@
 name: Tests

-on: []
+on:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - ready_for_review
+    paths:
+      - .github/workflows/test.yml
+      - packages/browseros-agent/**
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+env:
+  BROWSEROS_APPIMAGE_URL: https://files.browseros.com/download/BrowserOS.AppImage

 jobs:
  test:
-    name: Run Tests
-    runs-on: macos-latest
-    timeout-minutes: 10
+    name: Tests / ${{ matrix.suite }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
    defaults:
      run:
        working-directory: packages/browseros-agent
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - suite: tools
+            test_path: tests/tools
+            junit_path: test-results/tools.xml
+          - suite: integration
+            test_path: tests/server.integration.test.ts
+            junit_path: test-results/integration.xml
+          - suite: sdk
+            test_path: tests/sdk
+            junit_path: test-results/sdk.xml

    steps:
      - name: Checkout code
@@ -21,7 +50,92 @@ jobs:
      - name: Install dependencies
        run: bun ci

-      - name: Run all tests
-        run: bun test:all
+      - name: Resolve BrowserOS cache key  # derives the actions/cache key from the AppImage's HTTP validators
+        id: browseros-cache-key
+        run: |
+          set -euo pipefail
+          headers="$(curl -fsSI "$BROWSEROS_APPIMAGE_URL")"  # HEAD request; header names are matched via tolower() so mawk/POSIX awk work too
+          etag="$(printf '%s\n' "$headers" | awk 'tolower($0) ~ /^etag:/ {sub(/\r$/, "", $2); gsub(/"/, "", $2); print $2; exit}')"
+          last_modified="$(printf '%s\n' "$headers" | awk 'tolower($0) ~ /^last-modified:/ {$1=""; sub(/^ /, ""); sub(/\r$/, ""); print; exit}')"
+          raw_key="${etag:-$last_modified}"  # prefer ETag, fall back to Last-Modified
+          if [ -z "$raw_key" ]; then
+            raw_key="$BROWSEROS_APPIMAGE_URL"  # last resort: constant key, so the cache never refreshes on its own
+          fi
+          cache_key="$(printf '%s' "$raw_key" | shasum -a 256 | awk '{print $1}')"  # keep only the hex digest column
+          echo "key=browseros-appimage-${{ runner.os }}-$cache_key" >> "$GITHUB_OUTPUT"
+
+      - name: Restore BrowserOS cache
+        id: browseros-cache
+        uses: actions/cache@v4
+        with:
+          path: packages/browseros-agent/.ci/bin/BrowserOS.AppImage
+          key: ${{ steps.browseros-cache-key.outputs.key }}  # output of the browseros-cache-key step
+
+      - name: Download BrowserOS
+        if: steps.browseros-cache.outputs.cache-hit != 'true'  # only download on a cache miss
+        run: |
+          mkdir -p .ci/bin
+          curl -fsSL "$BROWSEROS_APPIMAGE_URL" -o .ci/bin/BrowserOS.AppImage
+          chmod +x .ci/bin/BrowserOS.AppImage
+
+      - name: Prepare BrowserOS wrapper
+        run: |
+          mkdir -p .ci/bin  # no-op when the directory already exists
+          cat > .ci/bin/browseros <<'EOF'
+          #!/usr/bin/env bash
+          set -euo pipefail
+          export APPIMAGE_EXTRACT_AND_RUN=1
+          exec "$(dirname "$0")/BrowserOS.AppImage" "$@"
+          EOF
+          chmod +x .ci/bin/browseros  # this wrapper is what BROWSEROS_BINARY points at below
+
+      - name: Create server env file
+        working-directory: packages/browseros-agent/apps/server
+        run: cp .env.example .env.development  # loaded by the test step via --env-file
+
+      - name: Run ${{ matrix.suite }} tests
+        id: test
        env:
-          PUPPETEER_EXECUTABLE_PATH: /Applications/Google Chrome.app/Contents/MacOS/Google Chrome
+          BROWSEROS_BINARY: ${{ github.workspace }}/packages/browseros-agent/.ci/bin/browseros
+          BROWSEROS_TEST_HEADLESS: "true"
+          BROWSEROS_TEST_EXTRA_ARGS: --no-sandbox --disable-dev-shm-usage
+        run: |
+          set +e  # keep going after a failure so the fallback JUnit file and exit_code are still written
+          mkdir -p test-results
+          cd apps/server
+          bun run test:cleanup
+          bun --env-file=.env.development test "${{ matrix.test_path }}" --reporter=junit --reporter-outfile="../../${{ matrix.junit_path }}"
+          exit_code=$?  # status of the bun test command directly above
+          cd ../..
+          if [ ! -f "${{ matrix.junit_path }}" ]; then  # no report produced: synthesize one failing testcase
+            cat > "${{ matrix.junit_path }}" <<EOF
+          <?xml version="1.0" encoding="UTF-8"?>
+          <testsuites tests="1" failures="1">
+            <testsuite name="${{ matrix.suite }}" tests="1" failures="1">
+              <testcase classname="workflow" name="${{ matrix.suite }} setup">
+                <failure message="Test run failed before JUnit output was written">See workflow logs for details.</failure>
+              </testcase>
+            </testsuite>
+          </testsuites>
+          EOF
+          fi
+          echo "exit_code=$exit_code" >> "$GITHUB_OUTPUT"  # read by the "Summarize suite result" step
+
+      - name: Upload JUnit XML
+        if: always()  # upload the report even when earlier steps failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: junit-${{ matrix.suite }}
+          path: packages/browseros-agent/${{ matrix.junit_path }}
+
+      - name: Summarize suite result
+        if: always()
+        run: |
+          if [ "${{ steps.test.outputs.exit_code }}" = "0" ]; then  # an empty value (test step never ran) also counts as failure
+            echo "### :white_check_mark: ${{ matrix.suite }} suite passed" >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "### :x: ${{ matrix.suite }} suite failed (exit code ${{ steps.test.outputs.exit_code }})" >> "$GITHUB_STEP_SUMMARY"
+            echo "" >> "$GITHUB_STEP_SUMMARY"
+            echo "See the uploaded \`junit-${{ matrix.suite }}\` artifact for details." >> "$GITHUB_STEP_SUMMARY"
+            exit 1  # the test step itself ends on a successful echo, so the job is failed here
+          fi
--- a/.vscode/PythonImportHelper-v2-Completion.json
+++ b/.vscode/PythonImportHelper-v2-Completion.json
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+  "terminal.integrated.tabs.title": "${sequence} ${process}",
+  "terminal.integrated.tabs.description": "${cwd}"
+}
--- a/lefthook.yml
+++ b/lefthook.yml
@@ -0,0 +1,57 @@
+commit-msg:
+  commands:
+    conventional:
+      run: |
+        msg=$(head -1 {1})  # {1} is the first hook argument, i.e. the commit message file
+        if ! printf '%s\n' "$msg" | grep -Eq '^(feat|fix|docs|style|refactor|perf|test|chore|ci|build|revert)(\(.+\))?!?: .+'; then
+          echo "Commit message must follow Conventional Commits format:"
+          echo "  <type>(<optional scope>): <description>"
+          echo "  Types: feat, fix, docs, style, refactor, perf, test, chore, ci, build, revert"
+          echo ""
+          echo "Examples:"
+          echo "  feat(auth): add OAuth2 support"
+          echo "  fix: resolve null pointer exception"
+          exit 1
+        fi
+
+pre-commit:
+  commands:
+    biome-check:
+      root: "packages/browseros-agent/"
+      glob: "*.{js,ts,cjs,mjs,d.cts,d.mts,jsx,tsx,json,jsonc}"
+      run: npx @biomejs/biome check --write --no-errors-on-unmatched --files-ignore-unknown=true --colors=off {staged_files}
+      stage_fixed: true
+
+    file-length:
+      root: "packages/browseros-agent/"
+      glob: "*.{ts,tsx}"
+      exclude: "*.{test,spec,d}.ts|*.{test,spec}.tsx|**/__tests__/**|**/tests/**|**/*.generated.*"
+      run: |
+        for file in {staged_files}; do  # advisory only: this loop never exits non-zero
+          if [ -f "$file" ]; then
+            lines=$(wc -l < "$file" | tr -d ' ')
+            if [ "$lines" -gt 400 ]; then
+              echo "⚠️  Warning: $file has $lines lines (threshold: 400)"
+              echo "   Consider splitting this file if it has multiple responsibilities."
+            fi
+          fi
+        done
+
+pre-push:
+  commands:
+    branch-name:
+      run: |
+        branch=$(git rev-parse --abbrev-ref HEAD)
+        if [ "$branch" = "main" ] || [ "$branch" = "master" ]; then  # POSIX test: the hook body runs under sh, not bash
+          exit 0
+        fi
+        if ! printf '%s\n' "$branch" | grep -Eq '^(feat|fix|bugfix|hotfix|release|docs|refactor|test|chore|experiment)/[a-z0-9-]+$'; then
+          echo "⚠️  Warning: Branch name '$branch' doesn't match recommended format."
+          echo "   Use: <type>/<short-description>"
+          echo "   Types: feat, fix, bugfix, hotfix, release, docs, refactor, test, chore, experiment"
+          echo "   Example: feat/add-auth, fix/login-crash"
+          echo ""
+          echo "   To rename your branch:"
+          echo "     git branch -m <new-name>"
+          echo "     git push -u origin <new-name>"
+        fi
--- a/packages/browseros-agent/.gitignore
+++ b/packages/browseros-agent/.gitignore
@@ -187,6 +187,10 @@ log.txt
 # Testing iteration temp files
 tmp/

+# CI artifacts
+.ci/
+test-results/
+
 # Coding agent artifacts
 .agent/
 .llm/
--- a/packages/browseros-agent/apps/agent/biome.json
+++ b/packages/browseros-agent/apps/agent/biome.json
@@ -1,5 +1,5 @@
 {
-  "$schema": "https://biomejs.dev/schemas/2.4.5/schema.json",
+  "$schema": "https://biomejs.dev/schemas/2.4.8/schema.json",
  "root": false,
  "extends": "//",
  "vcs": {
--- a/packages/browseros-agent/apps/agent/components/credits/CreditBadge.tsx
+++ b/packages/browseros-agent/apps/agent/components/credits/CreditBadge.tsx
@@ -0,0 +1,26 @@
+import { Coins } from 'lucide-react'
+import type { FC } from 'react'
+import { getCreditTextColor } from '@/lib/credits/credit-colors'
+import { cn } from '@/lib/utils'
+
+/** Props for CreditBadge: the credit count to display and an optional click handler. */
+interface CreditBadgeProps {
+  credits: number
+  onClick?: () => void
+}
+
+/** Small button with a coin icon and the credit count; its text color comes from getCreditTextColor(credits). */
+export const CreditBadge: FC<CreditBadgeProps> = ({ credits, onClick }) => (
+  <button
+    type="button"
+    onClick={onClick}
+    className={cn(
+      'inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 font-medium text-xs transition-colors hover:bg-muted/50',
+      getCreditTextColor(credits),
+    )}
+    title={`${credits} credits remaining`}
+  >
+    <Coins className="h-3.5 w-3.5" />
+    <span>{credits}</span>
+  </button>
+)
--- a/packages/browseros-agent/apps/agent/components/sidebar/SettingsSidebar.tsx
+++ b/packages/browseros-agent/apps/agent/components/sidebar/SettingsSidebar.tsx
@@ -3,6 +3,7 @@ import {
  BookOpen,
  Bot,
  Compass,
+  CreditCard,
  GitBranch,
  MessageSquare,
  Palette,
@@ -79,6 +80,12 @@ const primarySettingsSections: NavSection[] = [
        feature: Feature.CUSTOMIZATION_SUPPORT,
      },
      { name: 'BrowserOS as MCP', to: '/settings/mcp', icon: Server },
+      {
+        name: 'Usage & Billing',
+        to: '/settings/usage',
+        icon: CreditCard,
+        feature: Feature.CREDITS_SUPPORT,
+      },
      {
        name: 'Workflows',
        to: '/workflows',
--- a/packages/browseros-agent/apps/agent/components/ui/alert-dialog.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/alert-dialog.tsx
@@ -176,14 +176,14 @@ function AlertDialogCancel({

 export {
  AlertDialog,
-  AlertDialogPortal,
-  AlertDialogOverlay,
-  AlertDialogTrigger,
-  AlertDialogContent,
-  AlertDialogHeader,
-  AlertDialogFooter,
-  AlertDialogTitle,
-  AlertDialogDescription,
  AlertDialogAction,
  AlertDialogCancel,
+  AlertDialogContent,
+  AlertDialogDescription,
+  AlertDialogFooter,
+  AlertDialogHeader,
+  AlertDialogOverlay,
+  AlertDialogPortal,
+  AlertDialogTitle,
+  AlertDialogTrigger,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/alert.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/alert.tsx
@@ -72,4 +72,4 @@ function AlertDescription({
  )
 }

-export { Alert, AlertTitle, AlertDescription }
+export { Alert, AlertDescription, AlertTitle }
--- a/packages/browseros-agent/apps/agent/components/ui/card.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/card.tsx
@@ -104,10 +104,10 @@ function CardFooter({ className, ...props }: React.ComponentProps<'div'>) {

 export {
  Card,
-  CardHeader,
-  CardFooter,
-  CardTitle,
  CardAction,
-  CardDescription,
  CardContent,
+  CardDescription,
+  CardFooter,
+  CardHeader,
+  CardTitle,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/carousel.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/carousel.tsx
@@ -251,10 +251,10 @@ function CarouselNext({
 }

 export {
-  type CarouselApi,
  Carousel,
+  type CarouselApi,
  CarouselContent,
  CarouselItem,
-  CarouselPrevious,
  CarouselNext,
+  CarouselPrevious,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/collapsible.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/collapsible.tsx
@@ -39,4 +39,4 @@ function CollapsibleContent({
  )
 }

-export { Collapsible, CollapsibleTrigger, CollapsibleContent }
+export { Collapsible, CollapsibleContent, CollapsibleTrigger }
--- a/packages/browseros-agent/apps/agent/components/ui/command.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/command.tsx
@@ -198,11 +198,11 @@ function CommandShortcut({
 export {
  Command,
  CommandDialog,
-  CommandInput,
-  CommandList,
  CommandEmpty,
  CommandGroup,
+  CommandInput,
  CommandItem,
-  CommandShortcut,
+  CommandList,
  CommandSeparator,
+  CommandShortcut,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/dropdown-menu.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/dropdown-menu.tsx
@@ -283,18 +283,18 @@ function DropdownMenuSubContent({

 export {
  DropdownMenu,
-  DropdownMenuPortal,
-  DropdownMenuTrigger,
+  DropdownMenuCheckboxItem,
  DropdownMenuContent,
  DropdownMenuGroup,
-  DropdownMenuLabel,
  DropdownMenuItem,
-  DropdownMenuCheckboxItem,
+  DropdownMenuLabel,
+  DropdownMenuPortal,
  DropdownMenuRadioGroup,
  DropdownMenuRadioItem,
  DropdownMenuSeparator,
  DropdownMenuShortcut,
  DropdownMenuSub,
-  DropdownMenuSubTrigger,
  DropdownMenuSubContent,
+  DropdownMenuSubTrigger,
+  DropdownMenuTrigger,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/form.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/form.tsx
@@ -179,12 +179,12 @@ function FormMessage({ className, ...props }: React.ComponentProps<'p'>) {
 }

 export {
-  useFormField,
  Form,
-  FormItem,
-  FormLabel,
  FormControl,
  FormDescription,
-  FormMessage,
  FormField,
+  FormItem,
+  FormLabel,
+  FormMessage,
+  useFormField,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/hover-card.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/hover-card.tsx
@@ -50,4 +50,4 @@ function HoverCardContent({
  )
 }

-export { HoverCard, HoverCardTrigger, HoverCardContent }
+export { HoverCard, HoverCardContent, HoverCardTrigger }
--- a/packages/browseros-agent/apps/agent/components/ui/input-group.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/input-group.tsx
@@ -184,7 +184,7 @@ export {
  InputGroup,
  InputGroupAddon,
  InputGroupButton,
-  InputGroupText,
  InputGroupInput,
+  InputGroupText,
  InputGroupTextarea,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/popover.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/popover.tsx
@@ -55,4 +55,4 @@ function PopoverAnchor({
  return <PopoverPrimitive.Anchor data-slot="popover-anchor" {...props} />
 }

-export { Popover, PopoverTrigger, PopoverContent, PopoverAnchor }
+export { Popover, PopoverAnchor, PopoverContent, PopoverTrigger }
--- a/packages/browseros-agent/apps/agent/components/ui/resizable.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/resizable.tsx
@@ -49,4 +49,4 @@ function ResizableHandle({
  )
 }

-export { ResizablePanelGroup, ResizablePanel, ResizableHandle }
+export { ResizableHandle, ResizablePanel, ResizablePanelGroup }
--- a/packages/browseros-agent/apps/agent/components/ui/sheet.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/sheet.tsx
@@ -129,11 +129,11 @@ function SheetDescription({

 export {
  Sheet,
-  SheetTrigger,
  SheetClose,
  SheetContent,
-  SheetHeader,
-  SheetFooter,
-  SheetTitle,
  SheetDescription,
+  SheetFooter,
+  SheetHeader,
+  SheetTitle,
+  SheetTrigger,
 }
--- a/packages/browseros-agent/apps/agent/components/ui/sonner.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/sonner.tsx
@@ -18,6 +18,7 @@ const Toaster = ({ ...props }: ToasterProps) => {
    <Sonner
      theme={theme as ToasterProps['theme']}
      className="toaster group"
+      closeButton
      icons={{
        success: <CircleCheckIcon className="size-4" />,
        info: <InfoIcon className="size-4" />,
--- a/packages/browseros-agent/apps/agent/components/ui/tabs.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/tabs.tsx
@@ -86,4 +86,4 @@ function TabsContent({
  )
 }

-export { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants }
+export { Tabs, TabsContent, TabsList, TabsTrigger, tabsListVariants }
--- a/packages/browseros-agent/apps/agent/components/ui/tooltip.tsx
+++ b/packages/browseros-agent/apps/agent/components/ui/tooltip.tsx
@@ -68,4 +68,4 @@ function TooltipContent({
  )
 }

-export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider }
+export { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger }
--- a/packages/browseros-agent/apps/agent/entrypoints/app/App.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/App.tsx
@@ -2,6 +2,7 @@ import type { FC } from 'react'
 import { HashRouter, Navigate, Route, Routes, useParams } from 'react-router'

 import { NewTab } from '../newtab/index/NewTab'
+import { NewTabChat } from '../newtab/index/NewTabChat'
 import { NewTabLayout } from '../newtab/layout/NewTabLayout'
 import { Personalize } from '../newtab/personalize/Personalize'
 import { OnboardingDemo } from '../onboarding/demo/OnboardingDemo'
@@ -27,6 +28,7 @@ import { ScheduledTasksPage } from './scheduled-tasks/ScheduledTasksPage'
 import { SearchProviderPage } from './search-provider/SearchProviderPage'
 import { SkillsPage } from './skills/SkillsPage'
 import { SoulPage } from './soul/SoulPage'
+import { UsagePage } from './usage/UsagePage'
 import { WorkflowsPageWrapper } from './workflows/WorkflowsPageWrapper'

 function getSurveyParams(): { maxTurns?: number; experimentId?: string } {
@@ -79,6 +81,7 @@ export const App: FC = () => {
          {/* Home routes */}
          <Route path="home" element={<NewTabLayout />}>
            <Route index element={<NewTab />} />
+            <Route path="chat" element={<NewTabChat />} />
            <Route path="personalize" element={<Personalize />} />
            <Route path="soul" element={<SoulPage />} />
            <Route path="skills" element={<SkillsPage />} />
@@ -101,6 +104,7 @@ export const App: FC = () => {
            <Route path="customization" element={<CustomizationPage />} />
            <Route path="search" element={<SearchProviderPage />} />
            <Route path="survey" element={<SurveyPage {...surveyParams} />} />
+            <Route path="usage" element={<UsagePage />} />
          </Route>
        </Route>

--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/AISettingsPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/AISettingsPage.tsx
@@ -13,6 +13,17 @@ import {
 } from '@/components/ui/alert-dialog'
 import { useSessionInfo } from '@/lib/auth/sessionStorage'
 import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
+import {
+  CHATGPT_PRO_OAUTH_COMPLETED_EVENT,
+  CHATGPT_PRO_OAUTH_DISCONNECTED_EVENT,
+  CHATGPT_PRO_OAUTH_STARTED_EVENT,
+  GITHUB_COPILOT_OAUTH_COMPLETED_EVENT,
+  GITHUB_COPILOT_OAUTH_DISCONNECTED_EVENT,
+  GITHUB_COPILOT_OAUTH_STARTED_EVENT,
+  QWEN_CODE_OAUTH_COMPLETED_EVENT,
+  QWEN_CODE_OAUTH_DISCONNECTED_EVENT,
+  QWEN_CODE_OAUTH_STARTED_EVENT,
+} from '@/lib/constants/analyticsEvents'
 import { GetProfileIdByUserIdDocument } from '@/lib/conversations/graphql/uploadConversationDocument'
 import { getQueryKeyFromDocument } from '@/lib/graphql/getQueryKeyFromDocument'
 import { useGraphqlMutation } from '@/lib/graphql/useGraphqlMutation'
@@ -21,6 +32,11 @@ import type { ProviderTemplate } from '@/lib/llm-providers/providerTemplates'
 import { testProvider } from '@/lib/llm-providers/testProvider'
 import type { LlmProviderConfig } from '@/lib/llm-providers/types'
 import { useLlmProviders } from '@/lib/llm-providers/useLlmProviders'
+import {
+  type OAuthProviderFlowConfig,
+  useOAuthProviderFlow,
+} from '@/lib/llm-providers/useOAuthProviderFlow'
+import { track } from '@/lib/metrics/track'
 import { ConfiguredProvidersList } from './ConfiguredProvidersList'
 import {
  DeleteRemoteLlmProviderDocument,
@@ -32,6 +48,47 @@ import { LlmProvidersHeader } from './LlmProvidersHeader'
 import { NewProviderDialog } from './NewProviderDialog'
 import { ProviderTemplatesSection } from './ProviderTemplatesSection'

+// All OAuth providers share the same flow via useOAuthProviderFlow
+const OAUTH_PROVIDERS_CONFIG: Record<string, OAuthProviderFlowConfig> = { // each key equals its entry's providerType
+  'chatgpt-pro': { // no clientAuth block here, unlike the two entries below
+    providerType: 'chatgpt-pro',
+    displayName: 'ChatGPT Plus/Pro',
+    startedEvent: CHATGPT_PRO_OAUTH_STARTED_EVENT,
+    completedEvent: CHATGPT_PRO_OAUTH_COMPLETED_EVENT,
+    disconnectedEvent: CHATGPT_PRO_OAUTH_DISCONNECTED_EVENT,
+  },
+  'github-copilot': {
+    providerType: 'github-copilot',
+    displayName: 'GitHub Copilot',
+    startedEvent: GITHUB_COPILOT_OAUTH_STARTED_EVENT,
+    completedEvent: GITHUB_COPILOT_OAUTH_COMPLETED_EVENT,
+    disconnectedEvent: GITHUB_COPILOT_OAUTH_DISCONNECTED_EVENT,
+    clientAuth: { // NOTE(review): presumably drives a client-side device-code flow — confirm in useOAuthProviderFlow
+      deviceCodeEndpoint: 'https://github.com/login/device/code',
+      tokenEndpoint: 'https://github.com/login/oauth/access_token',
+      clientId: 'Ov23li8tweQw6odWQebz',
+      scopes: 'read:user',
+      requiresPKCE: false,
+      contentType: 'json',
+    },
+  },
+  'qwen-code': {
+    providerType: 'qwen-code',
+    displayName: 'Qwen Code',
+    startedEvent: QWEN_CODE_OAUTH_STARTED_EVENT,
+    completedEvent: QWEN_CODE_OAUTH_COMPLETED_EVENT,
+    disconnectedEvent: QWEN_CODE_OAUTH_DISCONNECTED_EVENT,
+    clientAuth: { // differs from the GitHub entry: PKCE on, form content type
+      deviceCodeEndpoint: 'https://chat.qwen.ai/api/v1/oauth2/device/code',
+      tokenEndpoint: 'https://chat.qwen.ai/api/v1/oauth2/token',
+      clientId: 'f0304373b74a44d2b584a3fb70ca9e56',
+      scopes: 'openid profile email model.completion',
+      requiresPKCE: true,
+      contentType: 'form',
+    },
+  },
+}
+
 /**
 * AI Settings page for managing LLM providers
 * @public
@@ -78,9 +135,7 @@ export const AISettingsPage: FC = () => {

  const incompleteProviders = useMemo<IncompleteProvider[]>(() => {
    if (!remoteProvidersData?.llmProviders?.nodes) return []
-
    const localProviderIds = new Set(providers.map((p) => p.id))
-
    return remoteProvidersData.llmProviders.nodes
      .filter((node): node is NonNullable<typeof node> => node !== null)
      .filter((node) => !localProviderIds.has(node.rowId))
@@ -101,12 +156,61 @@ export const AISettingsPage: FC = () => {
    null,
  )

+  // OAuth flows — shared hook eliminates per-provider duplication
+  const chatgptPro = useOAuthProviderFlow(
+    OAUTH_PROVIDERS_CONFIG['chatgpt-pro'],
+    providers,
+    saveProvider,
+  )
+  const copilot = useOAuthProviderFlow(
+    OAUTH_PROVIDERS_CONFIG['github-copilot'],
+    providers,
+    saveProvider,
+  )
+  const qwenCode = useOAuthProviderFlow(
+    OAUTH_PROVIDERS_CONFIG['qwen-code'],
+    providers,
+    saveProvider,
+  )
+
+  const oauthFlows: Record<
+    string,
+    {
+      startOAuthFlow: (url: string | undefined) => Promise<void>
+      disconnect: () => Promise<void>
+      disconnectedEvent: string
+    }
+  > = {
+    'chatgpt-pro': {
+      startOAuthFlow: chatgptPro.startOAuthFlow,
+      disconnect: chatgptPro.disconnect,
+      disconnectedEvent: CHATGPT_PRO_OAUTH_DISCONNECTED_EVENT,
+    },
+    'github-copilot': {
+      startOAuthFlow: copilot.startOAuthFlow,
+      disconnect: copilot.disconnect,
+      disconnectedEvent: GITHUB_COPILOT_OAUTH_DISCONNECTED_EVENT,
+    },
+    'qwen-code': {
+      startOAuthFlow: qwenCode.startOAuthFlow,
+      disconnect: qwenCode.disconnect,
+      disconnectedEvent: QWEN_CODE_OAUTH_DISCONNECTED_EVENT,
+    },
+  }
+
  const handleAddProvider = () => {
    setTemplateValues(undefined)
    setIsNewDialogOpen(true)
  }

  const handleUseTemplate = (template: ProviderTemplate) => {
+    // OAuth providers: trigger OAuth flow
+    const oauthFlow = oauthFlows[template.id]
+    if (oauthFlow) {
+      oauthFlow.startOAuthFlow(agentServerUrl ?? undefined)
+      return
+    }
+
    setTemplateValues({
      type: template.id,
      name: template.name,
@@ -129,11 +233,18 @@ export const AISettingsPage: FC = () => {
  }

  const confirmDeleteProvider = async () => {
-    if (providerToDelete) {
-      await deleteProvider(providerToDelete.id)
-      deleteRemoteProviderMutation.mutate({ rowId: providerToDelete.id })
-      setProviderToDelete(null)
+    if (!providerToDelete) return
+
+    // Clear OAuth tokens on server for OAuth-based providers
+    const oauthFlow = oauthFlows[providerToDelete.type] // undefined for provider types without an OAuth flow
+    if (oauthFlow) {
+      await oauthFlow.disconnect() // NOTE(review): if this rejects, the delete below never runs — confirm intended
+      track(oauthFlow.disconnectedEvent)
    }
+
+    await deleteProvider(providerToDelete.id)
+    deleteRemoteProviderMutation.mutate({ rowId: providerToDelete.id }) // not awaited here
+    setProviderToDelete(null)
  }

  const handleAddKeysToIncomplete = (provider: IncompleteProvider) => {
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/NewProviderDialog.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/NewProviderDialog.tsx
@@ -61,6 +61,9 @@ const providerTypeEnum = z.enum([
  'lmstudio',
  'bedrock',
  'browseros',
+  'chatgpt-pro',
+  'github-copilot',
+  'qwen-code',
 ])

 /**
@@ -84,6 +87,9 @@ export const providerFormSchema = z
    secretAccessKey: z.string().optional(),
    region: z.string().optional(),
    sessionToken: z.string().optional(),
+    // ChatGPT Pro (Codex)
+    reasoningEffort: z.enum(['none', 'low', 'medium', 'high']).optional(),
+    reasoningSummary: z.enum(['auto', 'concise', 'detailed']).optional(),
  })
  .superRefine((data, ctx) => {
    // Azure: require either resourceName or baseUrl
@@ -127,6 +133,14 @@ export const providerFormSchema = z
        })
      }
    }
+    // OAuth providers: no credentials needed (server-managed)
+    else if (
+      data.type === 'chatgpt-pro' ||
+      data.type === 'github-copilot' ||
+      data.type === 'qwen-code'
+    ) {
+      // No validation needed — OAuth tokens are on the server
+    }
    // Other providers: require baseUrl
    else if (!data.baseUrl) {
      ctx.addIssue({
@@ -182,6 +196,11 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
  const kimiLaunch = useKimiLaunch()

  const filteredProviderTypeOptions = providerTypeOptions.filter((opt) => {
+    if (opt.value === 'chatgpt-pro')
+      return supports(Feature.CHATGPT_PRO_SUPPORT)
+    if (opt.value === 'github-copilot')
+      return supports(Feature.GITHUB_COPILOT_SUPPORT)
+    if (opt.value === 'qwen-code') return supports(Feature.QWEN_CODE_SUPPORT)
    if (opt.value === 'moonshot')
      return kimiLaunch || initialValues?.type === 'moonshot'
    if (opt.value === 'openai-compatible') {
@@ -209,6 +228,8 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
      secretAccessKey: initialValues?.secretAccessKey || '',
      region: initialValues?.region || '',
      sessionToken: initialValues?.sessionToken || '',
+      reasoningEffort: initialValues?.reasoningEffort || 'high',
+      reasoningSummary: initialValues?.reasoningSummary || 'auto',
    },
  })

@@ -301,6 +322,8 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
        secretAccessKey: initialValues.secretAccessKey || '',
        region: initialValues.region || '',
        sessionToken: initialValues.sessionToken || '',
+        reasoningEffort: initialValues.reasoningEffort || 'high',
+        reasoningSummary: initialValues.reasoningSummary || 'auto',
      })
      setIsCustomModel(false)
    }
@@ -326,6 +349,8 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
        secretAccessKey: '',
        region: '',
        sessionToken: '',
+        reasoningEffort: 'high',
+        reasoningSummary: 'auto',
      })
      setIsCustomModel(false)
    }
@@ -363,6 +388,14 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
  const canTest = (): boolean => {
    if (!watchedModelId) return false

+    // OAuth providers: always testable (server has the OAuth token)
+    if (
+      watchedType === 'chatgpt-pro' ||
+      watchedType === 'github-copilot' ||
+      watchedType === 'qwen-code'
+    )
+      return true
+
    if (watchedType === 'azure') {
      return !!(watchedResourceName || watchedBaseUrl) && !!watchedApiKey
    }
@@ -444,6 +477,85 @@ export const NewProviderDialog: FC<NewProviderDialogProps> = ({
  }

  const renderProviderSpecificFields = () => {
+    // OAuth-only providers (no API key needed)
+    if (watchedType === 'github-copilot' || watchedType === 'qwen-code') {
+      const name = watchedType === 'github-copilot' ? 'GitHub' : 'Qwen Code'
+      return (
+        <div className="rounded-lg border border-green-200 bg-green-50 p-3 text-green-700 text-sm dark:border-green-800 dark:bg-green-950 dark:text-green-300">
+          Credentials are managed via {name} OAuth. No API key needed.
+        </div>
+      )
+    }
+    // ChatGPT Pro: OAuth credentials + Codex reasoning settings
+    if (watchedType === 'chatgpt-pro') {
+      return (
+        <>
+          <div className="rounded-lg border border-green-200 bg-green-50 p-3 text-green-700 text-sm dark:border-green-800 dark:bg-green-950 dark:text-green-300">
+            Credentials are managed via OAuth. No API key needed.
+          </div>
+          <div className="grid gap-4 sm:grid-cols-2">
+            <FormField
+              control={form.control}
+              name="reasoningEffort"
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>Reasoning Effort</FormLabel>
+                  <Select
+                    onValueChange={field.onChange}
+                    value={field.value || 'high'}
+                  >
+                    <FormControl>
+                      <SelectTrigger className="w-full">
+                        <SelectValue />
+                      </SelectTrigger>
+                    </FormControl>
+                    <SelectContent>
+                      <SelectItem value="none">None</SelectItem>
+                      <SelectItem value="low">Low</SelectItem>
+                      <SelectItem value="medium">Medium</SelectItem>
+                      <SelectItem value="high">High</SelectItem>
+                    </SelectContent>
+                  </Select>
+                  <FormDescription>
+                    How much the model thinks before responding
+                  </FormDescription>
+                  <FormMessage />
+                </FormItem>
+              )}
+            />
+            <FormField
+              control={form.control}
+              name="reasoningSummary"
+              render={({ field }) => (
+                <FormItem>
+                  <FormLabel>Reasoning Summary</FormLabel>
+                  <Select
+                    onValueChange={field.onChange}
+                    value={field.value || 'auto'}
+                  >
+                    <FormControl>
+                      <SelectTrigger className="w-full">
+                        <SelectValue />
+                      </SelectTrigger>
+                    </FormControl>
+                    <SelectContent>
+                      <SelectItem value="auto">Auto</SelectItem>
+                      <SelectItem value="concise">Concise</SelectItem>
+                      <SelectItem value="detailed">Detailed</SelectItem>
+                    </SelectContent>
+                  </Select>
+                  <FormDescription>
+                    Detail level of visible thinking steps
+                  </FormDescription>
+                  <FormMessage />
+                </FormItem>
+              )}
+            />
+          </div>
+        </>
+      )
+    }
+
    if (watchedType === 'azure') {
      return (
        <>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderCard.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderCard.tsx
@@ -103,8 +103,10 @@ export const ProviderCard: FC<ProviderCardProps> = ({
                for better performance.
              </>
            )
-          ) : (
+          ) : provider.baseUrl ? (
            `${provider.modelId} • ${provider.baseUrl}`
+          ) : (
+            provider.modelId
          )}
        </p>
      </div>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderTemplatesSection.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/ProviderTemplatesSection.tsx
@@ -26,6 +26,11 @@ export const ProviderTemplatesSection: FC<ProviderTemplatesSectionProps> = ({
  const kimiLaunch = useKimiLaunch()

  const filteredTemplates = providerTemplates.filter((template) => {
+    if (template.id === 'chatgpt-pro')
+      return supports(Feature.CHATGPT_PRO_SUPPORT)
+    if (template.id === 'github-copilot')
+      return supports(Feature.GITHUB_COPILOT_SUPPORT)
+    if (template.id === 'qwen-code') return supports(Feature.QWEN_CODE_SUPPORT)
    if (template.id === 'moonshot') return kimiLaunch
    if (template.id === 'openai-compatible') {
      return supports(Feature.OPENAI_COMPATIBLE_SUPPORT)
--- a/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/models.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/ai-settings/models.ts
@@ -23,6 +23,9 @@ export interface ModelsData {
  bedrock: ModelInfo[]
  browseros: ModelInfo[]
  moonshot: ModelInfo[]
+  'chatgpt-pro': ModelInfo[]
+  'github-copilot': ModelInfo[]
+  'qwen-code': ModelInfo[]
 }

 /**
@@ -90,6 +93,48 @@ export const MODELS_DATA: ModelsData = {
  ],
  bedrock: [],
  browseros: [{ modelId: 'browseros-auto', contextLength: 200000 }],
+  'chatgpt-pro': [
+    { modelId: 'gpt-5.4', contextLength: 400000 },
+    { modelId: 'gpt-5.3-codex', contextLength: 400000 },
+    { modelId: 'gpt-5.2-codex', contextLength: 400000 },
+    { modelId: 'gpt-5.2', contextLength: 200000 },
+    { modelId: 'gpt-5.1-codex', contextLength: 400000 },
+    { modelId: 'gpt-5.1-codex-max', contextLength: 400000 },
+    { modelId: 'gpt-5.1-codex-mini', contextLength: 400000 },
+    { modelId: 'gpt-5.1', contextLength: 200000 },
+  ],
+  'github-copilot': [
+    // Free tier (unlimited with Pro)
+    { modelId: 'gpt-5-mini', contextLength: 128000 },
+    { modelId: 'claude-haiku-4.5', contextLength: 128000 },
+    { modelId: 'gpt-4o', contextLength: 64000 },
+    { modelId: 'gpt-4.1', contextLength: 64000 },
+    // Premium models (Pro: 300/mo, Pro+: 1500/mo)
+    { modelId: 'claude-sonnet-4.6', contextLength: 128000 },
+    { modelId: 'claude-sonnet-4.5', contextLength: 128000 },
+    { modelId: 'claude-sonnet-4', contextLength: 128000 },
+    { modelId: 'claude-opus-4.6', contextLength: 128000 },
+    { modelId: 'claude-opus-4.5', contextLength: 128000 },
+    { modelId: 'gemini-2.5-pro', contextLength: 128000 },
+    { modelId: 'gemini-3-pro-preview', contextLength: 128000 },
+    { modelId: 'gemini-3-flash-preview', contextLength: 128000 },
+    { modelId: 'gemini-3.1-pro-preview', contextLength: 128000 },
+    { modelId: 'gpt-5.4', contextLength: 272000 },
+    { modelId: 'gpt-5.4-mini', contextLength: 128000 },
+    { modelId: 'gpt-5.3-codex', contextLength: 272000 },
+    { modelId: 'gpt-5.2-codex', contextLength: 272000 },
+    { modelId: 'gpt-5.2', contextLength: 128000 },
+    { modelId: 'gpt-5.1-codex', contextLength: 128000 },
+    { modelId: 'gpt-5.1-codex-max', contextLength: 128000 },
+    { modelId: 'gpt-5.1', contextLength: 128000 },
+    { modelId: 'grok-code-fast-1', contextLength: 128000 },
+  ],
+  'qwen-code': [
+    { modelId: 'coder-model', contextLength: 1000000 },
+    { modelId: 'qwen3-coder-plus', contextLength: 1000000 },
+    { modelId: 'qwen3-coder-flash', contextLength: 1000000 },
+    { modelId: 'qwen3.5-plus', contextLength: 1000000 },
+  ],
 }

 /**
--- a/packages/browseros-agent/apps/agent/entrypoints/app/connect-mcp/useGetUserMCPIntegrations.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/connect-mcp/useGetUserMCPIntegrations.tsx
@@ -24,6 +24,7 @@ export const useGetUserMCPIntegrations = () => {

  const query = useQuery({
    queryKey: [INTEGRATIONS_QUERY_KEY, agentServerUrl],
+    // biome-ignore lint/style/noNonNullAssertion: guarded by enabled
    queryFn: () => getUserMCPIntegrations(agentServerUrl!),
    enabled: !!agentServerUrl,
    refetchOnWindowFocus: true,
--- a/packages/browseros-agent/apps/agent/entrypoints/app/layout/SettingsSidebarLayout.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/layout/SettingsSidebarLayout.tsx
@@ -17,11 +17,8 @@ export const SettingsSidebarLayout: FC = () => {

  useEffect(() => {
    track(SETTINGS_PAGE_VIEWED_EVENT, { page: location.pathname })
-  }, [location.pathname])
-
-  useEffect(() => {
    setMobileOpen(false)
-  }, [])
+  }, [location.pathname])

  if (isMobile) {
    return (
--- a/packages/browseros-agent/apps/agent/entrypoints/app/layout/SidebarLayout.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/layout/SidebarLayout.tsx
@@ -7,8 +7,6 @@ import { Button } from '@/components/ui/button'
 import { Sheet, SheetContent } from '@/components/ui/sheet'
 import { ShortcutsDialog } from '@/entrypoints/newtab/index/ShortcutsDialog'
 import { useIsMobile } from '@/hooks/use-mobile'
-import { SETTINGS_PAGE_VIEWED_EVENT } from '@/lib/constants/analyticsEvents'
-import { track } from '@/lib/metrics/track'
 import { RpcClientProvider } from '@/lib/rpc/RpcClientProvider'

 const COLLAPSE_DELAY = 150
@@ -25,10 +23,6 @@ export const SidebarLayout: FC = () => {
    setShortcutsDialogOpen(true)
  }, [])

-  useEffect(() => {
-    track(SETTINGS_PAGE_VIEWED_EVENT, { page: location.pathname })
-  }, [location.pathname])
-
  useEffect(() => {
    setMobileOpen(false)
  }, [])
@@ -103,11 +97,17 @@ export const SidebarLayout: FC = () => {
        </div>

        {/* Main content - full width, centered */}
-        <main className="min-h-screen overflow-y-auto">
-          <div className="mx-auto max-w-4xl px-4 py-8 sm:px-6 lg:px-8">
+        {location.pathname === '/home/chat' ? (
+          <main className="relative h-dvh overflow-hidden">
            <Outlet />
-          </div>
-        </main>
+          </main>
+        ) : (
+          <main className="min-h-screen overflow-y-auto">
+            <div className="mx-auto max-w-4xl px-4 py-8 sm:px-6 lg:px-8">
+              <Outlet />
+            </div>
+          </main>
+        )}
      </div>
      <ShortcutsDialog
        open={shortcutsDialogOpen}
--- a/packages/browseros-agent/apps/agent/entrypoints/app/scheduled-tasks/NewScheduledTaskDialog.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/scheduled-tasks/NewScheduledTaskDialog.tsx
@@ -3,6 +3,7 @@ import { ChevronDown, Loader2, Sparkles, Undo2 } from 'lucide-react'
 import type { FC } from 'react'
 import { useEffect, useRef, useState } from 'react'
 import { useForm } from 'react-hook-form'
+import { toast } from 'sonner'
 import { z } from 'zod/v3'
 import { ChatProviderSelector } from '@/components/chat/ChatProviderSelector'
 import type { Provider } from '@/components/chat/chatComponentTypes'
@@ -34,16 +35,15 @@ import {
  SelectValue,
 } from '@/components/ui/select'
 import { Textarea } from '@/components/ui/textarea'
+import { SCHEDULED_TASK_PROMPT_REFINED_EVENT } from '@/lib/constants/analyticsEvents'
 import { BrowserOSIcon, ProviderIcon } from '@/lib/llm-providers/providerIcons'
 import {
  defaultProviderIdStorage,
  providersStorage,
 } from '@/lib/llm-providers/storage'
 import type { LlmProviderConfig, ProviderType } from '@/lib/llm-providers/types'
-import { SCHEDULED_TASK_PROMPT_REFINED_EVENT } from '@/lib/constants/analyticsEvents'
 import { track } from '@/lib/metrics/track'
 import { refinePrompt } from '@/lib/schedules/refine-prompt'
-import { toast } from 'sonner'
 import type { ScheduledJob } from './types'

 const formSchema = z
@@ -117,6 +117,7 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
  const [isRefining, setIsRefining] = useState(false)
  const originalPromptRef = useRef<string | null>(null)
  const refineRequestIdRef = useRef(0)
+  const isProgrammaticChange = useRef(false)

  // Load providers from storage
  useEffect(() => {
@@ -179,6 +180,33 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
    type: p.type,
  }))

+  // Replace the textarea content via execCommand so the browser's native undo
+  // stack (Cmd+Z / Ctrl+Z) records the change. Falls back to form.setValue
+  // when the textarea can't be found or the browser rejects the command
+  // (execCommand is deprecated and returns false when it did nothing).
+  const setQueryWithUndo = (value: string) => {
+    const textarea = document.querySelector<HTMLTextAreaElement>(
+      'textarea[name="query"]',
+    )
+    let inserted = false
+    if (textarea) {
+      // Flag the edit so the textarea's onChange keeps originalPromptRef.
+      isProgrammaticChange.current = true
+      try {
+        textarea.focus()
+        textarea.select()
+        inserted = document.execCommand('insertText', false, value)
+      } finally {
+        // Always reset, otherwise later user typing would be treated as
+        // programmatic and the "undo refine" state would never clear.
+        isProgrammaticChange.current = false
+      }
+    }
+    if (!inserted) {
+      form.setValue('query', value)
+    }
+  }
+
  const handleRefinePrompt = async () => {
    const currentQuery = form.getValues('query').trim()
    const currentName = form.getValues('name').trim()
@@ -195,7 +214,7 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
        providerId: form.getValues('providerId'),
      })
      if (requestId !== refineRequestIdRef.current) return
-      form.setValue('query', refined)
+      setQueryWithUndo(refined)
      track(SCHEDULED_TASK_PROMPT_REFINED_EVENT)
    } catch {
      if (requestId !== refineRequestIdRef.current) return
@@ -210,7 +229,7 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({

  const handleUndoRefine = () => {
    if (originalPromptRef.current !== null) {
-      form.setValue('query', originalPromptRef.current)
+      setQueryWithUndo(originalPromptRef.current)
      originalPromptRef.current = null
    }
  }
@@ -272,7 +291,7 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
                      type="button"
                      variant="ghost"
                      size="sm"
-                      className="h-auto gap-1 px-2 py-1 text-xs text-muted-foreground"
+                      className="h-auto gap-1 px-2 py-1 text-muted-foreground text-xs"
                      disabled={!queryValue?.trim() || isRefining}
                      onClick={handleRefinePrompt}
                    >
@@ -291,7 +310,10 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
                      {...field}
                      onChange={(e) => {
                        field.onChange(e)
-                        if (originalPromptRef.current !== null) {
+                        if (
+                          !isProgrammaticChange.current &&
+                          originalPromptRef.current !== null
+                        ) {
                          originalPromptRef.current = null
                        }
                      }}
@@ -300,7 +322,7 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
                  {!isRefining && originalPromptRef.current !== null ? (
                    <button
                      type="button"
-                      className="flex items-center gap-1 text-xs text-muted-foreground hover:text-foreground"
+                      className="flex items-center gap-1 text-muted-foreground text-xs hover:text-foreground"
                      onClick={handleUndoRefine}
                    >
                      <Undo2 className="h-3 w-3" />
--- a/packages/browseros-agent/apps/agent/entrypoints/app/scheduled-tasks/ScheduledTasksPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/scheduled-tasks/ScheduledTasksPage.tsx
@@ -22,9 +22,7 @@ import {
  SCHEDULED_TASK_TOGGLED_EVENT,
  SCHEDULED_TASK_VIEW_RESULTS_EVENT,
 } from '@/lib/constants/analyticsEvents'
-import { useGraphqlMutation } from '@/lib/graphql/useGraphqlMutation'
 import { track } from '@/lib/metrics/track'
-import { DeleteScheduledJobDocument } from '@/lib/schedules/graphql/syncSchedulesDocument'
 import {
  scheduledJobRunStorage,
  useScheduledJobRuns,
@@ -46,8 +44,6 @@ export const ScheduledTasksPage: FC = () => {
    useScheduledJobs()
  const { jobRuns, cancelJobRun } = useScheduledJobRuns()

-  const deleteRemoteJobMutation = useGraphqlMutation(DeleteScheduledJobDocument)
-
  const [activeTab, setActiveTab] = useState<string | null>(null)
  const [isDialogOpen, setIsDialogOpen] = useState(false)
  const [editingJob, setEditingJob] = useState<ScheduledJob | null>(null)
@@ -102,7 +98,6 @@ export const ScheduledTasksPage: FC = () => {
  const confirmDelete = async () => {
    if (deleteJobId) {
      await removeJob(deleteJobId)
-      deleteRemoteJobMutation.mutate({ rowId: deleteJobId })
      setDeleteJobId(null)
      track(SCHEDULED_TASK_DELETED_EVENT)
    }
--- a/packages/browseros-agent/apps/agent/entrypoints/app/skills/SkillsPage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/skills/SkillsPage.tsx
@@ -1,5 +1,6 @@
-import { AlertCircle, Pencil, Plus, Trash2, Wand2 } from 'lucide-react'
+import { AlertCircle, Eye, Pencil, Plus, Trash2, Wand2 } from 'lucide-react'
 import { type FC, useEffect, useState } from 'react'
+import Markdown from 'react-markdown'
 import { toast } from 'sonner'
 import {
  AlertDialog,
@@ -108,23 +109,19 @@ export const SkillsPage: FC = () => {
      ) : null}

      {!isLoading && !error && skills.length > 0 ? (
-        <div className="grid grid-cols-1 gap-3 sm:grid-cols-2 xl:grid-cols-3">
-          {skills.map((skill) => (
-            <SkillCard
-              key={skill.id}
-              skill={skill}
-              onEdit={() => handleEdit(skill)}
-              onDelete={() => setSkillToDelete(skill)}
-              onToggle={(enabled) => handleToggle(skill, enabled)}
-            />
-          ))}
-        </div>
+        <SkillSections
+          skills={skills}
+          onEdit={handleEdit}
+          onDelete={(skill) => setSkillToDelete(skill)}
+          onToggle={handleToggle}
+        />
      ) : null}

      <SkillDialog
        open={isDialogOpen}
        onOpenChange={setIsDialogOpen}
        editingSkill={editingSkill}
+        readOnly={editingSkill?.builtIn}
        onSave={async (data) => {
          try {
            if (editingSkill) {
@@ -251,6 +248,52 @@ const EmptyState: FC<{ onCreateClick: () => void }> = ({ onCreateClick }) => (
  </Card>
 )

+/** Responsive grid that lays out a group of skill cards. */
+const SkillGrid: FC<{ children: React.ReactNode }> = ({ children }) => (
+  <div className="grid grid-cols-1 gap-3 sm:grid-cols-2 xl:grid-cols-3">
+    {children}
+  </div>
+)
+
+/**
+ * Splits skills into user-created and built-in groups and renders each
+ * non-empty group under its own heading ("My Skills" first).
+ */
+const SkillSections: FC<{
+  skills: SkillMeta[]
+  onEdit: (skill: SkillMeta) => void
+  onDelete: (skill: SkillMeta) => void
+  onToggle: (skill: SkillMeta, enabled: boolean) => void
+}> = ({ skills, onEdit, onDelete, onToggle }) => {
+  const sections = [
+    { title: 'My Skills', items: skills.filter((s) => !s.builtIn) },
+    { title: 'BrowserOS Skills', items: skills.filter((s) => s.builtIn) },
+  ]
+
+  return (
+    <div className="space-y-6">
+      {sections.map(({ title, items }) =>
+        items.length > 0 ? (
+          <div key={title} className="space-y-3">
+            <h3 className="font-semibold text-sm">{title}</h3>
+            <SkillGrid>
+              {items.map((skill) => (
+                <SkillCard
+                  key={skill.id}
+                  skill={skill}
+                  onEdit={() => onEdit(skill)}
+                  onDelete={() => onDelete(skill)}
+                  onToggle={(enabled) => onToggle(skill, enabled)}
+                />
+              ))}
+            </SkillGrid>
+          </div>
+        ) : null,
+      )}
+    </div>
+  )
+}
+
 const SkillCard: FC<{
  skill: SkillMeta
  onEdit: () => void
@@ -260,7 +301,14 @@ const SkillCard: FC<{
  <Card className="h-full py-0 shadow-sm">
    <CardContent className="flex h-full flex-col p-4">
      <div className="flex items-start justify-between gap-3">
-        <h2 className="font-semibold text-sm leading-5">{skill.name}</h2>
+        <div className="flex items-center gap-2">
+          <h2 className="font-semibold text-sm leading-5">{skill.name}</h2>
+          {skill.builtIn ? (
+            <Badge variant="secondary" className="px-1.5 py-0 text-[10px]">
+              Built-in
+            </Badge>
+          ) : null}
+        </div>
        <Switch
          checked={skill.enabled}
          onCheckedChange={onToggle}
@@ -281,18 +329,29 @@ const SkillCard: FC<{
          onClick={onEdit}
          className="-ml-2 h-7 px-2 text-muted-foreground hover:bg-transparent hover:text-foreground"
        >
-          <Pencil className="size-3.5" />
-          Edit
-        </Button>
-        <Button
-          variant="ghost"
-          size="icon-sm"
-          onClick={onDelete}
-          className="size-7 text-muted-foreground hover:bg-transparent hover:text-destructive"
-          aria-label={`Delete ${skill.name}`}
-        >
-          <Trash2 className="size-4" />
+          {skill.builtIn ? (
+            <>
+              <Eye className="size-3.5" />
+              View
+            </>
+          ) : (
+            <>
+              <Pencil className="size-3.5" />
+              Edit
+            </>
+          )}
        </Button>
+        {!skill.builtIn ? (
+          <Button
+            variant="ghost"
+            size="icon-sm"
+            onClick={onDelete}
+            className="size-7 text-muted-foreground hover:bg-transparent hover:text-destructive"
+            aria-label={`Delete ${skill.name}`}
+          >
+            <Trash2 className="size-4" />
+          </Button>
+        ) : null}
      </div>
    </CardContent>
  </Card>
@@ -302,12 +361,13 @@ const SkillDialog: FC<{
  open: boolean
  onOpenChange: (open: boolean) => void
  editingSkill: SkillDetail | null
+  readOnly?: boolean
  onSave: (data: {
    name: string
    description: string
    content: string
  }) => Promise<void>
-}> = ({ open, onOpenChange, editingSkill, onSave }) => {
+}> = ({ open, onOpenChange, editingSkill, readOnly, onSave }) => {
  const [name, setName] = useState('')
  const [description, setDescription] = useState('')
  const [content, setContent] = useState('')
@@ -354,12 +414,18 @@ const SkillDialog: FC<{
      <DialogContent className="flex max-h-[90vh] flex-col gap-0 overflow-hidden p-0 sm:max-w-5xl">
        <DialogHeader className="border-b px-6 py-5">
          <DialogTitle>
-            {editingSkill ? 'Edit Skill' : 'Create Skill'}
+            {readOnly
+              ? 'View Skill'
+              : editingSkill
+                ? 'Edit Skill'
+                : 'Create Skill'}
          </DialogTitle>
          <DialogDescription>
-            {editingSkill
-              ? 'Refine when the agent should use this skill and how it should execute it.'
-              : 'Define a reusable instruction set your agent can apply when a request matches.'}
+            {readOnly
+              ? 'This skill is managed by BrowserOS and updated automatically.'
+              : editingSkill
+                ? 'Refine when the agent should use this skill and how it should execute it.'
+                : 'Define a reusable instruction set your agent can apply when a request matches.'}
          </DialogDescription>
        </DialogHeader>

@@ -373,6 +439,7 @@ const SkillDialog: FC<{
                value={name}
                onChange={(event) => setName(event.target.value)}
                maxLength={100}
+                readOnly={readOnly}
              />
              <p className="text-muted-foreground text-xs leading-5">
                Keep it short and recognizable in the skills list.
@@ -388,19 +455,22 @@ const SkillDialog: FC<{
                onChange={(event) => setDescription(event.target.value)}
                maxLength={500}
                className="min-h-28 resize-none bg-background"
+                readOnly={readOnly}
              />
              <p className="text-muted-foreground text-xs leading-5">
                This is the trigger summary the agent uses to pick the skill.
              </p>
            </div>

-            <div className="mt-auto rounded-lg border border-border/60 border-dashed bg-muted/30 px-3 py-2.5">
-              <p className="font-medium text-muted-foreground text-xs">Tip</p>
-              <ul className="mt-1.5 list-disc space-y-1 pl-4 text-muted-foreground text-xs leading-5">
-                <li>List the ordered steps the agent should follow.</li>
-                <li>Close with the output or formatting you expect back.</li>
-              </ul>
-            </div>
+            {!readOnly ? (
+              <div className="mt-auto rounded-lg border border-border/60 border-dashed bg-muted/30 px-3 py-2.5">
+                <p className="font-medium text-muted-foreground text-xs">Tip</p>
+                <ul className="mt-1.5 list-disc space-y-1 pl-4 text-muted-foreground text-xs leading-5">
+                  <li>List the ordered steps the agent should follow.</li>
+                  <li>Close with the output or formatting you expect back.</li>
+                </ul>
+              </div>
+            ) : null}
          </div>

          <div className="flex min-h-0 flex-col px-6 py-5">
@@ -411,36 +481,52 @@ const SkillDialog: FC<{
              </Badge>
            </div>

-            <MarkdownEditor
-              id="skill-content"
-              value={content}
-              onChange={setContent}
-              onKeyDown={handleContentKeyDown}
-              placeholder="Write instructions for the agent. Use markdown for structure."
-              className="mt-4 min-h-[320px] flex-1 overflow-y-auto text-sm"
-            />
+            {readOnly ? (
+              <div className="prose prose-sm dark:prose-invert mt-4 min-h-[320px] max-w-none flex-1 overflow-y-auto rounded-md border p-4 text-sm">
+                <Markdown>{content}</Markdown>
+              </div>
+            ) : (
+              <MarkdownEditor
+                id="skill-content"
+                value={content}
+                onChange={setContent}
+                onKeyDown={handleContentKeyDown}
+                placeholder="Write instructions for the agent. Use markdown for structure."
+                className="mt-4 min-h-[320px] flex-1 overflow-y-auto text-sm"
+              />
+            )}
          </div>
        </div>

        <div className="flex flex-col gap-3 border-t px-6 py-4 sm:flex-row sm:items-center sm:justify-between">
          <p className="text-muted-foreground text-xs">
-            Saved locally and available to your agent immediately.
+            {readOnly
+              ? 'This skill is managed by BrowserOS and updated automatically.'
+              : 'Saved locally and available to your agent immediately.'}
          </p>
          <div className="flex flex-col-reverse gap-2 sm:flex-row">
-            <Button
-              variant="outline"
-              onClick={() => onOpenChange(false)}
-              disabled={saving}
-            >
-              Cancel
-            </Button>
-            <Button onClick={handleSubmit} disabled={!isValid || saving}>
-              {saving
-                ? 'Saving...'
-                : editingSkill
-                  ? 'Update Skill'
-                  : 'Create Skill'}
-            </Button>
+            {readOnly ? (
+              <Button variant="outline" onClick={() => onOpenChange(false)}>
+                Close
+              </Button>
+            ) : (
+              <>
+                <Button
+                  variant="outline"
+                  onClick={() => onOpenChange(false)}
+                  disabled={saving}
+                >
+                  Cancel
+                </Button>
+                <Button onClick={handleSubmit} disabled={!isValid || saving}>
+                  {saving
+                    ? 'Saving...'
+                    : editingSkill
+                      ? 'Update Skill'
+                      : 'Create Skill'}
+                </Button>
+              </>
+            )}
          </div>
        </div>
      </DialogContent>
--- a/packages/browseros-agent/apps/agent/entrypoints/app/skills/useSkills.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/skills/useSkills.ts
@@ -7,6 +7,7 @@ export type SkillMeta = {
  description: string
  location: string
  enabled: boolean
+  builtIn: boolean
 }

 export type SkillDetail = SkillMeta & {
--- a/packages/browseros-agent/apps/agent/entrypoints/app/usage/UsagePage.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/app/usage/UsagePage.tsx
@@ -0,0 +1,130 @@
+import { AlertCircle, Clock, Coins, CreditCard, Zap } from 'lucide-react'
+import type { FC } from 'react'
+import { Button } from '@/components/ui/button'
+import {
+  getCreditBarColor,
+  getCreditTextColor,
+} from '@/lib/credits/credit-colors'
+import { useCredits } from '@/lib/credits/useCredits'
+import { BrowserOSIcon } from '@/lib/llm-providers/providerIcons'
+import { cn } from '@/lib/utils'
+
+/** Title card shared by the error and loaded states of the usage page. */
+const UsageHeader: FC = () => (
+  <div className="flex items-center gap-4 rounded-xl border p-5">
+    <BrowserOSIcon size={40} />
+    <div>
+      <h2 className="font-semibold text-lg">Usage & Billing</h2>
+      <p className="text-muted-foreground text-sm">
+        Monitor your BrowserOS AI credit usage
+      </p>
+    </div>
+  </div>
+)
+
+/**
+ * Page showing the remaining daily credits reported by `useCredits`, a
+ * progress bar for them, and a disabled "Add Credits" placeholder.
+ */
+export const UsagePage: FC = () => {
+  const { data, isLoading, error } = useCredits()
+
+  if (isLoading) {
+    return (
+      <div className="flex items-center justify-center p-12 text-muted-foreground text-sm">
+        Loading usage data...
+      </div>
+    )
+  }
+
+  if (error) {
+    return (
+      <div className="space-y-6 p-6">
+        <UsageHeader />
+        <div className="flex flex-col items-center gap-3 rounded-xl border border-destructive/30 bg-destructive/5 p-8">
+          <AlertCircle className="h-6 w-6 text-muted-foreground" />
+          <p className="text-muted-foreground text-sm">
+            Unable to load credit information
+          </p>
+        </div>
+      </div>
+    )
+  }
+
+  const credits = data?.credits ?? 0
+  const total = data?.dailyLimit ?? 100
+  // A zero limit would make the ratio NaN/Infinity, and a balance outside
+  // 0..total would push the bar or the "used" figure out of range, so clamp.
+  const percentage =
+    total > 0 ? Math.min(Math.max((credits / total) * 100, 0), 100) : 0
+  const used = Math.max(total - credits, 0)
+
+  return (
+    <div className="space-y-6 p-6">
+      <UsageHeader />
+
+      <div className="rounded-xl border p-5">
+        <div className="mb-4 flex items-center justify-between">
+          <div className="flex items-center gap-2">
+            <Coins className="h-5 w-5 text-muted-foreground" />
+            <span className="font-semibold text-sm">Daily Credits</span>
+          </div>
+          <span
+            className={cn('font-bold text-2xl', getCreditTextColor(credits))}
+          >
+            {credits}
+            <span className="ml-1 font-normal text-muted-foreground text-sm">
+              / {total}
+            </span>
+          </span>
+        </div>
+
+        <div className="mb-5 h-2.5 w-full overflow-hidden rounded-full bg-muted">
+          <div
+            className={cn(
+              'h-full rounded-full transition-all duration-500',
+              getCreditBarColor(credits),
+            )}
+            style={{ width: `${percentage}%` }}
+          />
+        </div>
+
+        <div className="grid grid-cols-2 gap-3">
+          <div className="flex items-center gap-2.5 rounded-lg bg-muted/50 px-3 py-2.5">
+            <Clock className="h-4 w-4 shrink-0 text-muted-foreground" />
+            <div>
+              <p className="font-medium text-xs">Resets daily</p>
+              <p className="text-muted-foreground text-xs">Midnight UTC</p>
+            </div>
+          </div>
+          <div className="flex items-center gap-2.5 rounded-lg bg-muted/50 px-3 py-2.5">
+            <Zap className="h-4 w-4 shrink-0 text-muted-foreground" />
+            <div>
+              <p className="font-medium text-xs">Credits used today</p>
+              <p className="text-muted-foreground text-xs">
+                {used} of {total}
+              </p>
+            </div>
+          </div>
+        </div>
+      </div>
+
+      <div className="rounded-xl border p-5">
+        <div className="flex items-center justify-between">
+          <div className="flex items-center gap-3">
+            <CreditCard className="h-5 w-5 text-muted-foreground" />
+            <div>
+              <p className="font-semibold text-sm">Need more credits?</p>
+              <p className="text-muted-foreground text-xs">
+                Additional credit packages coming soon
+              </p>
+            </div>
+          </div>
+          <Button variant="outline" size="sm" disabled className="opacity-50">
+            Add Credits
+          </Button>
+        </div>
+      </div>
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/background/index.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/background/index.ts
@@ -18,6 +18,7 @@ import {
  syncScheduledJobs,
 } from '@/lib/schedules/scheduleStorage'
 import { searchActionsStorage } from '@/lib/search-actions/searchActionsStorage'
+import { selectedTextStorage } from '@/lib/selected-text/selectedTextStorage'
 import { stopAgentStorage } from '@/lib/stop-agent/stop-agent-storage'
 import { scheduledJobRuns } from './scheduledJobRuns'

@@ -66,7 +67,12 @@ export default defineBackground(() => {
    }
  })

-  chrome.runtime.onMessage.addListener((message, sender) => {
+  chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
+    if (message?.type === 'get-tab-id') {
+      sendResponse({ tabId: sender.tab?.id })
+      return true
+    }
+
    if (message?.type === 'AUTH_SUCCESS' && sender.tab?.id) {
      const tabId = sender.tab.id
      authRedirectPathStorage
@@ -93,6 +99,17 @@ export default defineBackground(() => {
    }
  })

+  // Clean up selected text storage when a tab is closed: remove only that
+  // tab's entry, and skip the write when nothing was stored for it.
+  chrome.tabs.onRemoved.addListener((closedTabId) => {
+    const key = String(closedTabId)
+    selectedTextStorage.getValue().then((entries) => {
+      if (!entries[key]) return
+      const { [key]: _, ...remaining } = entries
+      selectedTextStorage.setValue(remaining)
+    })
+  })
+
  sessionStorage.watch(async (newSession) => {
    if (newSession?.user?.id) {
      try {
--- a/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTab.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTab.tsx
@@ -5,12 +5,17 @@ import {
  Folder,
  Globe,
  Layers,
+  Loader2,
+  Mic,
  PlugZap,
  Search,
+  Square,
  X,
 } from 'lucide-react'
 import { AnimatePresence, motion } from 'motion/react'
 import { useCallback, useEffect, useRef, useState } from 'react'
+import { useNavigate } from 'react-router'
+import { ChatProviderSelector } from '@/components/chat/ChatProviderSelector'
 import { AppSelector } from '@/components/elements/AppSelector'
 import {
  GlowingBorder,
@@ -36,20 +41,26 @@ import {
 import {
  NEWTAB_AI_TRIGGERED_EVENT,
  NEWTAB_APPS_OPENED_EVENT,
-  NEWTAB_CHAT_RESET_EVENT,
  NEWTAB_CHAT_STARTED_EVENT,
  NEWTAB_OPENED_EVENT,
  NEWTAB_SEARCH_EXECUTED_EVENT,
  NEWTAB_TAB_REMOVED_EVENT,
  NEWTAB_TAB_TOGGLED_EVENT,
  NEWTAB_TABS_OPENED_EVENT,
+  NEWTAB_VOICE_ERROR_EVENT,
+  NEWTAB_VOICE_RECORDING_STARTED_EVENT,
+  NEWTAB_VOICE_RECORDING_STOPPED_EVENT,
+  NEWTAB_VOICE_TRANSCRIPTION_COMPLETED_EVENT,
  NEWTAB_WORKSPACE_OPENED_EVENT,
 } from '@/lib/constants/analyticsEvents'
+import { BrowserOSIcon, ProviderIcon } from '@/lib/llm-providers/providerIcons'
+import type { ProviderType } from '@/lib/llm-providers/types'
 import { useMcpServers } from '@/lib/mcp/mcpServerStorage'
 import { useSyncRemoteIntegrations } from '@/lib/mcp/useSyncRemoteIntegrations'
 import { openSidePanelWithSearch } from '@/lib/messaging/sidepanel/openSidepanelWithSearch'
 import { track } from '@/lib/metrics/track'
 import { cn } from '@/lib/utils'
+import { useVoiceInput } from '@/lib/voice/useVoiceInput'
 import { useWorkspace } from '@/lib/workspace/use-workspace'
 import { ImportDataHint } from './ImportDataHint'
 import type { SuggestionItem } from './lib/suggestions/types'
@@ -58,7 +69,6 @@ import {
  useSuggestions,
 } from './lib/suggestions/useSuggestions'
 import { NewTabBranding } from './NewTabBranding'
-import { NewTabChat } from './NewTabChat'
 import { NewTabTip } from './NewTabTip'
 import { ScheduleResults } from './ScheduleResults'
 import { SearchSuggestions } from './SearchSuggestions'
@@ -78,13 +88,13 @@ interface MentionState {
 */
 export const NewTab = () => {
  const activeHint = useActiveHint()
+  const navigate = useNavigate()
  const [inputValue, setInputValue] = useState('')
  const [mounted, setMounted] = useState(false)
  const inputRef = useRef<HTMLInputElement>(null)
  const tabsDropdownRef = useRef<HTMLDivElement>(null)
  const [selectedTabs, setSelectedTabs] = useState<chrome.tabs.Tab[]>([])
  const [shortcutsDialogOpen, setShortcutsDialogOpen] = useState(false)
-  const [chatActive, setChatActive] = useState(false)
  const [mentionState, setMentionState] = useState<MentionState>({
    isOpen: false,
    filterText: '',
@@ -92,12 +102,41 @@ export const NewTab = () => {
  })
  const { selectedFolder } = useWorkspace()
  const { supports } = useCapabilities()
+  const { providers, selectedProvider, handleSelectProvider } =
+    useChatSessionContext()
  const { servers: mcpServers } = useMcpServers()
  const { data: userMCPIntegrations } = useGetUserMCPIntegrations()
  useSyncRemoteIntegrations()

-  const { messages, sendMessage, setMode, resetConversation } =
-    useChatSessionContext()
+  const voice = useVoiceInput()
+
+  // Voice transcript → populate search input
+  // biome-ignore lint/correctness/useExhaustiveDependencies: only trigger on transcript/transcribing change
+  useEffect(() => {
+    if (voice.transcript && !voice.isTranscribing) {
+      setComboboxInputValue(voice.transcript)
+      track(NEWTAB_VOICE_TRANSCRIPTION_COMPLETED_EVENT)
+      voice.clearTranscript()
+    }
+  }, [voice.transcript, voice.isTranscribing])
+
+  useEffect(() => {
+    if (voice.error) {
+      track(NEWTAB_VOICE_ERROR_EVENT, { error: voice.error })
+    }
+  }, [voice.error])
+
+  // Mic button: track the start only if voice.startRecording() resolves truthy.
+  const handleStartRecording = async () => {
+    const didStart = await voice.startRecording()
+    if (didStart) track(NEWTAB_VOICE_RECORDING_STARTED_EVENT)
+  }
+
+  // Stop button: track the stop after voice.stopRecording() resolves.
+  const handleStopRecording = async () => {
+    await voice.stopRecording()
+    track(NEWTAB_VOICE_RECORDING_STOPPED_EVENT)
+  }

  const connectedManagedServers = mcpServers.filter((s) => {
    if (s.type !== 'managed' || !s.managedServerName) return false
@@ -275,17 +314,28 @@ export const NewTab = () => {

  const startInlineChat = (
    message: string,
-    mode: 'chat' | 'agent',
-    action?: ReturnType<
-      typeof createBrowserOSAction | typeof createAITabAction
-    >,
+    chatMode: 'chat' | 'agent',
+    aiTab?: { name: string; description: string },
  ) => {
-    track(NEWTAB_CHAT_STARTED_EVENT, { mode, tabs_count: selectedTabs.length })
-    setMode(mode)
-    setChatActive(true)
-    sendMessage({ text: message, action })
+    track(NEWTAB_CHAT_STARTED_EVENT, {
+      mode: chatMode,
+      tabs_count: selectedTabs.length,
+    })
+    const tabIds = selectedTabs
+      .map((t) => t.id)
+      .filter((id): id is number => id !== undefined)
    reset()
    setSelectedTabs([])
+    const params = new URLSearchParams({ q: message, mode: chatMode })
+    if (tabIds.length > 0) {
+      params.set('tabs', tabIds.join(','))
+    }
+    if (aiTab) {
+      params.set('actionType', 'ai-tab')
+      params.set('tabName', aiTab.name)
+      params.set('tabDescription', aiTab.description)
+    }
+    navigate(`/home/chat?${params.toString()}`)
  }

  const runSelectedAction = (item: SuggestionItem | undefined) => {
@@ -306,15 +356,18 @@ export const NewTab = () => {
          mode: 'agent',
          tabs_count: selectedTabs.length,
        })
-        const action = createAITabAction({
-          name: item.name,
-          description: item.description,
-          tabs: selectedTabs,
-        })
        const searchQuery = `${item.name}${item.description ? ` - ${item.description}` : ''}}`
        if (supports(Feature.NEWTAB_CHAT_SUPPORT)) {
-          startInlineChat(searchQuery, 'agent', action)
+          startInlineChat(searchQuery, 'agent', {
+            name: item.name,
+            description: item.description,
+          })
        } else {
+          const action = createAITabAction({
+            name: item.name,
+            description: item.description,
+            tabs: selectedTabs,
+          })
          openSidePanelWithSearch('open', {
            query: searchQuery,
            mode: 'agent',
@@ -330,14 +383,14 @@ export const NewTab = () => {
          mode: item.mode,
          tabs_count: selectedTabs.length,
        })
-        const action = createBrowserOSAction({
-          mode: item.mode,
-          message: item.message,
-          tabs: selectedTabs,
-        })
        if (supports(Feature.NEWTAB_CHAT_SUPPORT)) {
-          startInlineChat(item.message, item.mode, action)
+          startInlineChat(item.message, item.mode)
        } else {
+          const action = createBrowserOSAction({
+            mode: item.mode,
+            message: item.message,
+            tabs: selectedTabs,
+          })
          openSidePanelWithSearch('open', {
            query: item.message,
            mode: item.mode,
@@ -351,12 +404,6 @@ export const NewTab = () => {
    }
  }

-  const handleBackToSearch = () => {
-    track(NEWTAB_CHAT_RESET_EVENT, { message_count: messages.length })
-    resetConversation()
-    setChatActive(false)
-  }
-
  const isSuggestionsVisible =
    !mentionState.isOpen &&
    ((isOpen && inputValue.length) ||
@@ -368,10 +415,6 @@ export const NewTab = () => {
    track(NEWTAB_OPENED_EVENT)
  }, [])

-  if (chatActive) {
-    return <NewTabChat onBackToSearch={handleBackToSearch} />
-  }
-
  return (
    <div className="pt-[max(25vh,16px)]">
      {/* Main content */}
@@ -425,32 +468,89 @@ export const NewTab = () => {
                anchorRef={inputRef}
                side="bottom"
              />
-              <input
-                type="text"
-                placeholder={searchPlaceholder}
-                className="flex-1 border-none bg-transparent text-base text-foreground outline-none placeholder:text-muted-foreground"
-                {...getInputProps({
-                  ref: inputRef,
-                  onChange: (e) => handleInputChange(e.currentTarget.value),
-                  onKeyDown: (e) => {
-                    if (!mentionStateRef.current.isOpen) return
-                    if (e.key === 'Tab') {
-                      e.preventDefault()
-                      closeMention()
-                    }
-                  },
-                })}
-              />
+              {voice.isRecording ? (
+                <div className="flex min-h-[40px] flex-1 items-center justify-center gap-1.5">
+                  {voice.audioLevels.map((level, i) => (
+                    <div
+                      key={i.toString()}
+                      className="w-1.5 rounded-full bg-red-500 transition-all duration-75"
+                      style={{
+                        height: `${Math.max(6, Math.min(28, level * 0.7))}px`,
+                      }}
+                    />
+                  ))}
+                </div>
+              ) : (
+                <input
+                  type="text"
+                  placeholder={
+                    voice.isTranscribing ? 'Transcribing...' : searchPlaceholder
+                  }
+                  disabled={voice.isTranscribing}
+                  className="flex-1 border-none bg-transparent text-base text-foreground outline-none placeholder:text-muted-foreground disabled:opacity-60"
+                  {...getInputProps({
+                    ref: inputRef,
+                    onChange: (e) => handleInputChange(e.currentTarget.value),
+                    onKeyDown: (e) => {
+                      if (!mentionStateRef.current.isOpen) return
+                      if (e.key === 'Tab') {
+                        e.preventDefault()
+                        closeMention()
+                      }
+                    },
+                  })}
+                />
+              )}

-              <Button
-                onClick={handleSend}
-                size="icon"
-                className="h-10 w-10 flex-shrink-0 rounded-xl bg-primary text-primary-foreground hover:bg-primary/90"
-              >
-                <ArrowRight className="h-5 w-5" />
-              </Button>
+              <div className="flex items-center gap-1.5">
+                {voice.isRecording ? (
+                  <Button
+                    type="button"
+                    size="icon"
+                    onClick={handleStopRecording}
+                    className="h-10 w-10 flex-shrink-0 rounded-xl bg-red-600 text-white hover:bg-red-700"
+                  >
+                    <Square className="h-4 w-4" />
+                  </Button>
+                ) : voice.isTranscribing ? (
+                  <Button
+                    type="button"
+                    variant="ghost"
+                    size="icon"
+                    disabled
+                    className="h-10 w-10 flex-shrink-0 rounded-xl"
+                  >
+                    <Loader2 className="h-5 w-5 animate-spin" />
+                  </Button>
+                ) : (
+                  <Button
+                    type="button"
+                    variant="ghost"
+                    size="icon"
+                    onClick={handleStartRecording}
+                    className="h-10 w-10 flex-shrink-0 rounded-xl text-muted-foreground transition-colors hover:text-foreground"
+                    title="Voice input"
+                  >
+                    <Mic className="h-5 w-5" />
+                  </Button>
+                )}
+                <Button
+                  onClick={handleSend}
+                  size="icon"
+                  disabled={voice.isRecording || voice.isTranscribing}
+                  className="h-10 w-10 flex-shrink-0 rounded-xl bg-primary text-primary-foreground hover:bg-primary/90"
+                >
+                  <ArrowRight className="h-5 w-5" />
+                </Button>
+              </div>
            </div>

+            {voice.error && (
+              <div className="px-5 pb-2 text-destructive text-xs">
+                {voice.error}
+              </div>
+            )}
+
            <AnimatePresence>
              {selectedTabs.length > 0 && (
                <motion.div
@@ -524,6 +624,34 @@ export const NewTab = () => {
            {mounted && (
              <div className="flex items-center justify-between border-border/50 border-t px-5 py-3">
                <div className="flex items-center gap-1">
+                  {selectedProvider && (
+                    <ChatProviderSelector
+                      providers={providers}
+                      selectedProvider={selectedProvider}
+                      onSelectProvider={handleSelectProvider}
+                    >
+                      <Button
+                        variant="ghost"
+                        size="icon"
+                        title={selectedProvider.name}
+                        className={cn(
+                          'h-8 w-8 rounded-lg transition-all',
+                          'text-muted-foreground hover:bg-accent hover:text-accent-foreground',
+                          'data-[state=open]:bg-accent',
+                        )}
+                      >
+                        {selectedProvider.type === 'browseros' ? (
+                          <BrowserOSIcon size={16} />
+                        ) : (
+                          <ProviderIcon
+                            type={selectedProvider.type as ProviderType}
+                            size={16}
+                          />
+                        )}
+                      </Button>
+                    </ChatProviderSelector>
+                  )}
+
                  {supports(Feature.WORKSPACE_FOLDER_SUPPORT) && (
                    <WorkspaceSelector>
                      <Button
--- a/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTabChat.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTabChat.tsx
@@ -1,35 +1,41 @@
 import { Loader2 } from 'lucide-react'
-import { type FC, useEffect, useState } from 'react'
+import { type FC, useEffect, useRef } from 'react'
+import { useSearchParams } from 'react-router'
 import { ChatEmptyState } from '@/entrypoints/sidepanel/index/ChatEmptyState'
 import { ChatError } from '@/entrypoints/sidepanel/index/ChatError'
 import { ChatFooter } from '@/entrypoints/sidepanel/index/ChatFooter'
+import { ChatHeader } from '@/entrypoints/sidepanel/index/ChatHeader'
 import { ChatMessages } from '@/entrypoints/sidepanel/index/ChatMessages'
-import type { ChatMode } from '@/entrypoints/sidepanel/index/chatTypes'
-import { useChatSessionContext } from '@/entrypoints/sidepanel/layout/ChatSessionContext'
-import { createBrowserOSAction } from '@/lib/chat-actions/types'
 import {
+  createAITabAction,
+  createBrowserOSAction,
+} from '@/lib/chat-actions/types'
+import { useChatActions } from '@/lib/chat-actions/useChatActions'
+import {
+  NEWTAB_AI_TRIGGERED_EVENT,
  NEWTAB_CHAT_MODE_CHANGED_EVENT,
  NEWTAB_CHAT_RESET_EVENT,
  NEWTAB_CHAT_STOPPED_EVENT,
  NEWTAB_CHAT_SUGGESTION_CLICKED_EVENT,
  NEWTAB_TAB_REMOVED_EVENT,
  NEWTAB_TAB_TOGGLED_EVENT,
+  NEWTAB_VOICE_ERROR_EVENT,
+  NEWTAB_VOICE_RECORDING_STARTED_EVENT,
+  NEWTAB_VOICE_RECORDING_STOPPED_EVENT,
+  NEWTAB_VOICE_TRANSCRIPTION_COMPLETED_EVENT,
 } from '@/lib/constants/analyticsEvents'
 import { track } from '@/lib/metrics/track'
-import { NewTabChatHeader } from './NewTabChatHeader'

-interface NewTabChatProps {
-  onBackToSearch: () => void
-}
+export const NewTabChat: FC = () => {
+  const [searchParams, setSearchParams] = useSearchParams()
+  const hasSentInitialRef = useRef(false)

-export const NewTabChat: FC<NewTabChatProps> = ({ onBackToSearch }) => {
  const {
    mode,
    setMode,
    messages,
    sendMessage,
    status,
-    stop,
    agentUrlError,
    chatError,
    getActionForMessage,
@@ -42,71 +48,80 @@ export const NewTabChat: FC<NewTabChatProps> = ({ onBackToSearch }) => {
    selectedProvider,
    handleSelectProvider,
    resetConversation,
-  } = useChatSessionContext()
-
-  const [input, setInput] = useState('')
-  const [attachedTabs, setAttachedTabs] = useState<chrome.tabs.Tab[]>([])
-  const [mounted, setMounted] = useState(false)
+    input,
+    setInput,
+    attachedTabs,
+    mounted,
+    voiceState,
+    handleModeChange,
+    handleStop,
+    toggleTabSelection,
+    removeTab,
+    handleSubmit,
+    handleSuggestionClick,
+  } = useChatActions({
+    events: {
+      modeChanged: NEWTAB_CHAT_MODE_CHANGED_EVENT,
+      stopClicked: NEWTAB_CHAT_STOPPED_EVENT,
+      suggestionClicked: NEWTAB_CHAT_SUGGESTION_CLICKED_EVENT,
+      tabToggled: NEWTAB_TAB_TOGGLED_EVENT,
+      tabRemoved: NEWTAB_TAB_REMOVED_EVENT,
+      aiTriggered: NEWTAB_AI_TRIGGERED_EVENT,
+      voiceRecordingStarted: NEWTAB_VOICE_RECORDING_STARTED_EVENT,
+      voiceRecordingStopped: NEWTAB_VOICE_RECORDING_STOPPED_EVENT,
+      voiceTranscriptionCompleted: NEWTAB_VOICE_TRANSCRIPTION_COMPLETED_EVENT,
+      voiceError: NEWTAB_VOICE_ERROR_EVENT,
+    },
+  })

+  // Send the initial message from URL query params (from /home search bar).
+  // Guarded by ref to prevent double-fire in React Strict Mode.
+  // biome-ignore lint/correctness/useExhaustiveDependencies: must only run once on mount
  useEffect(() => {
-    setMounted(true)
-  }, [])
+    if (hasSentInitialRef.current) return
+    const query = searchParams.get('q')
+    const chatMode = searchParams.get('mode')
+    const tabIdsParam = searchParams.get('tabs')
+    if (!query) return

-  const handleModeChange = (newMode: ChatMode) => {
-    track(NEWTAB_CHAT_MODE_CHANGED_EVENT, { from: mode, to: newMode })
-    setMode(newMode)
-  }
-
-  const handleStop = () => {
-    track(NEWTAB_CHAT_STOPPED_EVENT)
-    stop()
-  }
-
-  const toggleTabSelection = (tab: chrome.tabs.Tab) => {
-    setAttachedTabs((prev) => {
-      const isSelected = prev.some((t) => t.id === tab.id)
-      track(NEWTAB_TAB_TOGGLED_EVENT, {
-        action: isSelected ? 'removed' : 'added',
-      })
-      if (isSelected) {
-        return prev.filter((t) => t.id !== tab.id)
-      }
-      return [...prev, tab]
-    })
-  }
-
-  const removeTab = (tabId?: number) => {
-    track(NEWTAB_TAB_REMOVED_EVENT)
-    setAttachedTabs((prev) => prev.filter((t) => t.id !== tabId))
-  }
-
-  const executeMessage = (customMessageText?: string) => {
-    const messageText = customMessageText ? customMessageText : input.trim()
-    if (!messageText) return
-
-    if (attachedTabs.length) {
-      const action = createBrowserOSAction({
-        mode,
-        message: messageText,
-        tabs: attachedTabs,
-      })
-      sendMessage({ text: messageText, action })
-    } else {
-      sendMessage({ text: messageText })
+    hasSentInitialRef.current = true
+    if (chatMode === 'chat' || chatMode === 'agent') {
+      setMode(chatMode)
    }
-    setInput('')
-    setAttachedTabs([])
-  }
+    setSearchParams({}, { replace: true })

-  const handleSubmit = (e: React.FormEvent) => {
-    e.preventDefault()
-    executeMessage()
-  }
+    const actionType = searchParams.get('actionType')
+    const tabName = searchParams.get('tabName')
+    const tabDescription = searchParams.get('tabDescription')

-  const handleSuggestionClick = (suggestion: string) => {
-    track(NEWTAB_CHAT_SUGGESTION_CLICKED_EVENT, { mode })
-    executeMessage(suggestion)
-  }
+    // Attach the tabs listed in ?tabs= that still exist; with no live match
+    // (or no tabs param) the query is sent as a plain message.
+    if (tabIdsParam) {
+      const tabIds = tabIdsParam.split(',').map(Number).filter(Boolean)
+      chrome.tabs.query({}).then((allTabs) => {
+        const matchedTabs = allTabs.filter(
+          (t) => t.id !== undefined && tabIds.includes(t.id),
+        )
+        if (matchedTabs.length === 0) return sendMessage({ text: query })
+        const action =
+          actionType === 'ai-tab' && tabName
+            ? createAITabAction({
+                name: tabName,
+                description: tabDescription ?? '',
+                tabs: matchedTabs,
+              })
+            : createBrowserOSAction({
+                // URL params are free-form: validate rather than cast.
+                mode: chatMode === 'chat' ? 'chat' : 'agent',
+                message: query,
+                tabs: matchedTabs,
+              })
+        sendMessage({ text: query, action })
+      })
+    } else {
+      sendMessage({ text: query })
+    }
+  }, [])

  const handleNewConversation = () => {
    track(NEWTAB_CHAT_RESET_EVENT, { message_count: messages.length })
@@ -116,17 +131,19 @@ export const NewTabChat: FC<NewTabChatProps> = ({ onBackToSearch }) => {
  if (!selectedProvider) return null

  return (
-    <div className="flex h-[calc(100vh-2rem)] flex-col">
-      <NewTabChatHeader
-        selectedProvider={selectedProvider}
-        providers={providers}
-        onSelectProvider={handleSelectProvider}
-        onNewConversation={handleNewConversation}
-        onBackToSearch={onBackToSearch}
-        hasMessages={messages.length > 0}
-      />
+    <div className="absolute inset-0 flex flex-col overflow-hidden">
+      <div className="mx-auto w-full max-w-3xl">
+        <ChatHeader
+          selectedProvider={selectedProvider}
+          providers={providers}
+          onSelectProvider={handleSelectProvider}
+          onNewConversation={handleNewConversation}
+          hasMessages={messages.length > 0}
+          hideHistory
+        />
+      </div>

-      <main className="mx-auto flex w-full max-w-3xl flex-1 flex-col space-y-4 overflow-y-auto px-4 pt-4">
+      <main className="styled-scrollbar [&_[data-streamdown='code-block']]:!max-w-full [&_[data-streamdown='code-block']]:!w-auto [&_[data-streamdown='table-wrapper']]:!max-w-full [&_[data-streamdown='table-wrapper']]:!w-auto mx-auto flex min-h-0 w-full max-w-3xl flex-1 flex-col space-y-4 overflow-y-auto overflow-x-hidden px-4 pt-4 [&_[data-streamdown='code-block']]:overflow-x-auto [&_[data-streamdown='table-wrapper']]:overflow-x-auto">
        {isRestoringConversation ? (
          <div className="flex flex-1 items-center justify-center">
            <Loader2 className="h-6 w-6 animate-spin text-muted-foreground" />
@@ -156,7 +173,7 @@ export const NewTabChat: FC<NewTabChatProps> = ({ onBackToSearch }) => {
        {chatError && <ChatError error={chatError} />}
      </main>

-      <div className="mx-auto w-full max-w-3xl px-4">
+      <div className="mx-auto w-full max-w-3xl flex-shrink-0 px-4 pb-2">
        <ChatFooter
          mode={mode}
          onModeChange={handleModeChange}
@@ -168,6 +185,7 @@ export const NewTabChat: FC<NewTabChatProps> = ({ onBackToSearch }) => {
          attachedTabs={attachedTabs}
          onToggleTab={toggleTabSelection}
          onRemoveTab={removeTab}
+          voice={voiceState}
        />
      </div>
    </div>
--- a/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTabChatHeader.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/newtab/index/NewTabChatHeader.tsx
@@ -1,78 +0,0 @@
-import { ArrowLeft, Plus } from 'lucide-react'
-import type { FC } from 'react'
-import { ChatProviderSelector } from '@/components/chat/ChatProviderSelector'
-import type { Provider } from '@/components/chat/chatComponentTypes'
-import { BrowserOSIcon, ProviderIcon } from '@/lib/llm-providers/providerIcons'
-import type { ProviderType } from '@/lib/llm-providers/types'
-
-interface NewTabChatHeaderProps {
-  selectedProvider: Provider
-  providers: Provider[]
-  onSelectProvider: (provider: Provider) => void
-  onNewConversation: () => void
-  onBackToSearch: () => void
-  hasMessages: boolean
-}
-
-export const NewTabChatHeader: FC<NewTabChatHeaderProps> = ({
-  selectedProvider,
-  providers,
-  onSelectProvider,
-  onNewConversation,
-  onBackToSearch,
-  hasMessages,
-}) => {
-  return (
-    <header className="flex items-center justify-between border-border/40 border-b bg-background/80 px-4 py-2.5 backdrop-blur-md">
-      <div className="flex items-center gap-2">
-        {/* Back to search */}
-        <button
-          type="button"
-          onClick={onBackToSearch}
-          className="cursor-pointer rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground"
-          title="Back to search"
-        >
-          <ArrowLeft className="h-4 w-4" />
-        </button>
-
-        {/* Provider selector */}
-        <ChatProviderSelector
-          providers={providers}
-          selectedProvider={selectedProvider}
-          onSelectProvider={onSelectProvider}
-        >
-          <button
-            type="button"
-            className="group relative inline-flex cursor-pointer items-center gap-2 rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground data-[state=open]:bg-accent"
-            title="Change AI Provider"
-          >
-            {selectedProvider.type === 'browseros' ? (
-              <BrowserOSIcon size={18} />
-            ) : (
-              <ProviderIcon
-                type={selectedProvider.type as ProviderType}
-                size={18}
-              />
-            )}
-            <span className="font-semibold text-base">
-              {selectedProvider.name}
-            </span>
-          </button>
-        </ChatProviderSelector>
-      </div>
-
-      <div className="flex items-center gap-1">
-        {hasMessages && (
-          <button
-            type="button"
-            onClick={onNewConversation}
-            className="cursor-pointer rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground"
-            title="New conversation"
-          >
-            <Plus className="h-4 w-4" />
-          </button>
-        )}
-      </div>
-    </header>
-  )
-}
--- a/packages/browseros-agent/apps/agent/entrypoints/newtab/layout/NewTabLayout.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/newtab/layout/NewTabLayout.tsx
@@ -3,14 +3,19 @@ import { Outlet, useLocation } from 'react-router'
 import { ChatSessionProvider } from '@/entrypoints/sidepanel/layout/ChatSessionContext'
 import { NewTabFocusGrid } from './NewTabFocusGrid'

+const HIDE_FOCUS_GRID_PATHS = new Set([
+  '/home/soul',
+  '/home/memory',
+  '/home/skills',
+  '/home/chat',
+])
+
 export const NewTabLayout: FC = () => {
  const location = useLocation()

  return (
    <ChatSessionProvider origin="newtab">
-      {location.pathname !== '/home/soul' &&
-        location.pathname !== '/home/memory' &&
-        location.pathname !== '/home/skills' && <NewTabFocusGrid />}
+      {!HIDE_FOCUS_GRID_PATHS.has(location.pathname) && <NewTabFocusGrid />}
      <Outlet />
    </ChatSessionProvider>
  )
--- a/packages/browseros-agent/apps/agent/entrypoints/selection.content.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/selection.content.ts
@@ -0,0 +1,42 @@
+import { selectedTextStorage } from '@/lib/selected-text/selectedTextStorage'
+
+const MAX_SELECTED_TEXT_LENGTH = 5000
+
+// Content script: mirrors the page's current text selection into
+// selectedTextStorage under this tab's ID (fetched via a 'get-tab-id' message).
+export default defineContentScript({
+  matches: ['*://*/*'],
+  runAt: 'document_idle',
+  async main() {
+    const reply = await chrome.runtime.sendMessage({ type: 'get-tab-id' })
+    const tabId: number | undefined = reply?.tabId
+    // No tab ID in the reply means there is no key to store under.
+    if (!tabId) return
+
+    const key = String(tabId)
+
+    document.addEventListener('mouseup', () => {
+      const selection = window.getSelection()?.toString().trim()
+
+      selectedTextStorage.getValue().then((stored) => {
+        if (selection) {
+          // Add or overwrite this tab's entry; the text is length-capped.
+          selectedTextStorage.setValue({
+            ...stored,
+            [key]: {
+              text: selection.slice(0, MAX_SELECTED_TEXT_LENGTH),
+              pageUrl: window.location.href,
+              pageTitle: document.title,
+              tabId,
+              timestamp: Date.now(),
+            },
+          })
+        } else if (stored[key]) {
+          // User clicked without selecting — clear this tab's entry only
+          const { [key]: _, ...remaining } = stored
+          selectedTextStorage.setValue(remaining)
+        }
+      })
+    })
+  },
+})
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatError.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatError.tsx
@@ -30,6 +30,7 @@ function parseErrorMessage(message: string): {
  text: string
  url?: string
  isRateLimit?: boolean
+  isCreditsExhausted?: boolean
  isConnectionError?: boolean
 } {
  // Detect MCP server connection failures
@@ -44,6 +45,19 @@ function parseErrorMessage(message: string): {
    }
  }

+  // Detect credit exhaustion from gateway
+  if (
+    message.includes('CREDITS_EXHAUSTED') ||
+    message.includes('Daily credits exhausted')
+  ) {
+    return {
+      text: 'Daily credits exhausted. Credits reset at midnight UTC.',
+      url: '/app.html#/settings/usage',
+      isRateLimit: true,
+      isCreditsExhausted: true,
+    }
+  }
+
  // Detect BrowserOS rate limit (unique pattern, no provider uses this)
  if (message.includes('BrowserOS LLM daily limit reached')) {
    return {
@@ -70,9 +84,8 @@ function parseErrorMessage(message: string): {
 }

 export const ChatError: FC<ChatErrorProps> = ({ error, onRetry }) => {
-  const { text, url, isRateLimit, isConnectionError } = parseErrorMessage(
-    error.message,
-  )
+  const { text, url, isRateLimit, isCreditsExhausted, isConnectionError } =
+    parseErrorMessage(error.message)

  // --- Commented out for Kimi partnership launch (restore after) ---
  // const surveyUrl = useMemo(
@@ -128,7 +141,17 @@ export const ChatError: FC<ChatErrorProps> = ({ error, onRetry }) => {
        </p>
      )}
      --- End commented out survey code --- */}
-      {isRateLimit && (
+      {isCreditsExhausted && url && (
+        <a
+          href={url}
+          target="_blank"
+          rel="noopener noreferrer"
+          className="text-muted-foreground text-xs underline hover:text-foreground"
+        >
+          View Usage & Billing
+        </a>
+      )}
+      {isRateLimit && !isCreditsExhausted && (
        <div className="flex flex-col items-center gap-1">
          <p className="text-muted-foreground text-xs">
            {/* biome-ignore lint/a11y/useValidAnchor: link with click tracking */}
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatFooter.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatFooter.tsx
@@ -8,12 +8,17 @@ import { useGetUserMCPIntegrations } from '@/entrypoints/app/connect-mcp/useGetU
 import { Feature } from '@/lib/browseros/capabilities'
 import { useCapabilities } from '@/lib/browseros/useCapabilities'
 import { useMcpServers } from '@/lib/mcp/mcpServerStorage'
+import {
+  type SelectedTextData,
+  selectedTextStorage,
+} from '@/lib/selected-text/selectedTextStorage'
 import { cn } from '@/lib/utils'
 import type { VoiceInputState } from '@/lib/voice/useVoiceInput'
 import { useWorkspace } from '@/lib/workspace/use-workspace'
 import { ChatAttachedTabs } from './ChatAttachedTabs'
 import { ChatInput, type ChatInputHandle } from './ChatInput'
 import { ChatModeToggle } from './ChatModeToggle'
+import { ChatSelectedText } from './ChatSelectedText'
 import type { ChatMode } from './chatTypes'

 interface ChatFooterProps {
@@ -48,6 +53,33 @@ export const ChatFooter: FC<ChatFooterProps> = ({
  const { servers: mcpServers } = useMcpServers()
  const { data: userMCPIntegrations } = useGetUserMCPIntegrations()
  const chatInputRef = useRef<ChatInputHandle>(null)
+  const [selectionMap, setSelectionMap] = useState<
+    Record<string, SelectedTextData>
+  >({})
+  const [activeTabId, setActiveTabId] = useState<number | undefined>()
+
+  // Track active tab for tab-scoped selection display
+  useEffect(() => {
+    chrome.tabs
+      .query({ active: true, currentWindow: true })
+      .then((tabs) => setActiveTabId(tabs[0]?.id))
+    const listener = (activeInfo: { tabId: number }) => {
+      setActiveTabId(activeInfo.tabId)
+    }
+    chrome.tabs.onActivated.addListener(listener)
+    return () => chrome.tabs.onActivated.removeListener(listener)
+  }, [])
+
+  // Watch selected text storage (per-tab map)
+  useEffect(() => {
+    selectedTextStorage.getValue().then(setSelectionMap)
+    const unwatch = selectedTextStorage.watch(setSelectionMap)
+    return () => unwatch()
+  }, [])
+
+  const visibleSelectedText = activeTabId
+    ? (selectionMap[String(activeTabId)] ?? null)
+    : null
  const [isTabMentionOpen, setIsTabMentionOpen] = useState(false)

  useEffect(() => {
@@ -81,6 +113,19 @@ export const ChatFooter: FC<ChatFooterProps> = ({
  return (
    <footer className="border-border/40 border-t bg-background/80 backdrop-blur-md">
      <ChatAttachedTabs tabs={attachedTabs} onRemoveTab={onRemoveTab} />
+      {visibleSelectedText && (
+        <ChatSelectedText
+          selectedText={visibleSelectedText}
+          onDismiss={() => {
+            if (!activeTabId) return
+            const key = String(activeTabId)
+            selectedTextStorage.getValue().then((map) => {
+              const { [key]: _, ...rest } = map
+              selectedTextStorage.setValue(rest)
+            })
+          }}
+        />
+      )}

      <div className="p-3">
        <div className="flex items-center gap-2">
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatHeader.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatHeader.tsx
@@ -3,17 +3,34 @@ import type { FC } from 'react'
 import { Link, useLocation, useNavigate } from 'react-router'
 import { ChatProviderSelector } from '@/components/chat/ChatProviderSelector'
 import type { Provider } from '@/components/chat/chatComponentTypes'
+import { CreditBadge } from '@/components/credits/CreditBadge'
 import { ThemeToggle } from '@/components/elements/theme-toggle'
+import { Feature } from '@/lib/browseros/capabilities'
+import { useCapabilities } from '@/lib/browseros/useCapabilities'
 import { productRepositoryUrl } from '@/lib/constants/productUrls'
+import { useCredits } from '@/lib/credits/useCredits'
 import { BrowserOSIcon, ProviderIcon } from '@/lib/llm-providers/providerIcons'
 import type { ProviderType } from '@/lib/llm-providers/types'

+const CreditsBadgeWrapper: FC = () => {
+  const { supports } = useCapabilities()
+  const { data } = useCredits()
+  if (!supports(Feature.CREDITS_SUPPORT) || data === undefined) return null
+  return (
+    <CreditBadge
+      credits={data.credits}
+      onClick={() => window.open('/app.html#/settings/usage', '_blank')}
+    />
+  )
+}
+
 interface ChatHeaderProps {
  selectedProvider: Provider
  providers: Provider[]
  onSelectProvider: (provider: Provider) => void
  onNewConversation: () => void
  hasMessages: boolean
+  hideHistory?: boolean
 }

 export const ChatHeader: FC<ChatHeaderProps> = ({
@@ -22,6 +39,7 @@ export const ChatHeader: FC<ChatHeaderProps> = ({
  onSelectProvider,
  onNewConversation,
  hasMessages,
+  hideHistory,
 }) => {
  const location = useLocation()
  const navigate = useNavigate()
@@ -59,6 +77,7 @@ export const ChatHeader: FC<ChatHeaderProps> = ({
            </span>
          </button>
        </ChatProviderSelector>
+        {selectedProvider.type === 'browseros' && <CreditsBadgeWrapper />}
      </div>

      <div className="flex items-center gap-1">
@@ -73,24 +92,25 @@ export const ChatHeader: FC<ChatHeaderProps> = ({
          </button>
        )}

-        {isHistoryPage ? (
-          <button
-            type="button"
-            onClick={handleNewConversationFromHistory}
-            className="cursor-pointer rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground"
-            title="New conversation"
-          >
-            <Plus className="h-4 w-4" />
-          </button>
-        ) : (
-          <Link
-            to="/history"
-            className="cursor-pointer rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground"
-            title="Chat history"
-          >
-            <History className="h-4 w-4" />
-          </Link>
-        )}
+        {!hideHistory &&
+          (isHistoryPage ? (
+            <button
+              type="button"
+              onClick={handleNewConversationFromHistory}
+              className="cursor-pointer rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground"
+              title="New conversation"
+            >
+              <Plus className="h-4 w-4" />
+            </button>
+          ) : (
+            <Link
+              to="/history"
+              className="cursor-pointer rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground"
+              title="Chat history"
+            >
+              <History className="h-4 w-4" />
+            </Link>
+          ))}

        <a
          href={productRepositoryUrl}
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatInput.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatInput.tsx
@@ -280,7 +280,11 @@ export const ChatInput = forwardRef<ChatInputHandle, ChatInputProps>(

      if (voice.isTranscribing) {
        return (
-          <button type="button" disabled className="rounded-full p-2 text-muted-foreground">
+          <button
+            type="button"
+            disabled
+            className="rounded-full p-2 text-muted-foreground"
+          >
            <Loader2 className="h-3.5 w-3.5 animate-spin" />
            <span className="sr-only">Transcribing</span>
          </button>
@@ -317,7 +321,9 @@ export const ChatInput = forwardRef<ChatInputHandle, ChatInputProps>(
      return (
        <button
          type="submit"
-          disabled={!input.trim() || voice?.isRecording || voice?.isTranscribing}
+          disabled={
+            !input.trim() || voice?.isRecording || voice?.isTranscribing
+          }
          className="cursor-pointer rounded-full bg-[var(--accent-orange)] p-2 text-white shadow-sm transition-all duration-200 hover:bg-[var(--accent-orange-bright)] disabled:cursor-not-allowed disabled:opacity-50"
        >
          <Send className="h-3.5 w-3.5" />
@@ -341,12 +347,10 @@ export const ChatInput = forwardRef<ChatInputHandle, ChatInputProps>(
          anchorRef={textareaRef}
        />
        {voice?.isRecording ? (
-          <div
-            className="flex min-h-[42px] flex-1 items-center justify-center gap-1 rounded-2xl border border-red-500/50 bg-muted/50 px-4 py-2.5 pr-[4.5rem]"
-          >
+          <div className="flex min-h-[42px] flex-1 items-center justify-center gap-1 rounded-2xl border border-red-500/50 bg-muted/50 px-4 py-2.5 pr-[4.5rem]">
            {voice.audioLevels.map((level, i) => (
              <div
-                key={i}
+                key={i.toString()}
                className="w-1 rounded-full bg-red-500 transition-all duration-75"
                style={{
                  height: `${Math.max(4, Math.min(20, level * 0.6))}px`,
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatSelectedText.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/ChatSelectedText.tsx
@@ -0,0 +1,55 @@
+import { FileText, X } from 'lucide-react'
+import type { FC } from 'react'
+import type { SelectedTextData } from '@/lib/selected-text/selectedTextStorage'
+
+/** Longest excerpt, in UTF-16 code units, shown before "..." is appended. */
+const MAX_DISPLAY_LENGTH = 200
+
+interface ChatSelectedTextProps {
+  selectedText: SelectedTextData
+  onDismiss: () => void
+}
+
+/** Returns `text` unchanged when short enough, else its head plus "...". */
+function toExcerpt(text: string): string {
+  if (text.length <= MAX_DISPLAY_LENGTH) return text
+  return `${text.slice(0, MAX_DISPLAY_LENGTH)}...`
+}
+
+/**
+ * Card previewing the selected text: page title, a quoted excerpt of the
+ * selection, and a button that invokes `onDismiss`.
+ */
+export const ChatSelectedText: FC<ChatSelectedTextProps> = ({
+  selectedText,
+  onDismiss,
+}) => {
+  const { pageTitle, text } = selectedText
+  const excerpt = toExcerpt(text)
+
+  return (
+    <div className="px-3 pt-2">
+      <div className="relative rounded-lg border border-[var(--accent-orange)]/30 bg-accent/30">
+        <div className="flex items-start gap-2 px-3 py-2">
+          <FileText className="mt-0.5 h-3.5 w-3.5 flex-shrink-0 text-[var(--accent-orange)]" />
+          <div className="min-w-0 flex-1">
+            <div className="mb-0.5 truncate font-medium text-[10px] text-muted-foreground">
+              {pageTitle}
+            </div>
+            <div className="line-clamp-3 text-foreground text-xs leading-relaxed">
+              &ldquo;{excerpt}&rdquo;
+            </div>
+          </div>
+          <button
+            type="button"
+            onClick={onDismiss}
+            className="flex-shrink-0 rounded p-0.5 transition-colors hover:bg-background"
+            title="Remove selected text"
+          >
+            <X className="h-3 w-3 text-muted-foreground" />
+          </button>
+        </div>
+      </div>
+    </div>
+  )
+}
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/useChatSession.ts
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/useChatSession.ts
@@ -21,12 +21,14 @@ import {
  useConversations,
 } from '@/lib/conversations/conversationStorage'
 import { formatConversationHistory } from '@/lib/conversations/formatConversationHistory'
+import { useInvalidateCredits } from '@/lib/credits/useCredits'
 import { declinedAppsStorage } from '@/lib/declined-apps/storage'
 import { useGraphqlQuery } from '@/lib/graphql/useGraphqlQuery'
 import { createDefaultBrowserOSProvider } from '@/lib/llm-providers/storage'
 import { useLlmProviders } from '@/lib/llm-providers/useLlmProviders'
 import { track } from '@/lib/metrics/track'
 import { searchActionsStorage } from '@/lib/search-actions/searchActionsStorage'
+import { selectedTextStorage } from '@/lib/selected-text/selectedTextStorage'
 import { stopAgentStorage } from '@/lib/stop-agent/stop-agent-storage'
 import { selectedWorkspaceStorage } from '@/lib/workspace/workspace-storage'
 import type { ChatMode } from './chatTypes'
@@ -85,6 +87,7 @@ export const useChatSession = (options?: ChatSessionOptions) => {
    selectedLlmProvider,
    isLoadingProviders,
  } = useChatRefs()
+  const invalidateCredits = useInvalidateCredits()

  const { providers: llmProviders, setDefaultProvider } = useLlmProviders()

@@ -165,8 +168,34 @@ export const useChatSession = (options?: ChatSessionOptions) => {
  const modeRef = useRef<ChatMode>(mode)
  const textToActionRef = useRef<Map<string, ChatAction>>(textToAction)
  const workingDirRef = useRef<string | undefined>(undefined)
+  const selectionMapRef = useRef<
+    Record<string, { text: string; url: string; title: string }>
+  >({})
+  const pendingSelectionTabKeyRef = useRef<string | null>(null)
  const messagesRef = useRef<UIMessage[]>([])

+  useEffect(() => {
+    const toRef = (
+      map: Record<string, { text: string; pageUrl: string; pageTitle: string }>,
+    ) => {
+      const result: Record<
+        string,
+        { text: string; url: string; title: string }
+      > = {}
+      for (const [k, v] of Object.entries(map)) {
+        result[k] = { text: v.text, url: v.pageUrl, title: v.pageTitle }
+      }
+      return result
+    }
+    selectedTextStorage.getValue().then((map) => {
+      selectionMapRef.current = toRef(map)
+    })
+    const unwatchText = selectedTextStorage.watch((map) => {
+      selectionMapRef.current = toRef(map)
+    })
+    return () => unwatchText()
+  }, [])
+
  useEffect(() => {
    selectedWorkspaceStorage.getValue().then((folder) => {
      workingDirRef.current = folder?.path
@@ -210,6 +239,9 @@ export const useChatSession = (options?: ChatSessionOptions) => {
          currentWindow: true,
        })
        const activeTab = activeTabsList?.[0] ?? undefined
+        const activeTabSelection = activeTab?.id
+          ? (selectionMapRef.current[String(activeTab.id)] ?? null)
+          : null
        const message = getLastMessageText(messages)
        const provider =
          selectedLlmProviderRef.current ?? createDefaultBrowserOSProvider()
@@ -287,7 +319,7 @@ export const useChatSession = (options?: ChatSessionOptions) => {
            : history.map((m) => `${m.role}: ${m.content}`).join('\n')
          : undefined

-        return {
+        const result = {
          api: `${agentUrlRef.current}/chat`,
          body: {
            message,
@@ -308,6 +340,9 @@ export const useChatSession = (options?: ChatSessionOptions) => {
            secretAccessKey: provider?.secretAccessKey,
            region: provider?.region,
            sessionToken: provider?.sessionToken,
+            // ChatGPT Pro (Codex)
+            reasoningEffort: provider?.reasoningEffort,
+            reasoningSummary: provider?.reasoningSummary,
            browserContext,
            userSystemPrompt:
              options?.origin === 'newtab'
@@ -319,8 +354,21 @@ export const useChatSession = (options?: ChatSessionOptions) => {
            supportsImages: provider?.supportsImages,
            previousConversation,
            declinedApps: declinedApps.length > 0 ? declinedApps : undefined,
+            selectedText: activeTabSelection?.text,
+            selectedTextSource: activeTabSelection
+              ? {
+                  url: activeTabSelection.url,
+                  title: activeTabSelection.title,
+                }
+              : undefined,
          },
        }
+
+        // Track which tab's selection was sent so we can clear it on success
+        pendingSelectionTabKeyRef.current =
+          activeTabSelection && activeTab?.id ? String(activeTab.id) : null
+
+        return result
      },
    }),
  })
@@ -414,6 +462,19 @@ export const useChatSession = (options?: ChatSessionOptions) => {

    if (!justFinished) return

+    // Clear the selected text that was sent with this request
+    const tabKey = pendingSelectionTabKeyRef.current
+    if (tabKey) {
+      pendingSelectionTabKeyRef.current = null
+      delete selectionMapRef.current[tabKey]
+      selectedTextStorage.getValue().then((map) => {
+        if (map[tabKey]) {
+          const { [tabKey]: _, ...rest } = map
+          selectedTextStorage.setValue(rest)
+        }
+      })
+    }
+
    const messagesToSave = messages.filter((m) => m.parts?.length > 0)
    if (messagesToSave.length === 0) return

@@ -422,8 +483,14 @@ export const useChatSession = (options?: ChatSessionOptions) => {
    } else {
      saveLocalConversation(conversationIdRef.current, messagesToSave)
    }
+
+    invalidateCredits()
  }, [status])

+  useEffect(() => {
+    if (chatError) invalidateCredits()
+  }, [chatError, invalidateCredits])
+
  const isIntegrationsSynced = options?.isIntegrationsSynced ?? true
  const isIntegrationsSyncedRef = useRef(isIntegrationsSynced)
  const pendingMessageRef = useRef<{
@@ -443,6 +510,7 @@ export const useChatSession = (options?: ChatSessionOptions) => {
      if (pending.action) {
        setTextToAction((prev) => {
          const next = new Map(prev)
+          // biome-ignore lint/style/noNonNullAssertion: guarded by if (pending.action) above
          next.set(pending.text, pending.action!)
          return next
        })
--- a/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/useNotifyActiveTab.tsx
+++ b/packages/browseros-agent/apps/agent/entrypoints/sidepanel/index/useNotifyActiveTab.tsx
@@ -19,6 +19,10 @@ function extractTabId(toolPart: ToolUIPart | null): number | undefined {
  return input?.tabId
 }

+function sendGlow(tabId: number, message: GlowMessage): void {
+  chrome.tabs.sendMessage(tabId, message).catch(() => {})
+}
+
 export const useNotifyActiveTab = ({
  messages,
  status,
@@ -28,7 +32,10 @@ export const useNotifyActiveTab = ({
  status: ChatStatus
  conversationId: string
 }) => {
-  const lastTabIdRef = useRef<number | null>(null)
+  // Track the single tab currently glowing
+  const activeTabIdRef = useRef<number | null>(null)
+  // Track all tabs that have been glowed during this stream (for cleanup)
+  const allGlowedTabsRef = useRef<Set<number>>(new Set())

  const lastMessage = messages?.[messages.length - 1]

@@ -41,27 +48,35 @@ export const useNotifyActiveTab = ({

  useEffect(() => {
    const isStreaming = status === 'streaming'
-    const previousTabId = lastTabIdRef.current

    if (!isStreaming) {
-      if (previousTabId) {
+      // Deactivate ALL tabs that were glowed during this stream
+      const allGlowed = allGlowedTabsRef.current
+      if (allGlowed.size > 0) {
        const deactivate = async () => {
+          // Capture tab IDs before any async work to avoid race with clear()
+          const tabIds = Array.from(allGlowed)
+          allGlowed.clear()
+
          const alreadyShown = await firstRunConfettiShownStorage.getValue()
-          const deactivateMessage: GlowMessage = {
-            conversationId,
-            isActive: false,
-            showConfetti: !alreadyShown,
+          let showConfetti = !alreadyShown
+
+          for (const tabId of tabIds) {
+            sendGlow(tabId, {
+              conversationId,
+              isActive: false,
+              showConfetti,
+            })
+            showConfetti = false
          }
-          chrome.tabs
-            .sendMessage(previousTabId, deactivateMessage)
-            .catch(() => {})
+
          if (!alreadyShown) {
            await firstRunConfettiShownStorage.setValue(true)
          }
        }
        deactivate()
-        lastTabIdRef.current = null
      }
+      activeTabIdRef.current = null
      return
    }

@@ -70,34 +85,41 @@ export const useNotifyActiveTab = ({
    let cancelled = false

    const activate = async () => {
-      let targetTabId = toolTabId ?? previousTabId ?? undefined
+      let targetTabId = toolTabId ?? undefined

      if (!targetTabId) {
-        const tabs = await chrome.tabs.query({
-          active: true,
-          currentWindow: true,
-        })
-        targetTabId = tabs[0]?.id
+        // Fallback: reuse the tab already glowing, else query the browser
+        if (activeTabIdRef.current) {
+          targetTabId = activeTabIdRef.current
+        } else {
+          const tabs = await chrome.tabs.query({
+            active: true,
+            currentWindow: true,
+          })
+          targetTabId = tabs[0]?.id
+        }
      }

      if (cancelled || !targetTabId) return

+      const previousTabId = activeTabIdRef.current
+
+      // If the agent moved to a different tab, deactivate the previous one
      if (previousTabId && previousTabId !== targetTabId) {
-        const deactivateMessage: GlowMessage = {
+        sendGlow(previousTabId, {
          conversationId,
          isActive: false,
-        }
-        chrome.tabs
-          .sendMessage(previousTabId, deactivateMessage)
-          .catch(() => {})
+        })
      }

-      const activateMessage: GlowMessage = {
+      // Activate glow on the target tab
+      sendGlow(targetTabId, {
        conversationId,
        isActive: true,
-      }
-      chrome.tabs.sendMessage(targetTabId, activateMessage).catch(() => {})
-      lastTabIdRef.current = targetTabId
+      })
+
+      activeTabIdRef.current = targetTabId
+      allGlowedTabsRef.current.add(targetTabId)
    }

    activate()
--- a/packages/browseros-agent/apps/agent/lib/browseros/capabilities.ts
+++ b/packages/browseros-agent/apps/agent/lib/browseros/capabilities.ts
@@ -45,6 +45,14 @@ export enum Feature {
  MEMORY_SUPPORT = 'MEMORY_SUPPORT',
  // Skills page: agent skills viewer and editor
  SKILLS_SUPPORT = 'SKILLS_SUPPORT',
+  // ChatGPT Pro OAuth LLM provider
+  CHATGPT_PRO_SUPPORT = 'CHATGPT_PRO_SUPPORT',
+  // GitHub Copilot OAuth LLM provider
+  GITHUB_COPILOT_SUPPORT = 'GITHUB_COPILOT_SUPPORT',
+  // Qwen Code OAuth LLM provider
+  QWEN_CODE_SUPPORT = 'QWEN_CODE_SUPPORT',
+  // Credit-based usage tracking
+  CREDITS_SUPPORT = 'CREDITS_SUPPORT',
 }

 /**
@@ -72,6 +80,10 @@ const FEATURE_CONFIG: { [K in Feature]: FeatureConfig } = {
  [Feature.VERTICAL_TABS_SUPPORT]: { minBrowserOSVersion: '0.42.0.0' },
  [Feature.MEMORY_SUPPORT]: { minServerVersion: '0.0.73' },
  [Feature.SKILLS_SUPPORT]: { minBrowserOSVersion: '0.43.0.0' },
+  [Feature.CHATGPT_PRO_SUPPORT]: { minServerVersion: '0.0.77' },
+  [Feature.GITHUB_COPILOT_SUPPORT]: { minServerVersion: '0.0.77' },
+  [Feature.QWEN_CODE_SUPPORT]: { minServerVersion: '0.0.77' },
+  [Feature.CREDITS_SUPPORT]: { minServerVersion: '0.0.78' },
 }

 function parseVersion(version: string): number[] {
--- a/packages/browseros-agent/apps/agent/lib/chat-actions/useChatActions.ts
+++ b/packages/browseros-agent/apps/agent/lib/chat-actions/useChatActions.ts
@@ -0,0 +1,172 @@
+import { useEffect, useState } from 'react'
+import type { ChatMode } from '@/entrypoints/sidepanel/index/chatTypes'
+import { useChatSessionContext } from '@/entrypoints/sidepanel/layout/ChatSessionContext'
+import { track } from '@/lib/metrics/track'
+import { useVoiceInput } from '@/lib/voice/useVoiceInput'
+import { createBrowserOSAction } from './types'
+
+interface ChatActionsConfig {
+  /** Analytics event names scoped to the origin */
+  events: {
+    modeChanged: string
+    stopClicked: string
+    suggestionClicked: string
+    tabToggled: string
+    tabRemoved: string
+    aiTriggered: string
+    voiceRecordingStarted: string
+    voiceRecordingStopped: string
+    voiceTranscriptionCompleted: string
+    voiceError: string
+  }
+  /** Auto-attach current active tab on mount (sidepanel only) */
+  autoAttachActiveTab?: boolean
+}
+
+export function useChatActions(config: ChatActionsConfig) {
+  const session = useChatSessionContext()
+  const { mode, setMode, sendMessage, stop, messages } = session
+
+  const voice = useVoiceInput()
+
+  const [input, setInput] = useState('')
+  const [attachedTabs, setAttachedTabs] = useState<chrome.tabs.Tab[]>([])
+  const [mounted, setMounted] = useState(false)
+
+  useEffect(() => {
+    setMounted(true)
+  }, [])
+
+  // Auto-attach current tab on mount (sidepanel)
+  useEffect(() => {
+    if (!config.autoAttachActiveTab) return
+    ;(async () => {
+      const currentTab = (
+        await chrome.tabs.query({ active: true, currentWindow: true })
+      ).filter((tab) => tab.url?.startsWith('http'))
+      setAttachedTabs(currentTab)
+    })()
+  }, [config.autoAttachActiveTab])
+
+  // Voice transcript → input
+  // biome-ignore lint/correctness/useExhaustiveDependencies: only trigger on transcript/transcribing change
+  useEffect(() => {
+    if (voice.transcript && !voice.isTranscribing) {
+      setInput((prev) => {
+        const separator = prev.trim() ? ' ' : ''
+        return prev + separator + voice.transcript
+      })
+      track(config.events.voiceTranscriptionCompleted)
+      voice.clearTranscript()
+    }
+  }, [voice.transcript, voice.isTranscribing])
+
+  // Track voice errors
+  useEffect(() => {
+    if (voice.error) {
+      track(config.events.voiceError, { error: voice.error })
+    }
+  }, [voice.error, config.events.voiceError])
+
+  const handleModeChange = (newMode: ChatMode) => {
+    track(config.events.modeChanged, { from: mode, to: newMode })
+    setMode(newMode)
+  }
+
+  const handleStop = () => {
+    track(config.events.stopClicked)
+    stop()
+  }
+
+  const toggleTabSelection = (tab: chrome.tabs.Tab) => {
+    // Side effects stay out of the updater: StrictMode runs updaters twice.
+    const isSelected = attachedTabs.some((t) => t.id === tab.id)
+    track(config.events.tabToggled, {
+      action: isSelected ? 'removed' : 'added',
+    })
+    setAttachedTabs((prev) =>
+      prev.some((t) => t.id === tab.id)
+        ? prev.filter((t) => t.id !== tab.id)
+        : [...prev, tab],
+    )
+  }
+
+  const removeTab = (tabId?: number) => {
+    track(config.events.tabRemoved)
+    setAttachedTabs((prev) => prev.filter((t) => t.id !== tabId))
+  }
+
+  const executeMessage = (customMessageText?: string) => {
+    const messageText = customMessageText ? customMessageText : input.trim()
+    if (!messageText) return
+
+    if (attachedTabs.length) {
+      const action = createBrowserOSAction({
+        mode,
+        message: messageText,
+        tabs: attachedTabs,
+      })
+      sendMessage({ text: messageText, action })
+    } else {
+      sendMessage({ text: messageText })
+    }
+    setInput('')
+    setAttachedTabs([])
+  }
+
+  const handleSubmit = (e: React.FormEvent) => {
+    e.preventDefault()
+    if (messages.length === 0) {
+      track(config.events.aiTriggered, {
+        mode,
+        tabs_count: attachedTabs.length,
+      })
+    }
+    executeMessage()
+  }
+
+  const handleSuggestionClick = (suggestion: string) => {
+    track(config.events.suggestionClicked, { mode })
+    executeMessage(suggestion)
+  }
+
+  const handleStartRecording = async () => {
+    const started = await voice.startRecording()
+    if (started) {
+      track(config.events.voiceRecordingStarted)
+    }
+  }
+
+  const handleStopRecording = async () => {
+    await voice.stopRecording()
+    track(config.events.voiceRecordingStopped)
+  }
+
+  const voiceState = {
+    isRecording: voice.isRecording,
+    isTranscribing: voice.isTranscribing,
+    audioLevels: voice.audioLevels,
+    error: voice.error,
+    onStartRecording: handleStartRecording,
+    onStopRecording: handleStopRecording,
+  }
+
+  const { stop: _stop, ...restSession } = session
+
+  return {
+    ...restSession,
+    input,
+    setInput,
+    attachedTabs,
+    setAttachedTabs,
+    mounted,
+    voiceState,
+    handleModeChange,
+    handleStop,
+    toggleTabSelection,
+    removeTab,
+    executeMessage,
+    handleSubmit,
+    handleSuggestionClick,
+  }
+}
--- a/packages/browseros-agent/apps/agent/lib/constants/analyticsEvents.ts
+++ b/packages/browseros-agent/apps/agent/lib/constants/analyticsEvents.ts
@@ -29,6 +29,41 @@ export const CONVERSATION_RESET_EVENT = 'ui.conversation.reset'
 /** @public */
 export const AI_PROVIDER_ADDED_EVENT = 'settings.ai_provider.added'

+/** @public */
+export const CHATGPT_PRO_OAUTH_STARTED_EVENT =
+  'settings.chatgpt_pro.oauth_started'
+
+/** @public */
+export const CHATGPT_PRO_OAUTH_COMPLETED_EVENT =
+  'settings.chatgpt_pro.oauth_completed'
+
+/** @public */
+export const CHATGPT_PRO_OAUTH_DISCONNECTED_EVENT =
+  'settings.chatgpt_pro.oauth_disconnected'
+
+/** @public */
+export const GITHUB_COPILOT_OAUTH_STARTED_EVENT =
+  'settings.github_copilot.oauth_started'
+
+/** @public */
+export const GITHUB_COPILOT_OAUTH_COMPLETED_EVENT =
+  'settings.github_copilot.oauth_completed'
+
+/** @public */
+export const GITHUB_COPILOT_OAUTH_DISCONNECTED_EVENT =
+  'settings.github_copilot.oauth_disconnected'
+
+/** @public */
+export const QWEN_CODE_OAUTH_STARTED_EVENT = 'settings.qwen_code.oauth_started'
+
+/** @public */
+export const QWEN_CODE_OAUTH_COMPLETED_EVENT =
+  'settings.qwen_code.oauth_completed'
+
+/** @public */
+export const QWEN_CODE_OAUTH_DISCONNECTED_EVENT =
+  'settings.qwen_code.oauth_disconnected'
+
 /** @public */
 export const HUB_PROVIDER_ADDED_EVENT = 'settings.hub_provider.added'

@@ -118,6 +153,21 @@ export const NEWTAB_CHAT_SUGGESTION_CLICKED_EVENT =
 /** @public */
 export const NEWTAB_CHAT_MODE_CHANGED_EVENT = 'newtab.chat.mode_changed'

+/** @public */
+export const NEWTAB_VOICE_RECORDING_STARTED_EVENT =
+  'newtab.voice.recording_started'
+
+/** @public */
+export const NEWTAB_VOICE_RECORDING_STOPPED_EVENT =
+  'newtab.voice.recording_stopped'
+
+/** @public */
+export const NEWTAB_VOICE_TRANSCRIPTION_COMPLETED_EVENT =
+  'newtab.voice.transcription_completed'
+
+/** @public */
+export const NEWTAB_VOICE_ERROR_EVENT = 'newtab.voice.error'
+
 /** @public */
 export const WORKFLOW_DELETED_EVENT = 'settings.workflow.deleted'

--- a/packages/browseros-agent/apps/agent/lib/credits/credit-colors.ts
+++ b/packages/browseros-agent/apps/agent/lib/credits/credit-colors.ts
@@ -0,0 +1,32 @@
+/** Positive balances up to and including this value count as "low". */
+const LOW_THRESHOLD = 30
+
+type CreditTone = 'empty' | 'low' | 'ok'
+
+const TEXT_CLASS_BY_TONE: Record<CreditTone, string> = {
+  empty: 'text-red-500',
+  low: 'text-yellow-500',
+  ok: 'text-green-500',
+}
+
+const BAR_CLASS_BY_TONE: Record<CreditTone, string> = {
+  empty: 'bg-red-500',
+  low: 'bg-yellow-500',
+  ok: 'bg-green-500',
+}
+
+/** Buckets a balance: `empty` at or below 0, `low` up to LOW_THRESHOLD, else `ok`. */
+function toneOf(credits: number): CreditTone {
+  if (credits <= 0) return 'empty'
+  return credits <= LOW_THRESHOLD ? 'low' : 'ok'
+}
+
+/** Text-color utility class for the given credit balance. */
+export function getCreditTextColor(credits: number): string {
+  return TEXT_CLASS_BY_TONE[toneOf(credits)]
+}
+
+/** Background-color utility class for the given credit balance. */
+export function getCreditBarColor(credits: number): string {
+  return BAR_CLASS_BY_TONE[toneOf(credits)]
+}
--- a/packages/browseros-agent/apps/agent/lib/credits/useCredits.ts
+++ b/packages/browseros-agent/apps/agent/lib/credits/useCredits.ts
@@ -0,0 +1,49 @@
+import { useQuery, useQueryClient } from '@tanstack/react-query'
+import { useCallback } from 'react'
+import { getAgentServerUrl } from '@/lib/browseros/helpers'
+
+/** Credit balance as returned by the agent server's `/credits` endpoint. */
+export interface CreditsInfo {
+  credits: number
+  dailyLimit: number
+  lastResetAt?: string
+}
+
+const CREDITS_QUERY_KEY = ['credits']
+
+/**
+ * Fetches the credit balance from the agent server.
+ * Throws when the response status is not OK so the query reports an error.
+ */
+async function fetchCredits(): Promise<CreditsInfo> {
+  const baseUrl = await getAgentServerUrl()
+  const response = await fetch(`${baseUrl}/credits`)
+  if (!response.ok)
+    throw new Error(`Failed to fetch credits: ${response.status}`)
+  return response.json()
+}
+
+/** Query hook for the credit balance (30s stale time, a single retry). */
+export function useCredits() {
+  return useQuery<CreditsInfo>({
+    queryKey: CREDITS_QUERY_KEY,
+    queryFn: fetchCredits,
+    refetchOnWindowFocus: true,
+    staleTime: 30_000,
+    retry: 1,
+  })
+}
+
+/**
+ * Returns a callback that invalidates the credits query.
+ *
+ * The callback is memoized so callers can list it in effect dependency
+ * arrays without the effect re-running (and re-invalidating) on every render.
+ */
+export function useInvalidateCredits() {
+  const queryClient = useQueryClient()
+  return useCallback(
+    () => queryClient.invalidateQueries({ queryKey: CREDITS_QUERY_KEY }),
+    [queryClient],
+  )
+}
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/client-oauth.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/client-oauth.ts
@@ -0,0 +1,169 @@
+/**
+ * Client-side OAuth Device Code flow.
+ * Used for providers where server-side fetch is blocked by WAF (e.g. Qwen).
+ * The extension makes requests using Chrome's network stack which bypasses
+ * TLS fingerprint-based WAF detection.
+ */
+
+export interface ClientAuthConfig {
+  deviceCodeEndpoint: string
+  tokenEndpoint: string
+  clientId: string
+  scopes: string
+  requiresPKCE: boolean
+  contentType: 'json' | 'form'
+}
+
+interface DeviceCodeData {
+  device_code: string
+  user_code: string
+  verification_uri: string
+  verification_uri_complete?: string
+  expires_in: number
+  interval: number
+}
+
+export interface TokenResult {
+  accessToken: string
+  refreshToken: string
+  expiresIn: number
+}
+
+/** Requests a device code, adding a PKCE S256 challenge when required. */
+export async function requestDeviceCode(
+  auth: ClientAuthConfig,
+): Promise<{ deviceData: DeviceCodeData; codeVerifier?: string }> {
+  const body: Record<string, string> = {
+    client_id: auth.clientId,
+    scope: auth.scopes,
+  }
+  let codeVerifier: string | undefined
+
+  if (auth.requiresPKCE) {
+    codeVerifier = generateCodeVerifier()
+    body.code_challenge = await generateCodeChallenge(codeVerifier)
+    body.code_challenge_method = 'S256'
+  }
+
+  const res = await authFetch(auth.deviceCodeEndpoint, body, auth.contentType)
+
+  // Non-JSON reply is treated as a WAF captcha: open the site, then retry
+  const mime = res.headers.get('content-type') ?? ''
+  if (!mime.includes('application/json')) {
+    window.open(new URL(auth.deviceCodeEndpoint).origin, '_blank')
+    throw new Error(
+      'Please complete the verification in the opened tab, then click USE again.',
+    )
+  }
+  if (!res.ok) throw new Error(`Device code request failed: ${res.status}`)
+
+  const deviceData = (await res.json()) as DeviceCodeData
+  if (!deviceData.device_code || !deviceData.user_code) {
+    throw new Error('Invalid device code response')
+  }
+
+  return { deviceData, codeVerifier }
+}
+
+/** Polls the token endpoint until the user approves, then calls `onToken`. */
+export function startTokenPolling(
+  auth: ClientAuthConfig,
+  deviceData: DeviceCodeData,
+  codeVerifier: string | undefined,
+  onToken: (token: TokenResult) => void,
+): void {
+  // RFC 8628 §3.2: `interval` is optional (default 5 s); undefined → NaN delay.
+  let interval = deviceData.interval > 0 ? deviceData.interval : 5
+  const deadline = Date.now() + deviceData.expires_in * 1000
+  const safetyMargin = 3
+
+  const poll = async () => {
+    if (Date.now() > deadline) return
+
+    const params: Record<string, string> = {
+      client_id: auth.clientId,
+      device_code: deviceData.device_code,
+      grant_type: 'urn:ietf:params:oauth:grant-type:device_code',
+    }
+    if (codeVerifier) params.code_verifier = codeVerifier
+
+    try {
+      const res = await authFetch(auth.tokenEndpoint, params, auth.contentType)
+
+      // WAF returned HTML — retry later
+      const ct = res.headers.get('content-type') ?? ''
+      if (!ct.includes('application/json')) {
+        setTimeout(poll, (interval + safetyMargin) * 1000)
+        return
+      }
+
+      const data = (await res.json()) as {
+        access_token?: string
+        refresh_token?: string
+        expires_in?: number
+        error?: string
+        interval?: number
+      }
+
+      if (data.access_token) {
+        onToken({
+          accessToken: data.access_token,
+          refreshToken: data.refresh_token ?? '',
+          expiresIn: data.expires_in ?? 0,
+        })
+        return
+      }
+
+      if (data.error === 'slow_down') {
+        interval = (data.interval ?? interval) + 5
+        setTimeout(poll, (interval + safetyMargin) * 1000)
+      } else if (data.error === 'authorization_pending') {
+        setTimeout(poll, (interval + safetyMargin) * 1000)
+      }
+      // Any other error (e.g. access denied, expired code) ends polling.
+    } catch {
+      setTimeout(poll, (interval + safetyMargin) * 1000)
+    }
+  }
+
+  setTimeout(poll, (interval + safetyMargin) * 1000)
+}
+
+/** POSTs `params` to `endpoint`, form-encoded or as JSON per `contentType`. */
+function authFetch(
+  endpoint: string,
+  params: Record<string, string>,
+  contentType: 'json' | 'form',
+): Promise<Response> {
+  const asForm = contentType === 'form'
+  const headers = {
+    'Content-Type': asForm
+      ? 'application/x-www-form-urlencoded'
+      : 'application/json',
+    Accept: 'application/json',
+  }
+
+  const body = asForm
+    ? new URLSearchParams(params).toString()
+    : JSON.stringify(params)
+
+  return fetch(endpoint, { method: 'POST', headers, body })
+}
+
+/** Creates a PKCE code verifier: 32 random bytes, base64url-encoded. */
+function generateCodeVerifier(): string {
+  return base64UrlEncode(crypto.getRandomValues(new Uint8Array(32)))
+}
+
+/** Derives the PKCE S256 challenge: base64url(SHA-256(verifier)). */
+async function generateCodeChallenge(verifier: string): Promise<string> {
+  const utf8 = new TextEncoder().encode(verifier)
+  const hash = await crypto.subtle.digest('SHA-256', utf8)
+
+  return base64UrlEncode(new Uint8Array(hash))
+}
+
+function base64UrlEncode(bytes: Uint8Array): string {
+  const b64 = btoa(String.fromCharCode(...bytes)) // standard alphabet, padded
+  return b64.replace(/=+$/, '').replace(/\//g, '_').replace(/\+/g, '-')
+}
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/providerIcons.tsx
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/providerIcons.tsx
@@ -8,8 +8,9 @@ import {
  Ollama,
  OpenAI,
  OpenRouter,
+  Qwen,
 } from '@lobehub/icons'
-import { Bot } from 'lucide-react'
+import { Bot, Github } from 'lucide-react'
 import type { FC, SVGProps } from 'react'
 import ProductLogoSvg from '@/assets/product_logo.svg'
 import type { ProviderType } from './types'
@@ -32,6 +33,9 @@ const providerIconMap: Record<ProviderType, IconComponent | null> = {
  bedrock: Bedrock,
  browseros: null,
  moonshot: Kimi,
+  'chatgpt-pro': OpenAI,
+  'github-copilot': Github,
+  'qwen-code': Qwen,
 }

 interface ProviderIconProps {
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/providerTemplates.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/providerTemplates.ts
@@ -20,6 +20,33 @@ export interface ProviderTemplate {
 * @public
 */
 export const providerTemplates: ProviderTemplate[] = [
+  {
+    id: 'chatgpt-pro',
+    name: 'ChatGPT Plus/Pro',
+    defaultBaseUrl: 'https://chatgpt.com/backend-api',
+    defaultModelId: 'gpt-5.3-codex',
+    supportsImages: true,
+    contextWindow: 400000,
+    setupGuideUrl: 'https://docs.browseros.com/features/chatgpt-pro-oauth',
+  },
+  {
+    id: 'github-copilot',
+    name: 'GitHub Copilot',
+    defaultBaseUrl: 'https://api.githubcopilot.com',
+    defaultModelId: 'gpt-5-mini',
+    supportsImages: true,
+    contextWindow: 128000,
+    setupGuideUrl: 'https://docs.browseros.com/features/github-copilot-oauth',
+  },
+  {
+    id: 'qwen-code',
+    name: 'Qwen Code',
+    defaultBaseUrl: 'https://portal.qwen.ai/v1',
+    defaultModelId: 'coder-model',
+    supportsImages: true,
+    contextWindow: 1000000,
+    setupGuideUrl: 'https://docs.browseros.com/features/qwen-code-oauth',
+  },
  {
    id: 'moonshot',
    name: 'Moonshot AI',
@@ -129,6 +156,9 @@ export const providerTemplates: ProviderTemplate[] = [
 * @public
 */
 export const providerTypeOptions: { value: ProviderType; label: string }[] = [
+  { value: 'chatgpt-pro', label: 'ChatGPT Plus/Pro' },
+  { value: 'github-copilot', label: 'GitHub Copilot' },
+  { value: 'qwen-code', label: 'Qwen Code' },
  { value: 'moonshot', label: 'Moonshot AI' },
  { value: 'anthropic', label: 'Anthropic' },
  { value: 'openai', label: 'OpenAI' },
@@ -157,6 +187,9 @@ export const getProviderTemplate = (
 * Auto-fills when user selects a provider type
 */
 export const DEFAULT_BASE_URLS: Record<ProviderType, string> = {
+  'chatgpt-pro': 'https://chatgpt.com/backend-api',
+  'github-copilot': 'https://api.githubcopilot.com',
+  'qwen-code': 'https://portal.qwen.ai/v1',
  moonshot: 'https://api.moonshot.ai/v1',
  anthropic: 'https://api.anthropic.com/v1',
  openai: 'https://api.openai.com/v1',
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/types.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/types.ts
@@ -14,6 +14,9 @@ export type ProviderType =
  | 'bedrock'
  | 'browseros'
  | 'moonshot'
+  | 'chatgpt-pro'
+  | 'github-copilot'
+  | 'qwen-code'

 /**
 * LLM Provider configuration
@@ -56,6 +59,10 @@ export interface LlmProviderConfig {
  region?: string
  /** AWS session token (for temporary STS credentials) */
  sessionToken?: string
+
+  // ChatGPT Pro (Codex) fields
+  reasoningEffort?: 'none' | 'low' | 'medium' | 'high'
+  reasoningSummary?: 'auto' | 'concise' | 'detailed'
 }

 /**
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/useOAuthProviderFlow.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/useOAuthProviderFlow.ts
@@ -0,0 +1,196 @@
+import { useEffect, useRef } from 'react'
+import { toast } from 'sonner'
+import { track } from '@/lib/metrics/track'
+import {
+  type ClientAuthConfig,
+  requestDeviceCode,
+  startTokenPolling,
+} from './client-oauth'
+import { getProviderTemplate } from './providerTemplates'
+import type { LlmProviderConfig, ProviderType } from './types'
+import { useOAuthStatus } from './useOAuthStatus'
+
+export interface OAuthProviderFlowConfig {
+  providerType: ProviderType
+  displayName: string
+  startedEvent: string
+  completedEvent: string
+  disconnectedEvent: string
+  /** Client-side auth for providers with WAF-protected endpoints */
+  clientAuth?: ClientAuthConfig
+}
+
+interface OAuthProviderFlowReturn {
+  status: { authenticated: boolean; email?: string } | null
+  disconnect: () => Promise<void>
+  startOAuthFlow: (agentServerUrl: string | undefined) => Promise<void>
+}
+
+/**
+ * Drives the OAuth connect flow for one provider type: starts the device-code
+ * or redirect flow, polls the server for completion, and creates a provider
+ * entry from the matching template once authentication is reported.
+ *
+ * @param config - Provider type, display name, analytics event names and
+ *   optional client-side auth settings.
+ * @param providers - Existing providers, checked to avoid adding a duplicate.
+ * @param saveProvider - Stores the auto-created provider (sync or async).
+ * @returns The auth status plus `disconnect` and `startOAuthFlow`.
+ */
+export function useOAuthProviderFlow(
+  config: OAuthProviderFlowConfig,
+  providers: LlmProviderConfig[],
+  saveProvider: (provider: LlmProviderConfig) => Promise<void> | void,
+): OAuthProviderFlowReturn {
+  const { status, startPolling, disconnect } = useOAuthStatus(
+    config.providerType,
+  )
+  const flowStartedRef = useRef(false)
+
+  // Auto-create provider when OAuth completes
+  // biome-ignore lint/correctness/useExhaustiveDependencies: intentional — only trigger on auth status change
+  useEffect(() => {
+    if (!status?.authenticated) return
+    if (!flowStartedRef.current) return
+    if (providers.some((p) => p.type === config.providerType)) return
+
+    // Clear the flag before the async save so one flow creates one provider.
+    flowStartedRef.current = false
+    const { email } = status
+    const now = Date.now()
+
+    const createProvider = async () => {
+      try {
+        const template = getProviderTemplate(config.providerType)
+        // Await so a rejected save reaches the catch below and the success
+        // toast only appears once the provider has been stored.
+        await saveProvider({
+          id: `${config.providerType}-${now}`,
+          type: config.providerType,
+          name: `${config.displayName}${email ? ` (${email})` : ''}`,
+          modelId: template?.defaultModelId ?? '',
+          supportsImages: template?.supportsImages ?? true,
+          contextWindow: template?.contextWindow ?? 128000,
+          temperature: 0.2,
+          createdAt: now,
+          updatedAt: now,
+        })
+        track(config.completedEvent, { email })
+        toast.success(`${config.displayName} Connected`, {
+          description: email
+            ? `Authenticated as ${email}`
+            : `Successfully authenticated with ${config.displayName}`,
+        })
+      } catch (err) {
+        toast.error(`Failed to create ${config.displayName} provider`, {
+          description: err instanceof Error ? err.message : 'Unknown error',
+        })
+      }
+    }
+    createProvider()
+  }, [status?.authenticated])
+
+  async function startOAuthFlow(agentServerUrl: string | undefined) {
+    if (!agentServerUrl) {
+      toast.error('Server not available', {
+        description: 'Cannot start OAuth flow without server connection.',
+      })
+      return
+    }
+
+    flowStartedRef.current = true
+
+    try {
+      if (config.clientAuth) {
+        await handleClientAuth(config.clientAuth, agentServerUrl)
+      } else {
+        await handleServerAuth(agentServerUrl)
+      }
+    } catch (err) {
+      flowStartedRef.current = false
+      toast.error(`Failed to start ${config.displayName} authentication`, {
+        description: err instanceof Error ? err.message : 'Unknown error',
+      })
+    }
+  }
+
+  // Client-side: extension handles device code + polling, sends token to server
+  async function handleClientAuth(auth: ClientAuthConfig, serverUrl: string) {
+    const { deviceData, codeVerifier } = await requestDeviceCode(auth)
+
+    const verificationUri =
+      deviceData.verification_uri_complete ?? deviceData.verification_uri
+    window.open(verificationUri, '_blank')
+    track(config.startedEvent)
+    toast.info(`Enter code: ${deviceData.user_code}`, {
+      description: `Paste this code on the ${config.displayName} page that just opened.`,
+      duration: 60_000,
+    })
+
+    startTokenPolling(auth, deviceData, codeVerifier, async (token) => {
+      // The poller does not await this callback, so failures are handled here;
+      // otherwise they become unhandled rejections with no user feedback.
+      try {
+        const url = `${serverUrl}/oauth/${config.providerType}/token`
+        const res = await fetch(url, {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify(token),
+        })
+        if (!res.ok) throw new Error(`Server returned ${res.status}`)
+        startPolling()
+      } catch (err) {
+        flowStartedRef.current = false
+        toast.error(`${config.displayName} authentication failed`, {
+          description: err instanceof Error ? err.message : 'Unknown error',
+        })
+      }
+    })
+  }
+
+  // Server-side: server handles device code + polling
+  async function handleServerAuth(agentServerUrl: string) {
+    const res = await fetch(
+      `${agentServerUrl}/oauth/${config.providerType}/start`,
+    )
+
+    if (res.headers.get('content-type')?.includes('application/json')) {
+      const data = (await res.json()) as {
+        userCode?: string
+        verificationUri?: string
+        error?: string
+      }
+
+      if (!res.ok || data.error) {
+        throw new Error(data.error || `Server returned ${res.status}`)
+      }
+      if (!data.userCode || !data.verificationUri) {
+        throw new Error('Invalid response from server')
+      }
+
+      window.open(data.verificationUri, '_blank')
+      startPolling()
+      track(config.startedEvent)
+      toast.info(`Enter code: ${data.userCode}`, {
+        description: `Paste this code on the ${config.displayName} page that just opened.`,
+        duration: 60_000,
+      })
+      return
+    }
+
+    // PKCE redirect flow
+    if (!res.ok) throw new Error(`Server returned ${res.status}`)
+    window.open(res.url, '_blank')
+    startPolling()
+    track(config.startedEvent)
+    toast.info(`Authenticating with ${config.displayName}`, {
+      description: 'Complete the login in the opened tab.',
+    })
+  }
+
+  return {
+    status,
+    disconnect,
+    startOAuthFlow,
+  }
+}
--- a/packages/browseros-agent/apps/agent/lib/llm-providers/useOAuthStatus.ts
+++ b/packages/browseros-agent/apps/agent/lib/llm-providers/useOAuthStatus.ts
@@ -0,0 +1,90 @@
+import { useEffect, useRef, useState } from 'react'
+import { getAgentServerUrl } from '@/lib/browseros/helpers'
+
+interface OAuthStatus {
+  authenticated: boolean
+  email?: string
+  provider: string
+}
+
+interface UseOAuthStatusReturn {
+  status: OAuthStatus | null
+  isPolling: boolean
+  startPolling: () => void
+  stopPolling: () => void
+  refresh: () => Promise<OAuthStatus | null>
+  disconnect: () => Promise<void>
+}
+
+/**
+ * Tracks the agent server's OAuth status for `provider` and exposes helpers to
+ * refresh it, poll it every 2 s (giving up after 5 min), and disconnect.
+ */
+export function useOAuthStatus(provider: string): UseOAuthStatusReturn {
+  const [status, setStatus] = useState<OAuthStatus | null>(null)
+  const [isPolling, setIsPolling] = useState(false)
+  const pollIntervalRef = useRef<ReturnType<typeof setInterval> | null>(null)
+  const pollTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
+
+  // Resolves to the latest status, or null when the request fails
+  async function fetchStatus(): Promise<OAuthStatus | null> {
+    try {
+      const base = await getAgentServerUrl()
+      const response = await fetch(`${base}/oauth/${provider}/status`)
+      if (!response.ok) return null
+      const latest = (await response.json()) as OAuthStatus
+      setStatus(latest)
+      return latest
+    } catch {
+      return null
+    }
+  }
+
+  function stopPolling() {
+    if (pollIntervalRef.current) clearInterval(pollIntervalRef.current)
+    if (pollTimeoutRef.current) clearTimeout(pollTimeoutRef.current)
+    pollIntervalRef.current = null
+    pollTimeoutRef.current = null
+    setIsPolling(false)
+  }
+
+  function startPolling() {
+    stopPolling()
+    setIsPolling(true)
+
+    pollIntervalRef.current = setInterval(async () => {
+      const latest = await fetchStatus()
+      if (latest?.authenticated) {
+        stopPolling()
+      }
+    }, 2_000)
+
+    pollTimeoutRef.current = setTimeout(stopPolling, 300_000)
+  }
+
+  async function disconnect() {
+    try {
+      const base = await getAgentServerUrl()
+      await fetch(`${base}/oauth/${provider}`, { method: 'DELETE' })
+      setStatus({ authenticated: false, provider })
+    } catch {
+      // Best-effort disconnect
+    }
+  }
+
+  // Fetch once on mount; stop any active polling on unmount
+  // biome-ignore lint/correctness/useExhaustiveDependencies: mount/unmount only
+  useEffect(() => {
+    fetchStatus()
+    return () => stopPolling()
+  }, [])
+
+  return {
+    status,
+    isPolling,
+    startPolling,
+    stopPolling,
+    refresh: fetchStatus,
+    disconnect,
+  }
+}
--- a/packages/browseros-agent/apps/agent/lib/messaging/schedules/scheduleMessages.ts
+++ b/packages/browseros-agent/apps/agent/lib/messaging/schedules/scheduleMessages.ts
@@ -23,4 +23,4 @@ type ScheduleMessagesProtocol = {
 const { sendMessage, onMessage } =
  defineExtensionMessaging<ScheduleMessagesProtocol>()

-export { sendMessage as sendScheduleMessage, onMessage as onScheduleMessage }
+export { onMessage as onScheduleMessage, sendMessage as sendScheduleMessage }
--- a/packages/browseros-agent/apps/agent/lib/messaging/server/serverMessages.ts
+++ b/packages/browseros-agent/apps/agent/lib/messaging/server/serverMessages.ts
@@ -12,4 +12,4 @@ type ServerMessagesProtocol = {
 const { sendMessage, onMessage } =
  defineExtensionMessaging<ServerMessagesProtocol>()

-export { sendMessage as sendServerMessage, onMessage as onServerMessage }
+export { onMessage as onServerMessage, sendMessage as sendServerMessage }
--- a/packages/browseros-agent/apps/agent/lib/messaging/sidepanel/openSidepanelWithSearch.ts
+++ b/packages/browseros-agent/apps/agent/lib/messaging/sidepanel/openSidepanelWithSearch.ts
@@ -12,6 +12,6 @@ const { sendMessage, onMessage } =
  defineExtensionMessaging<OpenSidePanelWithSearchParams>()

 export {
-  sendMessage as openSidePanelWithSearch,
  onMessage as onOpenSidePanelWithSearch,
+  sendMessage as openSidePanelWithSearch,
 }
--- a/packages/browseros-agent/apps/agent/lib/schedules/scheduleStorage.ts
+++ b/packages/browseros-agent/apps/agent/lib/schedules/scheduleStorage.ts
@@ -22,6 +22,13 @@ export const scheduledJobRunStorage = storage.defineItem<ScheduledJobRun[]>(
  },
 )

+// IDs of jobs removed locally. Each stays listed until a backend sync has
+// deleted the remote row or found that it no longer exists.
+export const pendingDeletionStorage = storage.defineItem<string[]>(
+  'local:scheduledJobsPendingDeletion',
+  { fallback: [] },
+)
+
 export function useScheduledJobs() {
  const [jobs, setJobs] = useState<ScheduledJob[]>([])

@@ -54,6 +61,11 @@ export function useScheduledJobs() {
  const removeJob = async (id: string) => {
    await chrome.alarms.clear(getAlarmName(id))

+    const pending = (await pendingDeletionStorage.getValue()) ?? []
+    if (!pending.includes(id)) {
+      await pendingDeletionStorage.setValue([...pending, id])
+    }
+
    const currentJobs = (await scheduledJobStorage.getValue()) ?? []
    await scheduledJobStorage.setValue(currentJobs.filter((j) => j.id !== id))

--- a/packages/browseros-agent/apps/agent/lib/schedules/syncSchedulesToBackend.ts
+++ b/packages/browseros-agent/apps/agent/lib/schedules/syncSchedulesToBackend.ts
@@ -5,10 +5,11 @@ import { sentry } from '@/lib/sentry/sentry'
 import { createAlarmFromJob } from './createAlarmFromJob'
 import {
  CreateScheduledJobDocument,
+  DeleteScheduledJobDocument,
  GetScheduledJobsByProfileIdDocument,
  UpdateScheduledJobDocument,
 } from './graphql/syncSchedulesDocument'
-import { scheduledJobStorage } from './scheduleStorage'
+import { pendingDeletionStorage, scheduledJobStorage } from './scheduleStorage'
 import type { ScheduledJob } from './scheduleTypes'

 type RemoteScheduledJob = {
@@ -99,6 +100,32 @@ export async function syncSchedulesToBackend(
    }
  }

+  const pendingDeletions = new Set(
+    (await pendingDeletionStorage.getValue()) ?? [],
+  )
+  const resolvedDeletions = new Set<string>()
+
+  for (const rowId of pendingDeletions) {
+    if (remoteJobs.has(rowId)) {
+      try {
+        await execute(DeleteScheduledJobDocument, { rowId })
+        remoteJobs.delete(rowId)
+        resolvedDeletions.add(rowId)
+      } catch (error) {
+        sentry.captureException(error, {
+          extra: { jobId: rowId, context: 'sync-pending-deletion' },
+        })
+      }
+    } else {
+      resolvedDeletions.add(rowId)
+    }
+  }
+
+  const latestPending = (await pendingDeletionStorage.getValue()) ?? []
+  await pendingDeletionStorage.setValue(
+    latestPending.filter((id) => !resolvedDeletions.has(id)),
+  )
+
  const localJobsMap = new Map(localJobs.map((j) => [j.id, j]))
  const jobsToAddLocally: ScheduledJob[] = []
  const jobsToUpdateLocally: ScheduledJob[] = []
--- a/packages/browseros-agent/apps/agent/lib/selected-text/selectedTextStorage.ts
+++ b/packages/browseros-agent/apps/agent/lib/selected-text/selectedTextStorage.ts
@@ -0,0 +1,14 @@
+import { storage } from '@wxt-dev/storage'
+
+export interface SelectedTextData {
+  text: string
+  pageUrl: string
+  pageTitle: string
+  tabId: number
+  timestamp: number
+}
+
+/** Map of tabId → selected text. Each tab's selection is independent. */
+export const selectedTextStorage = storage.defineItem<
+  Record<string, SelectedTextData>
+>('local:selectedTextMap', { fallback: {} })
--- a/packages/browseros-agent/apps/agent/lib/voice/transcribe-audio.ts
+++ b/packages/browseros-agent/apps/agent/lib/voice/transcribe-audio.ts
@@ -0,0 +1,29 @@
+const GATEWAY_URL = 'https://llm.browseros.com'
+
+interface TranscribeResponse {
+  text: string
+}
+
+/**
+ * POSTs a recording to the gateway; resolves to the text, rejects on failure.
+ */
+export async function transcribeAudio(audioBlob: Blob): Promise<string> {
+  const formData = new FormData()
+  formData.append('file', audioBlob, 'recording.webm')
+  formData.append('response_format', 'json')
+
+  const response = await fetch(`${GATEWAY_URL}/api/transcribe`, {
+    method: 'POST',
+    body: formData,
+    signal: AbortSignal.timeout(30_000),
+  })
+
+  if (!response.ok) {
+    // A non-JSON error body yields {} so the HTTP status is still reported.
+    const err: { error?: string } = await response.json().catch(() => ({}))
+    throw new Error(err.error || `Transcription failed: ${response.status}`)
+  }
+
+  const result: TranscribeResponse = await response.json()
+  return result.text || ''
+}
--- a/packages/browseros-agent/apps/agent/lib/voice/useVoiceInput.ts
+++ b/packages/browseros-agent/apps/agent/lib/voice/useVoiceInput.ts
@@ -1,6 +1,6 @@
 import { useEffect, useRef, useState } from 'react'
+import { transcribeAudio } from './transcribe-audio'

-const GATEWAY_URL = 'https://llm.browseros.com'
 const WAVEFORM_BAND_COUNT = 5

 export interface VoiceInputState {
@@ -26,32 +26,6 @@ export interface UseVoiceInputReturn {

 const EMPTY_LEVELS = Array(WAVEFORM_BAND_COUNT).fill(0)

-interface TranscribeResponse {
-  text: string
-}
-
-async function transcribeAudio(audioBlob: Blob): Promise<string> {
-  const formData = new FormData()
-  formData.append('file', audioBlob, 'recording.webm')
-  formData.append('response_format', 'json')
-
-  const response = await fetch(`${GATEWAY_URL}/api/transcribe`, {
-    method: 'POST',
-    body: formData,
-    signal: AbortSignal.timeout(30_000),
-  })
-
-  if (!response.ok) {
-    const errorBody: { error?: string } = await response
-      .json()
-      .catch(() => ({ error: 'Transcription failed' }))
-    throw new Error(errorBody.error || `Transcription failed: ${response.status}`)
-  }
-
-  const result: TranscribeResponse = await response.json()
-  return result.text || ''
-}
-
 export function useVoiceInput(): UseVoiceInputReturn {
  const [isRecording, setIsRecording] = useState(false)
  const [isTranscribing, setIsTranscribing] = useState(false)
@@ -81,6 +55,7 @@ export function useVoiceInput(): UseVoiceInputReturn {
    setAudioLevels(EMPTY_LEVELS)
  }

+  // biome-ignore lint/correctness/useExhaustiveDependencies: cleanup only needs to run on unmount
  useEffect(() => {
    return () => {
      streamRef.current?.getTracks().forEach((track) => {
@@ -171,7 +146,9 @@ export function useVoiceInput(): UseVoiceInputReturn {
      setIsRecording(true)
      return true
    } catch (err) {
-      streamRef.current?.getTracks().forEach((track) => track.stop())
+      streamRef.current?.getTracks().forEach((track) => {
+        track.stop()
+      })
      streamRef.current = null
      stopAudioLevelMonitoring()

--- a/packages/browseros-agent/apps/agent/package.json
+++ b/packages/browseros-agent/apps/agent/package.json
@@ -9,9 +9,9 @@
    "build": "bun run codegen && wxt build",
    "build:dev": "bun --env-file=.env.development wxt build --mode development",
    "zip": "wxt zip",
-    "compile": "tsc --noEmit",
+    "compile": "tsgo --noEmit",
    "lint": "bunx biome check",
-    "typecheck": "tsc --noEmit",
+    "typecheck": "tsgo --noEmit",
    "lint:fix": "bunx biome check --write --unsafe",
    "clean:cache": "rm -rf node_modules/.cache && rm -rf .output/ && rm -rf .wxt/",
    "codegen": "bun --env-file=.env.development graphql-codegen --config codegen.ts",
@@ -79,6 +79,7 @@
    "react": "^19.1.1",
    "react-dom": "^19.1.1",
    "react-hook-form": "^7.66.1",
+    "react-markdown": "^10.1.0",
    "react-resizable-panels": "^4.3.3",
    "react-router": "^7.12.0",
    "shiki": "^3.15.0",
--- a/packages/browseros-agent/apps/agent/tsconfig.json
+++ b/packages/browseros-agent/apps/agent/tsconfig.json
@@ -4,7 +4,6 @@
    "types": ["chrome", "bun"],
    "allowImportingTsExtensions": true,
    "jsx": "react-jsx",
-    "baseUrl": ".",
    "paths": {
      "@/*": ["./*"]
    },
--- a/packages/browseros-agent/apps/agent/wxt.config.ts
+++ b/packages/browseros-agent/apps/agent/wxt.config.ts
@@ -55,6 +55,7 @@ export default defineConfig({
    permissions: [
      'topSites',
      'tabs',
+      'tabGroups',
      'storage',
      'sidePanel',
      'browserOS',
--- a/packages/browseros-agent/apps/cli/cmd/root.go
+++ b/packages/browseros-agent/apps/cli/cmd/root.go
@@ -1,8 +1,10 @@
 package cmd

 import (
+	"encoding/json"
 	"fmt"
 	"os"
+	"path/filepath"
 	"strconv"
 	"strings"
 	"time"
@@ -170,11 +172,44 @@ func defaultServerURL() string {
 	}

 	cfg, err := config.Load()
+	if err == nil {
+		if url := normalizeServerURL(cfg.ServerURL); url != "" {
+			return url
+		}
+	}
+
+	if url := loadBrowserosServerURL(); url != "" {
+		return url
+	}
+
+	return ""
+}
+
+type serverDiscoveryConfig struct {
+	ServerPort       int    `json:"server_port"`
+	URL              string `json:"url"`
+	ServerVersion    string `json:"server_version"`
+	BrowserOSVersion string `json:"browseros_version,omitempty"`
+	ChromiumVersion  string `json:"chromium_version,omitempty"`
+}
+
+func loadBrowserosServerURL() string {
+	home, err := os.UserHomeDir()
 	if err != nil {
 		return ""
 	}

-	return normalizeServerURL(cfg.ServerURL)
+	data, err := os.ReadFile(filepath.Join(home, ".browseros", "server.json"))
+	if err != nil {
+		return ""
+	}
+
+	var sc serverDiscoveryConfig
+	if err := json.Unmarshal(data, &sc); err != nil {
+		return ""
+	}
+
+	return normalizeServerURL(sc.URL)
 }

 func normalizeServerURL(raw string) string {
--- a/packages/browseros-agent/apps/eval/.env.example
+++ b/packages/browseros-agent/apps/eval/.env.example
@@ -0,0 +1,11 @@
+# Showcase generator — LLM config
+OPENROUTER_API_KEY=
+SHOWCASE_MODEL=openai/gpt-4o
+SHOWCASE_PROVIDER=openrouter
+SHOWCASE_BASE_URL=https://openrouter.ai/api/v1
+
+# R2 upload (for --upload flag)
+R2_ACCOUNT_ID=
+R2_ACCESS_KEY_ID=
+R2_SECRET_ACCESS_KEY=
+R2_BUCKET=rl-env
--- a/packages/browseros-agent/apps/eval/.gitignore
+++ b/packages/browseros-agent/apps/eval/.gitignore
@@ -1,2 +1,5 @@
 data/raw/
 results/
+extensions/
+showcase-output/
+.env
--- a/packages/browseros-agent/apps/eval/configs/browseros-agent-weekly.json
+++ b/packages/browseros-agent/apps/eval/configs/browseros-agent-weekly.json
@@ -0,0 +1,26 @@
+{
+  "agent": {
+    "type": "single",
+    "provider": "openai-compatible",
+    "model": "accounts/fireworks/models/kimi-k2p5",
+    "apiKey": "FIREWORKS_API_KEY",
+    "baseUrl": "https://api.fireworks.ai/inference/v1",
+    "supportsImages": true
+  },
+  "dataset": "../data/webbench-2of4-50.jsonl",
+  "num_workers": 10,
+  "restart_server_per_task": true,
+  "browseros": {
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": true
+  },
+  "graders": ["performance_grader"],
+  "grader_api_key_env": "OPENROUTER_API_KEY",
+  "grader_base_url": "https://openrouter.ai/api/v1",
+  "grader_model": "openai/gpt-4.1",
+  "timeout_ms": 1800000
+}
--- a/packages/browseros-agent/apps/eval/configs/browseros-oe-clado-weekly.json
+++ b/packages/browseros-agent/apps/eval/configs/browseros-oe-clado-weekly.json
@@ -0,0 +1,33 @@
+{
+  "agent": {
+    "type": "orchestrator-executor",
+    "orchestrator": {
+      "provider": "openai-compatible",
+      "model": "accounts/fireworks/models/kimi-k2p5",
+      "apiKey": "FIREWORKS_API_KEY",
+      "baseUrl": "https://api.fireworks.ai/inference/v1"
+    },
+    "executor": {
+      "provider": "clado-action",
+      "model": "qwen3-vl-30b-a3b-instruct",
+      "apiKey": "",
+      "baseUrl": "https://clado-ai--clado-browseros-action-actionmodel-generate.modal.run"
+    }
+  },
+  "dataset": "../data/webbench-2of4-50.jsonl",
+  "num_workers": 10,
+  "restart_server_per_task": true,
+  "browseros": {
+    "server_url": "http://127.0.0.1:9110",
+    "base_cdp_port": 9010,
+    "base_server_port": 9110,
+    "base_extension_port": 9310,
+    "load_extensions": false,
+    "headless": true
+  },
+  "graders": ["performance_grader"],
+  "grader_api_key_env": "OPENROUTER_API_KEY",
+  "grader_base_url": "https://openrouter.ai/api/v1",
+  "grader_model": "openai/gpt-4.1",
+  "timeout_ms": 1800000
+}
--- a/packages/browseros-agent/apps/eval/data/prod-tasks.jsonl
+++ b/packages/browseros-agent/apps/eval/data/prod-tasks.jsonl
@@ -0,0 +1,20 @@
+{"query_id":"prod-financial-advisor-morningstar","dataset":"prod","query":"Look up the Morningstar rating and expense ratio for Vanguard Total Stock Market Index Fund (VTSAX) on morningstar.com","start_url":"https://www.morningstar.com","metadata":{"original_task_id":"prod-financial-advisor-morningstar","category":"finance"}}
+{"query_id":"prod-lawyer-pacer-search","dataset":"prod","query":"Go to courtlistener.com and search for recent federal court opinions mentioning 'non-compete agreement' from 2025","start_url":"https://www.courtlistener.com","metadata":{"original_task_id":"prod-lawyer-pacer-search","category":"legal"}}
+{"query_id":"prod-doctor-drug-interaction","dataset":"prod","query":"Check drug interactions between metformin and lisinopril on drugs.com","start_url":"https://www.drugs.com/drug_interactions.html","metadata":{"original_task_id":"prod-doctor-drug-interaction","category":"healthcare"}}
+{"query_id":"prod-software-eng-github-trending","dataset":"prod","query":"Find the top trending Python repositories on GitHub this week and open the most starred one","start_url":"https://github.com/trending","metadata":{"original_task_id":"prod-software-eng-github-trending","category":"technology"}}
+{"query_id":"prod-cfo-sec-filing","dataset":"prod","query":"Look up Apple's most recent 10-K filing on SEC EDGAR and find their total revenue for fiscal year 2024","start_url":"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&company=apple&CIK=&type=10-K&dateb=&owner=include&count=10&search_text=&action=getcompany","metadata":{"original_task_id":"prod-cfo-sec-filing","category":"finance"}}
+{"query_id":"prod-realtor-zillow-comp","dataset":"prod","query":"Search for recently sold homes in Palo Alto, CA on Zillow and filter for 3+ bedrooms sold in the last 30 days","start_url":"https://www.zillow.com","metadata":{"original_task_id":"prod-realtor-zillow-comp","category":"real_estate"}}
+{"query_id":"prod-hr-manager-linkedin-job","dataset":"prod","query":"Post a job listing search on LinkedIn for a Senior Product Manager role in San Francisco with salary range $180k-$220k","start_url":"https://www.linkedin.com/jobs/","metadata":{"original_task_id":"prod-hr-manager-linkedin-job","category":"hr"}}
+{"query_id":"prod-analyst-fred-data","dataset":"prod","query":"Go to FRED and pull up the US Consumer Price Index chart, change the time range to the last 5 years","start_url":"https://fred.stlouisfed.org","metadata":{"original_task_id":"prod-analyst-fred-data","category":"finance"}}
+{"query_id":"prod-accountant-irs-form","dataset":"prod","query":"Find and download the latest IRS Form W-9 from irs.gov","start_url":"https://www.irs.gov","metadata":{"original_task_id":"prod-accountant-irs-form","category":"finance"}}
+{"query_id":"prod-sales-manager-crm-research","dataset":"prod","query":"Go to g2.com and compare the top 3 CRM software platforms by user rating and pricing","start_url":"https://www.g2.com/categories/crm","metadata":{"original_task_id":"prod-sales-manager-crm-research","category":"sales"}}
+{"query_id":"prod-engineer-stackoverflow","dataset":"prod","query":"Search Stack Overflow for how to implement retry logic with exponential backoff in Python and find the highest voted answer","start_url":"https://stackoverflow.com","metadata":{"original_task_id":"prod-engineer-stackoverflow","category":"technology"}}
+{"query_id":"prod-pm-producthunt","dataset":"prod","query":"Browse today's top launches on Product Hunt and upvote the highest ranked AI product","start_url":"https://www.producthunt.com","metadata":{"original_task_id":"prod-pm-producthunt","category":"technology"}}
+{"query_id":"prod-pharmacist-fda-recall","dataset":"prod","query":"Check the FDA website for any recent drug recalls in the last month","start_url":"https://www.fda.gov/safety/recalls-market-withdrawals-safety-alerts","metadata":{"original_task_id":"prod-pharmacist-fda-recall","category":"healthcare"}}
+{"query_id":"prod-investment-analyst-yahoo-finance","dataset":"prod","query":"Look up NVIDIA stock on Yahoo Finance, check the P/E ratio, and add it to a watchlist","start_url":"https://finance.yahoo.com","metadata":{"original_task_id":"prod-investment-analyst-yahoo-finance","category":"finance"}}
+{"query_id":"prod-compliance-officer-regulations","dataset":"prod","query":"Search for the latest GDPR enforcement actions on the European Data Protection Board website","start_url":"https://www.edpb.europa.eu/news/news_en","metadata":{"original_task_id":"prod-compliance-officer-regulations","category":"compliance"}}
+{"query_id":"prod-management-consultant-mckinsey","dataset":"prod","query":"Go to McKinsey's insights page and find their latest article about generative AI's impact on productivity","start_url":"https://www.mckinsey.com/featured-insights","metadata":{"original_task_id":"prod-management-consultant-mckinsey","category":"consulting"}}
+{"query_id":"prod-operations-manager-shipping","dataset":"prod","query":"Track a FedEx package with tracking number 123456789012 on fedex.com","start_url":"https://www.fedex.com/en-us/tracking.html","metadata":{"original_task_id":"prod-operations-manager-shipping","category":"operations"}}
+{"query_id":"prod-market-researcher-statista","dataset":"prod","query":"Search Statista for the global AI market size forecast and find the projected value for 2026","start_url":"https://www.statista.com","metadata":{"original_task_id":"prod-market-researcher-statista","category":"research"}}
+{"query_id":"prod-nurse-uptodate","dataset":"prod","query":"Search WebMD for the recommended dosing guidelines for adult acetaminophen and check the maximum daily dose","start_url":"https://www.webmd.com","metadata":{"original_task_id":"prod-nurse-uptodate","category":"healthcare"}}
+{"query_id":"prod-executive-flights","dataset":"prod","query":"Search Google Flights for a business class round trip from SFO to JFK departing next Monday returning Friday","start_url":"https://www.google.com/travel/flights","metadata":{"original_task_id":"prod-executive-flights","category":"travel"}}
--- a/packages/browseros-agent/apps/eval/data/showcase-tasks.jsonl
+++ b/packages/browseros-agent/apps/eval/data/showcase-tasks.jsonl
@@ -0,0 +1 @@
+{"query_id":"showcase-amazon-order","dataset":"showcase","query":"Open amazon.com and order Sensodyne toothpaste","start_url":"https://www.amazon.com","metadata":{"original_task_id":"showcase-amazon-order"}}
--- a/packages/browseros-agent/apps/eval/package.json
+++ b/packages/browseros-agent/apps/eval/package.json
@@ -9,12 +9,13 @@
  },
  "dependencies": {
    "@anthropic-ai/claude-agent-sdk": "^0.2.63",
+    "@aws-sdk/client-s3": "^3.1014.0",
    "@browseros/server": "workspace:*",
    "@browseros/shared": "workspace:*",
    "@google/gemini-cli-core": "^0.16.0",
-    "ai": "^6.0.94",
    "@google/genai": "1.30.0",
    "@modelcontextprotocol/sdk": "^1.25.2",
+    "ai": "^6.0.94",
    "hono": "^4.6.0",
    "openai": "^4.0.0",
    "sharp": "^0.34.5",
--- a/packages/browseros-agent/apps/eval/scripts/annotate-screenshots.ts
+++ b/packages/browseros-agent/apps/eval/scripts/annotate-screenshots.ts
@@ -173,7 +173,9 @@ async function annotateScreenshot(

  const image = sharp(inputPath)
  const metadata = await image.metadata()
+  // biome-ignore lint/style/noNonNullAssertion: sharp metadata always has dimensions for valid images
  const imgWidth = metadata.width!
+  // biome-ignore lint/style/noNonNullAssertion: sharp metadata always has dimensions for valid images
  const imgHeight = metadata.height!

  const sx = Math.round(action.cssX * dpr)
--- a/packages/browseros-agent/apps/eval/scripts/debug-long-run.ts
+++ b/packages/browseros-agent/apps/eval/scripts/debug-long-run.ts
@@ -49,10 +49,13 @@ async function callMcpTool(
    const result = await Promise.race([toolPromise, timeoutPromise])
    const duration = Date.now() - start

-    if ((result as any).isError) {
+    const res = result as Record<string, unknown>
+    if (res.isError) {
+      const content = res.content as
+        | Array<{ type: string; text?: string }>
+        | undefined
      const errorText =
-        (result as any).content?.find((c: any) => c.type === 'text')?.text ||
-        'Unknown error'
+        content?.find((c) => c.type === 'text')?.text || 'Unknown error'
      return { success: false, error: errorText, duration }
    }

@@ -96,13 +99,19 @@ async function main() {
    })

    // Try structured content first
-    windowId = (result as any).structuredContent?.windowId
-    tabId = (result as any).structuredContent?.tabId
+    const createRes = result as Record<string, unknown>
+    const structured = createRes.structuredContent as
+      | Record<string, number>
+      | undefined
+    windowId = structured?.windowId ?? 0
+    tabId = structured?.tabId ?? 0

    // Fall back to parsing text
    if (!windowId || !tabId) {
-      const text =
-        (result as any).content?.find((c: any) => c.type === 'text')?.text || ''
+      const content = createRes.content as
+        | Array<{ type: string; text?: string }>
+        | undefined
+      const text = content?.find((c) => c.type === 'text')?.text || ''
      const windowMatch = text.match(/window\s+(\d+)/i)
      const tabMatch =
        text.match(/Tab ID:\s*(\d+)/i) || text.match(/tab\s+(\d+)/i)
--- a/packages/browseros-agent/apps/eval/scripts/showcase/crosshair.ts
+++ b/packages/browseros-agent/apps/eval/scripts/showcase/crosshair.ts
@@ -0,0 +1,52 @@
+import type { Browser } from '@browseros/server/browser'
+
+// DOM id of the overlay element; reused so a previous overlay can be found
+// and removed.
+const CROSSHAIR_ID = '__browseros_showcase_crosshair__'
+
+/**
+ * Draws a temporary crosshair overlay, with a tool-name label, on a page.
+ *
+ * Any element already carrying the crosshair id is removed first, so at most
+ * one overlay exists at a time. The overlay is a fixed, full-viewport `<div>`
+ * with `pointer-events:none`, so it does not intercept input.
+ *
+ * @param browser  Browser used to evaluate the injection script.
+ * @param pageId   Page on which the overlay is drawn.
+ * @param coords   Target point; rounded to integers and used as SVG
+ *                 coordinates inside the full-viewport overlay.
+ * @param toolName Tool being visualised; underscores are shown as spaces.
+ */
+export async function injectCrosshair(
+  browser: Browser,
+  pageId: number,
+  coords: { x: number; y: number },
+  toolName: string,
+): Promise<void> {
+  const x = Math.round(coords.x)
+  const y = Math.round(coords.y)
+  const label = toolName.replace(/_/g, ' ')
+  // The label ends up inside a single-quoted JS string literal that is then
+  // assigned to innerHTML. Encode every character that could terminate that
+  // literal or be parsed as markup as a numeric character reference, so an
+  // unusual tool name can neither break the script nor inject HTML.
+  const safeLabel = label.replace(
+    /[&<>"'\\\r\n\u2028\u2029]/g,
+    (ch) => `&#${ch.charCodeAt(0)};`,
+  )
+  // Width estimate for the label background: 9 units per character plus 24.
+  const labelWidth = Math.round(label.length * 9 + 24)
+  // The label box sits up and to the right of the target point.
+  const labelX = x + 32
+  const labelY = y - 32
+
+  await browser.evaluate(
+    pageId,
+    `(() => {
+      const existing = document.getElementById('${CROSSHAIR_ID}');
+      if (existing) existing.remove();
+
+      const el = document.createElement('div');
+      el.id = '${CROSSHAIR_ID}';
+      el.style.cssText = 'position:fixed;top:0;left:0;width:100vw;height:100vh;pointer-events:none;z-index:2147483647';
+      el.innerHTML = '<svg style="position:absolute;top:0;left:0;width:100%;height:100%" xmlns="http://www.w3.org/2000/svg">'
+        + '<defs><filter id="glow"><feGaussianBlur stdDeviation="3" result="blur"/><feMerge><feMergeNode in="blur"/><feMergeNode in="SourceGraphic"/></feMerge></filter></defs>'
+        + '<circle cx="${x}" cy="${y}" r="28" fill="rgba(255,59,48,0.12)" stroke="none"/>'
+        + '<line x1="${x - 40}" y1="${y}" x2="${x - 12}" y2="${y}" stroke="#FF3B30" stroke-width="3" filter="url(#glow)"/>'
+        + '<line x1="${x + 12}" y1="${y}" x2="${x + 40}" y2="${y}" stroke="#FF3B30" stroke-width="3" filter="url(#glow)"/>'
+        + '<line x1="${x}" y1="${y - 40}" x2="${x}" y2="${y - 12}" stroke="#FF3B30" stroke-width="3" filter="url(#glow)"/>'
+        + '<line x1="${x}" y1="${y + 12}" x2="${x}" y2="${y + 40}" stroke="#FF3B30" stroke-width="3" filter="url(#glow)"/>'
+        + '<circle cx="${x}" cy="${y}" r="20" fill="none" stroke="#FF3B30" stroke-width="3" filter="url(#glow)"/>'
+        + '<circle cx="${x}" cy="${y}" r="4" fill="#FF3B30"/>'
+        + '<rect x="${labelX}" y="${labelY}" rx="6" ry="6" width="${labelWidth}" height="28" fill="rgba(0,0,0,0.85)"/>'
+        + '<text x="${labelX + 12}" y="${labelY + 19}" font-family="system-ui,-apple-system,sans-serif" font-size="14" fill="white" font-weight="600">${safeLabel}</text>'
+        + '</svg>';
+      document.body.appendChild(el);
+    })()`,
+  )
+}
+
+/**
+ * Removes the crosshair overlay from a page, if one is present.
+ *
+ * @param browser Browser used to evaluate the removal script.
+ * @param pageId  Page to clean up.
+ */
+export async function removeCrosshair(
+  browser: Browser,
+  pageId: number,
+): Promise<void> {
+  // Optional chaining makes this a no-op when no overlay element exists.
+  const removalScript = `document.getElementById('${CROSSHAIR_ID}')?.remove()`
+  await browser.evaluate(pageId, removalScript)
+}
--- a/packages/browseros-agent/apps/eval/scripts/showcase/executor.ts
+++ b/packages/browseros-agent/apps/eval/scripts/showcase/executor.ts
@@ -0,0 +1,288 @@
+import { randomUUID } from 'node:crypto'
+import { mkdir, writeFile } from 'node:fs/promises'
+import { join } from 'node:path'
+import { AiSdkAgent } from '@browseros/server/agent/tool-loop'
+import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
+import { Browser } from '@browseros/server/browser'
+import { CdpBackend } from '@browseros/server/browser/backends/cdp'
+import { registry } from '@browseros/server/tools/registry'
+import type { Task } from '../../src/types'
+import { injectCrosshair, removeCrosshair } from './crosshair'
+import { buildTaskManifest, saveTaskManifest } from './manifest'
+import type { ShowcaseStep, ShowcaseTaskManifest } from './types'
+
+// Tools that target a page element through an `element` id in their input.
+// For these, the executor looks up the element's center point and uses it as
+// the crosshair position.
+const ELEMENT_TOOLS = new Set([
+  'click',
+  'fill',
+  'hover',
+  'clear',
+  'select_option',
+  'drag',
+  'focus',
+  'check',
+  'uncheck',
+])
+
+// Tools whose input already carries numeric `x`/`y` coordinates, which are
+// used directly as the crosshair position.
+const COORDINATE_TOOLS = new Set(['click_at', 'hover_at', 'type_at', 'drag_at'])
+
+// No-op stand-in passed as the second argument to `new Browser(...)`: it never
+// reports a connection and every method resolves without doing anything.
+const CONTROLLER_STUB = {
+  start: async () => {},
+  stop: async () => {},
+  isConnected: () => false,
+  send: async () => ({}),
+  // biome-ignore lint/suspicious/noExplicitAny: ControllerBackend type not exported
+} as any
+
+/**
+ * Picks a page id that the browser currently knows about.
+ *
+ * @param browser     Browser whose open pages are listed.
+ * @param requestedId Page id the caller would like to use.
+ * @returns `requestedId` when it is still listed or when no pages are listed
+ *          at all; otherwise the id of the first listed page.
+ */
+async function resolvePageId(
+  browser: Browser,
+  requestedId: number,
+): Promise<number> {
+  const openPages = await browser.listPages()
+  const stillOpen = openPages.some((page) => page.pageId === requestedId)
+  if (stillOpen || openPages.length === 0) return requestedId
+  return openPages[0].pageId
+}
+
+/** Outcome of running one showcase task. */
+export interface ExecuteTaskResult {
+  // Manifest that was built for, and written to disk by, this execution.
+  manifest: ShowcaseTaskManifest
+  // 'timeout' when the abort timer fired, 'failed' when the agent run threw
+  // for another reason, 'completed' otherwise.
+  status: 'completed' | 'timeout' | 'failed'
+}
+
+/**
+ * Runs a single showcase task against a browser reachable over CDP and
+ * records every tool call the agent makes.
+ *
+ * For each tool call it captures a "before" screenshot and an accessibility
+ * snapshot, resolves target coordinates when the tool has any, takes an
+ * annotated screenshot with a crosshair drawn at those coordinates, and then
+ * an "after" screenshot once the tool has finished. Screenshots are written
+ * to `<outputDir>/<executionId>/screenshots`, and a manifest describing the
+ * run is saved via `saveTaskManifest`.
+ *
+ * An agent run that throws or exceeds `timeoutMs` is reported through the
+ * returned `status` rather than thrown. Errors raised during setup (listing
+ * pages, initial navigation, agent creation) or while saving the manifest do
+ * propagate to the caller.
+ *
+ * @param task        Task to run; `query` is the prompt, `start_url` the
+ *                    optional initial page.
+ * @param cdpPort     Port of the CDP endpoint to connect to.
+ * @param outputDir   Directory under which the per-execution folder is made.
+ * @param agentConfig Model/provider settings forwarded to the agent.
+ * @param timeoutMs   Time after which the agent run is aborted.
+ * @returns The saved manifest and the final status of the run.
+ */
+export async function executeShowcaseTask(
+  task: Task,
+  cdpPort: number,
+  outputDir: string,
+  agentConfig: {
+    model: string
+    provider: string
+    apiKey?: string
+    baseUrl?: string
+  },
+  timeoutMs: number,
+): Promise<ExecuteTaskResult> {
+  const executionId = randomUUID()
+  const taskDir = join(outputDir, executionId)
+  const screenshotDir = join(taskDir, 'screenshots')
+  await mkdir(screenshotDir, { recursive: true })
+
+  const cdp = new CdpBackend({ port: cdpPort })
+  await cdp.connect()
+
+  let agent: AiSdkAgent | null = null
+
+  // Everything after a successful connect runs inside this try so that the
+  // CDP connection is released even when setup (listing pages, the initial
+  // navigation, agent creation) throws.
+  try {
+    const browser = new Browser(cdp, CONTROLLER_STUB)
+
+    const pages = await browser.listPages()
+    const activePage = pages[0]
+    // Falls back to page id 1 when the browser reports no pages.
+    let activePageId = activePage?.pageId ?? 1
+
+    // Navigate to start URL
+    if (task.start_url && task.start_url !== 'about:blank') {
+      await browser.goto(activePageId, task.start_url)
+    }
+
+    const conversationId = randomUUID()
+    const resolvedConfig: ResolvedAgentConfig = {
+      conversationId,
+      // biome-ignore lint/suspicious/noExplicitAny: LLMProvider type validated at runtime
+      provider: agentConfig.provider as any,
+      model: agentConfig.model,
+      apiKey: agentConfig.apiKey,
+      baseUrl: agentConfig.baseUrl,
+      workingDir: `/tmp/browseros-showcase-${conversationId}`,
+      evalMode: true,
+      supportsImages: true,
+    }
+
+    const browserContext = activePage
+      ? {
+          activeTab: {
+            id: activePage.tabId,
+            pageId: activePage.pageId,
+            url: activePage.url,
+            title: activePage.title,
+          },
+        }
+      : undefined
+
+    const steps: ShowcaseStep[] = []
+    let stepNum = 0
+    let finalText: string | null = null
+    let status: 'completed' | 'timeout' | 'failed' = 'completed'
+    const startTime = Date.now()
+
+    agent = await AiSdkAgent.create({
+      resolvedConfig,
+      browser,
+      registry,
+      browserContext,
+    })
+
+    // Step being assembled between the start and finish callbacks of one
+    // tool call; null when no tool call is in flight.
+    let pendingStep: Partial<ShowcaseStep> | null = null
+
+    const abortController = new AbortController()
+    const timeoutHandle = setTimeout(() => abortController.abort(), timeoutMs)
+
+    try {
+      const result = await agent.toolLoopAgent.generate({
+        prompt: task.query,
+        abortSignal: abortController.signal,
+
+        experimental_onToolCallStart: async ({ toolCall }) => {
+          try {
+            const input = (toolCall.input ?? {}) as Record<string, unknown>
+            // Follow the agent when a tool call names a page explicitly.
+            if (typeof input.page === 'number') {
+              activePageId = input.page
+            }
+            const pageId = await resolvePageId(browser, activePageId)
+            activePageId = pageId
+
+            const beforeResult = await browser.screenshot(pageId, {
+              format: 'png',
+              fullPage: false,
+            })
+            const beforePath = join(screenshotDir, `${stepNum}_before.png`)
+            await writeFile(
+              beforePath,
+              Buffer.from(beforeResult.data, 'base64'),
+            )
+
+            let axTree = ''
+            try {
+              axTree = await browser.snapshot(pageId)
+            } catch {
+              // snapshot can fail on some pages
+            }
+
+            let coords: { x: number; y: number } | undefined
+            const elementId = input.element as number | undefined
+            if (
+              elementId !== undefined &&
+              ELEMENT_TOOLS.has(toolCall.toolName)
+            ) {
+              try {
+                coords = await browser.getElementCenter(pageId, elementId)
+              } catch {
+                // element may have been removed
+              }
+            } else if (
+              COORDINATE_TOOLS.has(toolCall.toolName) &&
+              typeof input.x === 'number' &&
+              typeof input.y === 'number'
+            ) {
+              coords = { x: input.x, y: input.y }
+            }
+
+            pendingStep = {
+              stepIndex: stepNum,
+              toolName: toolCall.toolName,
+              toolInput: input,
+              beforeScreenshot: beforePath,
+              accessibilitySnapshot: axTree,
+              elementCoordinates: coords,
+              timestamp: new Date().toISOString(),
+            }
+
+            if (coords) {
+              try {
+                await injectCrosshair(
+                  browser,
+                  pageId,
+                  coords,
+                  toolCall.toolName,
+                )
+                const annotatedResult = await browser.screenshot(pageId, {
+                  format: 'png',
+                  fullPage: false,
+                })
+                const annotatedPath = join(
+                  screenshotDir,
+                  `${stepNum}_annotated.png`,
+                )
+                await writeFile(
+                  annotatedPath,
+                  Buffer.from(annotatedResult.data, 'base64'),
+                )
+                pendingStep.annotatedScreenshot = annotatedPath
+                await removeCrosshair(browser, pageId)
+              } catch {
+                // annotation is best-effort
+              }
+            }
+          } catch (err) {
+            console.warn(
+              `  Step ${stepNum} before-capture failed: ${err instanceof Error ? err.message : String(err)}`,
+            )
+          }
+        },
+
+        experimental_onToolCallFinish: async ({ toolResult }) => {
+          try {
+            const pageId = await resolvePageId(browser, activePageId)
+            activePageId = pageId
+            const afterResult = await browser.screenshot(pageId, {
+              format: 'png',
+              fullPage: false,
+            })
+            const afterPath = join(screenshotDir, `${stepNum}_after.png`)
+            await writeFile(afterPath, Buffer.from(afterResult.data, 'base64'))
+
+            // A step is only recorded when its before-capture succeeded.
+            if (pendingStep) {
+              pendingStep.afterScreenshot = afterPath
+              pendingStep.toolOutput = toolResult
+              steps.push(pendingStep as ShowcaseStep)
+              stepNum++
+            }
+          } catch (err) {
+            console.warn(
+              `  Step ${stepNum} after-capture failed: ${err instanceof Error ? err.message : String(err)}`,
+            )
+          }
+          pendingStep = null
+        },
+
+        onStepFinish: async ({ text }) => {
+          // Attach the assistant's text to the most recently recorded step.
+          if (text && steps.length > 0) {
+            const lastStep = steps[steps.length - 1]
+            lastStep.assistantText = text
+          }
+        },
+      })
+
+      finalText = result.text || null
+    } catch (err) {
+      // An aborted signal means our own timer fired; anything else is a
+      // genuine failure of the agent run.
+      if (abortController.signal.aborted) {
+        status = 'timeout'
+        console.log(`  ${task.query_id}: timed out after ${timeoutMs / 1000}s`)
+      } else {
+        status = 'failed'
+        console.error(
+          `  ${task.query_id}: failed — ${err instanceof Error ? err.message : String(err)}`,
+        )
+      }
+    } finally {
+      clearTimeout(timeoutHandle)
+    }
+
+    const totalDurationMs = Date.now() - startTime
+
+    // The manifest is written for timed-out and failed runs as well, with
+    // whatever steps were recorded before the run ended.
+    const manifest = buildTaskManifest({
+      executionId,
+      taskId: task.query_id,
+      query: task.query,
+      startUrl: task.start_url ?? 'about:blank',
+      dataset: task.dataset,
+      steps,
+      finalAnswer: finalText,
+      model: agentConfig.model,
+      provider: agentConfig.provider,
+      totalDurationMs,
+    })
+
+    await saveTaskManifest(outputDir, executionId, manifest)
+
+    return { manifest, status }
+  } finally {
+    // Cleanup is best-effort: errors while disposing are ignored.
+    if (agent) await agent.dispose().catch(() => {})
+    await cdp.disconnect().catch(() => {})
+  }
+}
--- a/packages/browseros-agent/apps/eval/scripts/showcase/generate.ts
+++ b/packages/browseros-agent/apps/eval/scripts/showcase/generate.ts
@@ -0,0 +1,235 @@
+#!/usr/bin/env bun
+
+import { mkdir } from 'node:fs/promises'
+import { parseArgs } from 'node:util'
+import { BrowserOSAppManager } from '../../src/runner/browseros-app-manager'
+import { loadTasks } from '../../src/runner/task-loader'
+import type { Task } from '../../src/types'
+import { executeShowcaseTask } from './executor'
+import { saveRunIndex } from './manifest'
+import type { ShowcaseRunIndex } from './types'
+import { uploadShowcase } from './uploader'
+
+// Base ports for worker 0. A worker's CDP port is `BASE_PORTS.cdp` plus its
+// index; the whole object is also handed to BrowserOSAppManager together with
+// the worker index.
+const BASE_PORTS = { cdp: 9010, server: 9110, extension: 9310 }
+
+// Command-line flags. Numeric flags are declared as strings here and are
+// converted to numbers when `config` is built.
+const { values } = parseArgs({
+  args: Bun.argv.slice(2),
+  options: {
+    tasks: { type: 'string', short: 't' },
+    output: { type: 'string', short: 'o', default: './showcase-output' },
+    model: { type: 'string', short: 'm' },
+    provider: { type: 'string', short: 'p' },
+    'base-url': { type: 'string' },
+    workers: { type: 'string', short: 'w', default: '1' },
+    'cdp-port': { type: 'string' },
+    timeout: { type: 'string', default: '300000' },
+    upload: { type: 'boolean', default: false },
+    help: { type: 'boolean', short: 'h' },
+  },
+})
+
+// Print usage and exit: status 0 when --help was requested, status 1 when
+// the required --tasks flag is missing.
+if (values.help || !values.tasks) {
+  console.log(`
+Showcase Dataset Generator
+
+Runs the BrowserOS agent on tasks and captures before/after screenshots
+with crosshair annotations for element-targeting tool calls.
+
+Usage:
+  bun scripts/showcase/generate.ts --tasks <path> [options]
+
+Options:
+  -t, --tasks <path>       JSONL task file (required)
+  -o, --output <dir>       Output directory (default: ./showcase-output)
+  -m, --model <model>      LLM model (env: SHOWCASE_MODEL, default: openai/gpt-4o)
+  -p, --provider <name>    LLM provider (env: SHOWCASE_PROVIDER, default: openrouter)
+  --base-url <url>         LLM base URL (env: SHOWCASE_BASE_URL)
+  -w, --workers <n>        Parallel workers (default: 1)
+  --cdp-port <port>        Connect to existing Chrome (single-worker only)
+  --timeout <ms>           Per-task timeout in ms (default: 300000)
+  --upload                 Upload results to R2 after generation
+  -h, --help               Show this help
+`)
+  process.exit(values.help ? 0 : 1)
+}
+
+/**
+ * Converts the raw string value of a numeric flag to a number.
+ *
+ * Prints an error and exits with status 1 when the value is not a finite
+ * number, so a typo such as `--workers abc` is reported instead of silently
+ * turning into NaN.
+ *
+ * @param flag Flag name without leading dashes, used in the error message.
+ * @param raw  Value as given on the command line (or the flag's default).
+ */
+function parseNumericFlag(flag: string, raw: string): number {
+  const parsed = Number(raw)
+  if (!Number.isFinite(parsed)) {
+    console.error(`Invalid value for --${flag}: "${raw}" is not a number`)
+    process.exit(1)
+  }
+  return parsed
+}
+
+// Effective settings. For model/provider/base URL the precedence is:
+// CLI flag, then environment variable, then built-in default.
+const config = {
+  tasks: values.tasks as string,
+  output: (values.output ?? './showcase-output') as string,
+  model: (values.model ??
+    process.env.SHOWCASE_MODEL ??
+    'openai/gpt-4o') as string,
+  provider: (values.provider ??
+    process.env.SHOWCASE_PROVIDER ??
+    'openrouter') as string,
+  baseUrl: (values['base-url'] ?? process.env.SHOWCASE_BASE_URL) as
+    | string
+    | undefined,
+  // Always a whole number of at least one worker.
+  workers: Math.max(
+    1,
+    Math.floor(parseNumericFlag('workers', values.workers ?? '1')),
+  ),
+  cdpPort: values['cdp-port']
+    ? parseNumericFlag('cdp-port', values['cdp-port'])
+    : undefined,
+  timeout: parseNumericFlag('timeout', values.timeout ?? '300000'),
+  upload: values.upload ?? false,
+}
+
+// An externally managed browser exposes a single CDP port, so it cannot be
+// shared between several workers.
+if (config.cdpPort && config.workers > 1) {
+  console.error('--cdp-port only works with a single worker (--workers 1)')
+  process.exit(1)
+}
+
+// OPENROUTER_API_KEY takes precedence over OPENAI_API_KEY when both are set.
+const apiKey = process.env.OPENROUTER_API_KEY ?? process.env.OPENAI_API_KEY
+if (!apiKey) {
+  console.error(
+    'Missing API key: set OPENROUTER_API_KEY or OPENAI_API_KEY environment variable',
+  )
+  process.exit(1)
+}
+
+// Load the task list from the file given by --tasks.
+const { tasks } = await loadTasks({ type: 'file', path: config.tasks })
+console.log(`Loaded ${tasks.length} task(s), ${config.workers} worker(s)`)
+
+await mkdir(config.output, { recursive: true })
+
+// Run id: the date part of the current ISO (UTC) timestamp followed by the
+// first 8 characters of a random UUID.
+const runId = `${new Date().toISOString().slice(0, 10)}-${crypto.randomUUID().slice(0, 8)}`
+// Index of the whole run; workers append one entry per task they process.
+const runIndex: ShowcaseRunIndex = {
+  runId,
+  createdAt: new Date().toISOString(),
+  agentConfig: { model: config.model, provider: config.provider },
+  tasks: [],
+}
+
+console.log(`\nRun ID: ${runId}`)
+console.log(`Output: ${config.output}\n`)
+
+// --- Task Queue ---
+
+/**
+ * Hands out tasks one at a time, in their original order, to whichever
+ * caller asks next. Once stopped, it yields nothing further.
+ */
+class TaskQueue {
+  private cursor = 0
+  private halted = false
+
+  constructor(private tasks: Task[]) {}
+
+  /** Returns the next unclaimed task, or null when drained or stopped. */
+  next(): Task | null {
+    const exhausted = this.cursor >= this.tasks.length
+    if (this.halted || exhausted) return null
+    const task = this.tasks[this.cursor]
+    this.cursor += 1
+    return task
+  }
+
+  /** Prevents any further tasks from being handed out. */
+  stop(): void {
+    this.halted = true
+  }
+}
+
+// State shared by all workers.
+const queue = new TaskQueue(tasks)
+// Despite the name, this is incremented when a task is *started* (see
+// runWorker) and serves as the 1-based progress number in the log output.
+let completedCount = 0
+// Every app manager created by a worker, so the signal handler can kill them.
+const appManagers: BrowserOSAppManager[] = []
+
+// --- Signal handling ---
+
+// On SIGINT/SIGTERM: stop handing out tasks, kill every managed BrowserOS
+// instance (failures ignored via allSettled), then exit with status 0.
+const onSignal = async () => {
+  console.log('\nShutting down workers...')
+  queue.stop()
+  await Promise.allSettled(appManagers.map((m) => m.killApp()))
+  process.exit(0)
+}
+process.on('SIGINT', onSignal)
+process.on('SIGTERM', onSignal)
+
+// --- Worker ---
+
+/**
+ * Runs one worker: pulls tasks from the shared queue until it is empty or
+ * stopped, executes each one, and appends the outcome to `runIndex`.
+ *
+ * Unless --cdp-port was given, the worker owns a BrowserOS instance on ports
+ * offset by `workerIndex`, restarts it between tasks for clean state, and
+ * kills it when the worker finishes, including when it finishes by throwing.
+ *
+ * A task whose execution throws is recorded as 'failed' and the worker moves
+ * on to the next task. An error from restarting the browser is not caught
+ * here and ends the worker.
+ *
+ * @param workerIndex Zero-based worker number, used for port offsets and
+ *                    log prefixes.
+ */
+async function runWorker(workerIndex: number): Promise<void> {
+  let appManager: BrowserOSAppManager | null = null
+  // An externally supplied port wins; otherwise use this worker's own port.
+  const cdpPort = config.cdpPort || BASE_PORTS.cdp + workerIndex
+  // True while the browser has just been started and has not run a task yet,
+  // so the first task does not pay for a second, redundant restart.
+  let browserIsFresh = false
+
+  if (!config.cdpPort) {
+    appManager = new BrowserOSAppManager(workerIndex, BASE_PORTS)
+    appManagers.push(appManager)
+    console.log(`  [W${workerIndex}] Starting BrowserOS...`)
+    await appManager.restart()
+    browserIsFresh = true
+  }
+
+  const agentConfig = {
+    model: config.model,
+    provider: config.provider,
+    apiKey,
+    baseUrl: config.baseUrl,
+  }
+
+  try {
+    while (true) {
+      const task = queue.next()
+      if (!task) break
+
+      completedCount++
+      const tag = config.workers > 1 ? `[W${workerIndex}] ` : ''
+      console.log(
+        `${tag}[${completedCount}/${tasks.length}] ${task.query_id}: ${task.query}`,
+      )
+
+      // Restart browser between tasks for clean state
+      if (appManager && !browserIsFresh) {
+        await appManager.restart()
+      }
+      browserIsFresh = false
+
+      try {
+        const { manifest, status } = await executeShowcaseTask(
+          task,
+          cdpPort,
+          config.output,
+          agentConfig,
+          config.timeout,
+        )
+
+        runIndex.tasks.push({
+          executionId: manifest.executionId,
+          taskId: task.query_id,
+          query: task.query,
+          stepCount: manifest.steps.length,
+          status,
+          manifestPath: `${manifest.executionId}/manifest.json`,
+        })
+
+        const duration = (manifest.totalDurationMs / 1000).toFixed(1)
+        console.log(
+          `${tag}  ${status.toUpperCase()} — ${manifest.steps.length} steps, ${duration}s\n`,
+        )
+      } catch (err) {
+        // No manifest was produced, so the entry carries placeholder values.
+        console.error(
+          `${tag}  FAILED — ${err instanceof Error ? err.message : String(err)}\n`,
+        )
+        runIndex.tasks.push({
+          executionId: 'unknown',
+          taskId: task.query_id,
+          query: task.query,
+          stepCount: 0,
+          status: 'failed',
+          manifestPath: '',
+        })
+      }
+    }
+  } finally {
+    if (appManager) await appManager.killApp()
+  }
+}
+
+// --- Run ---
+
+// Start all workers, wait for every one of them to finish, persist the run
+// index, and only then surface a worker crash or start the upload.
+try {
+  // allSettled rather than all: a crashing worker must not abandon the other
+  // workers mid-task or discard the results that were already collected.
+  const outcomes = await Promise.allSettled(
+    Array.from({ length: config.workers }, (_, i) => runWorker(i)),
+  )
+
+  await saveRunIndex(config.output, runIndex)
+  console.log(`\nResults saved to: ${config.output}`)
+  console.log(
+    `Tasks: ${runIndex.tasks.filter((t) => t.status === 'completed').length} completed, ` +
+      `${runIndex.tasks.filter((t) => t.status === 'failed').length} failed, ` +
+      `${runIndex.tasks.filter((t) => t.status === 'timeout').length} timed out`,
+  )
+
+  // Re-throw the first worker crash so the process still fails, as it did
+  // before, and so an incomplete run is not uploaded.
+  const crashed = outcomes.find(
+    (outcome): outcome is PromiseRejectedResult =>
+      outcome.status === 'rejected',
+  )
+  if (crashed) throw crashed.reason
+
+  if (config.upload) {
+    console.log('\nUploading to R2...')
+    const baseUrl = await uploadShowcase(config.output, runId)
+    console.log(`Uploaded to: ${baseUrl}`)
+  }
+} finally {
+  process.off('SIGINT', onSignal)
+  process.off('SIGTERM', onSignal)
+}
--- a/packages/browseros-agent/apps/eval/scripts/showcase/manifest.ts
+++ b/packages/browseros-agent/apps/eval/scripts/showcase/manifest.ts
@@ -0,0 +1,52 @@
+import { writeFile } from 'node:fs/promises'
+import { join } from 'node:path'
+import type {
+  ShowcaseRunIndex,
+  ShowcaseStep,
+  ShowcaseTaskManifest,
+} from './types'
+
+/**
+ * Assembles the manifest object for one task execution.
+ *
+ * `model` and `provider` are grouped under `agentConfig`, and `createdAt` is
+ * stamped with the current time in ISO format; every other field is copied
+ * through unchanged.
+ *
+ * @param opts Values describing the execution.
+ * @returns A new manifest object. Nothing is written to disk.
+ */
+export function buildTaskManifest(opts: {
+  executionId: string
+  taskId: string
+  query: string
+  startUrl: string
+  dataset: string
+  steps: ShowcaseStep[]
+  finalAnswer: string | null
+  model: string
+  provider: string
+  totalDurationMs: number
+}): ShowcaseTaskManifest {
+  const {
+    executionId,
+    taskId,
+    query,
+    startUrl,
+    dataset,
+    steps,
+    finalAnswer,
+    model,
+    provider,
+    totalDurationMs,
+  } = opts
+  const agentConfig = { model, provider }
+  const createdAt = new Date().toISOString()
+
+  // Key order is kept stable because it determines the serialized JSON.
+  return {
+    executionId,
+    taskId,
+    query,
+    startUrl,
+    dataset,
+    steps,
+    finalAnswer,
+    agentConfig,
+    totalDurationMs,
+    createdAt,
+  }
+}
+
+/**
+ * Writes a task manifest to `<outputDir>/<executionId>/manifest.json` as
+ * JSON indented by two spaces.
+ *
+ * The execution directory is not created here and must already exist.
+ *
+ * @returns The path of the file that was written.
+ */
+export async function saveTaskManifest(
+  outputDir: string,
+  executionId: string,
+  manifest: ShowcaseTaskManifest,
+): Promise<string> {
+  const target = join(outputDir, executionId, 'manifest.json')
+  const payload = JSON.stringify(manifest, null, 2)
+  await writeFile(target, payload)
+  return target
+}
+
+/**
+ * Writes the run index to `<outputDir>/index.json` as JSON indented by two
+ * spaces.
+ *
+ * @returns The path of the file that was written.
+ */
+export async function saveRunIndex(
+  outputDir: string,
+  index: ShowcaseRunIndex,
+): Promise<string> {
+  const target = join(outputDir, 'index.json')
+  const payload = JSON.stringify(index, null, 2)
+  await writeFile(target, payload)
+  return target
+}
--- a/packages/browseros-agent/apps/eval/scripts/showcase/types.ts
+++ b/packages/browseros-agent/apps/eval/scripts/showcase/types.ts
@@ -0,0 +1,53 @@
+// Settings for a showcase generation run.
+// NOTE(review): the field names mirror the generator's CLI options, but this
+// interface does not appear to be referenced by the generator itself, and
+// `apiKeyEnv` has no counterpart there — confirm it is still needed.
+export interface ShowcaseConfig {
+  tasks: string
+  output: string
+  upload: boolean
+  model: string
+  provider: string
+  apiKeyEnv: string
+  cdpPort?: number
+  timeout: number
+}
+
+// One recorded tool call of the agent, with the captures taken around it.
+export interface ShowcaseStep {
+  // Zero-based position of the step within the task.
+  stepIndex: number
+  // Name of the tool that was called.
+  toolName: string
+  // Input the tool was called with.
+  toolInput: Record<string, unknown>
+  // Result reported when the tool call finished.
+  toolOutput: unknown
+  // Point targeted by the tool call; absent when none could be resolved.
+  elementCoordinates?: { x: number; y: number }
+  // Path of the PNG captured before the tool ran.
+  beforeScreenshot: string
+  // Path of the PNG captured after the tool finished.
+  afterScreenshot: string
+  // Path of the PNG with the crosshair overlay; only set when coordinates
+  // were available and the annotation succeeded.
+  annotatedScreenshot?: string
+  // Accessibility snapshot of the page before the tool ran; empty string
+  // when taking the snapshot failed.
+  accessibilitySnapshot: string
+  // Assistant text attached to this step, when the model produced any.
+  assistantText?: string
+  // ISO timestamp recorded when the step's "before" state was captured.
+  timestamp: string
+}
+
+// Everything recorded about a single task execution; serialized to that
+// execution's manifest.json.
+export interface ShowcaseTaskManifest {
+  // Random id of this execution; also the name of its output directory.
+  executionId: string
+  // Id of the task that was run.
+  taskId: string
+  // Prompt given to the agent.
+  query: string
+  // URL the task started from.
+  startUrl: string
+  dataset: string
+  // Recorded tool calls, in execution order.
+  steps: ShowcaseStep[]
+  // Final text returned by the agent, or null when there was none.
+  finalAnswer: string | null
+  agentConfig: { model: string; provider: string }
+  totalDurationMs: number
+  // ISO timestamp of when the manifest object was built.
+  createdAt: string
+  // Not set when the manifest is built.
+  // NOTE(review): presumably filled in by the uploader — confirm.
+  uploadedAt?: string
+}
+
+// Summary of a whole generator run; serialized to index.json in the output
+// directory.
+export interface ShowcaseRunIndex {
+  runId: string
+  // ISO timestamp of when the run was started.
+  createdAt: string
+  // NOTE(review): presumably filled in by the uploader — confirm.
+  uploadedAt?: string
+  agentConfig: { model: string; provider: string }
+  // One entry per processed task, in the order the tasks finished.
+  tasks: Array<{
+    // Execution id, or 'unknown' when the task failed before a manifest
+    // was produced.
+    executionId: string
+    taskId: string
+    query: string
+    stepCount: number
+    status: 'completed' | 'timeout' | 'failed'
+    // Manifest path relative to the output directory
+    // (`<executionId>/manifest.json`), or '' when no manifest was produced.
+    manifestPath: string
+  }>
+}
--- a/packages/browseros-agent/apps/eval/scripts/showcase/uploader.ts
+++ b/packages/browseros-agent/apps/eval/scripts/showcase/uploader.ts
@@ -0,0 +1,155 @@
+import { readdir, readFile, writeFile } from 'node:fs/promises'
+import { extname, join, relative } from 'node:path'
+import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3'
+import type { ShowcaseRunIndex, ShowcaseTaskManifest } from './types'
+
+/** Credentials and target bucket for the R2 upload, as built by loadR2Config(). */
+interface R2Config {
+  // Used to build the `<accountId>.r2.cloudflarestorage.com` endpoint host.
+  accountId: string
+  accessKeyId: string
+  secretAccessKey: string
+  // Destination bucket name.
+  bucket: string
+}
+
+/**
+ * Pick the Content-Type for an uploaded file from its extension.
+ *
+ * The extension is compared case-insensitively; anything other than `.png`,
+ * `.json` or `.jsonl` is sent as `application/octet-stream`.
+ */
+function contentTypeFor(filePath: string): string {
+  switch (extname(filePath).toLowerCase()) {
+    case '.png':
+      return 'image/png'
+    case '.json':
+      return 'application/json'
+    case '.jsonl':
+      return 'application/jsonl'
+    default:
+      return 'application/octet-stream'
+  }
+}
+
+/**
+ * Build the R2 configuration from environment variables.
+ *
+ * R2_ACCOUNT_ID, R2_ACCESS_KEY_ID and R2_SECRET_ACCESS_KEY are required;
+ * R2_BUCKET falls back to 'rl-env' when it is not set.
+ *
+ * @throws Error when any of the three credential variables is missing or empty.
+ */
+function loadR2Config(): R2Config {
+  const {
+    R2_ACCOUNT_ID: accountId,
+    R2_ACCESS_KEY_ID: accessKeyId,
+    R2_SECRET_ACCESS_KEY: secretAccessKey,
+    R2_BUCKET: bucketOverride,
+  } = process.env
+
+  if (!accountId || !accessKeyId || !secretAccessKey) {
+    throw new Error(
+      'Missing R2 credentials. Set R2_ACCOUNT_ID, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY',
+    )
+  }
+
+  return {
+    accountId,
+    accessKeyId,
+    secretAccessKey,
+    bucket: bucketOverride ?? 'rl-env',
+  }
+}
+
+/**
+ * Map a local file path to its object key: `<prefix>/<path relative to
+ * outputDir>`, with backslashes turned into forward slashes.
+ */
+function toR2Key(prefix: string, outputDir: string, filePath: string): string {
+  const relPath = relative(outputDir, filePath)
+  const slashPath = relPath.split('\\').join('/')
+  return [prefix, slashPath].join('/')
+}
+
+/**
+ * Recursively list every non-directory entry below `dir`.
+ *
+ * Entries are visited in readdir() order and a subdirectory's files are
+ * emitted at the position where the subdirectory was encountered.
+ */
+async function walkDir(dir: string): Promise<string[]> {
+  const collected: string[] = []
+  for (const dirent of await readdir(dir, { withFileTypes: true })) {
+    const childPath = join(dir, dirent.name)
+    if (!dirent.isDirectory()) {
+      collected.push(childPath)
+      continue
+    }
+    const nested = await walkDir(childPath)
+    collected.push(...nested)
+  }
+  return collected
+}
+
+/**
+ * Upload every file under `outputDir` to R2 beneath `showcase/<runId>/`.
+ *
+ * After the first upload pass, `uploadedAt` is stamped into the local
+ * index.json and into each manifest.json, the manifests' screenshot paths are
+ * rewritten to R2 keys, and all .json files are uploaded a second time so the
+ * bucket holds the rewritten versions.
+ *
+ * NOTE(review): the manifests are rewritten on disk, so a second call on the
+ * same directory feeds already-rewritten keys back through toR2Key() —
+ * confirm whether re-running an upload is a supported flow.
+ *
+ * @param outputDir - Local directory holding the run output.
+ * @param runId - Run identifier; used in the key prefix `showcase/<runId>`.
+ * @returns Base URL built from the bucket, the account id and the key prefix.
+ * @throws Error from loadR2Config() when R2 credentials are missing. Read and
+ *   upload failures in the two upload loops are not caught here.
+ */
+export async function uploadShowcase(
+  outputDir: string,
+  runId: string,
+): Promise<string> {
+  const r2 = loadR2Config()
+  const client = new S3Client({
+    region: 'auto',
+    endpoint: `https://${r2.accountId}.r2.cloudflarestorage.com`,
+    credentials: {
+      accessKeyId: r2.accessKeyId,
+      secretAccessKey: r2.secretAccessKey,
+    },
+  })
+
+  const prefix = `showcase/${runId}`
+  // The file list is captured once; both upload passes and the stamping loop
+  // below work from this snapshot.
+  const files = await walkDir(outputDir)
+  console.log(
+    `Uploading ${files.length} files to R2 (${r2.bucket}/${prefix})...`,
+  )
+
+  // First pass: upload everything as-is, one file at a time.
+  for (const filePath of files) {
+    const key = toR2Key(prefix, outputDir, filePath)
+    const data = await readFile(filePath)
+    await client.send(
+      new PutObjectCommand({
+        Bucket: r2.bucket,
+        Key: key,
+        Body: data,
+        ContentType: contentTypeFor(filePath),
+      }),
+    )
+  }
+
+  const baseUrl = `https://${r2.bucket}.${r2.accountId}.r2.cloudflarestorage.com/${prefix}`
+  console.log(`Upload complete. Base: ${baseUrl}`)
+
+  // Stamp uploadedAt on index.json
+  const indexPath = join(outputDir, 'index.json')
+  try {
+    const indexData = JSON.parse(
+      await readFile(indexPath, 'utf-8'),
+    ) as ShowcaseRunIndex
+    indexData.uploadedAt = new Date().toISOString()
+    await writeFile(indexPath, JSON.stringify(indexData, null, 2))
+  } catch {
+    // index may not exist if run was partial
+    // (this also swallows JSON parse and write failures)
+  }
+
+  // Stamp uploadedAt on each task manifest
+  for (const file of files) {
+    // indexPath ends in 'index.json', so `file !== indexPath` is always true
+    // for a path that ends with 'manifest.json'.
+    if (file.endsWith('manifest.json') && file !== indexPath) {
+      try {
+        const manifestData = JSON.parse(
+          await readFile(file, 'utf-8'),
+        ) as ShowcaseTaskManifest
+        manifestData.uploadedAt = new Date().toISOString()
+        // Rewrite screenshot paths to R2 keys
+        for (const step of manifestData.steps) {
+          step.beforeScreenshot = toR2Key(
+            prefix,
+            outputDir,
+            step.beforeScreenshot,
+          )
+          step.afterScreenshot = toR2Key(
+            prefix,
+            outputDir,
+            step.afterScreenshot,
+          )
+          if (step.annotatedScreenshot) {
+            step.annotatedScreenshot = toR2Key(
+              prefix,
+              outputDir,
+              step.annotatedScreenshot,
+            )
+          }
+        }
+        await writeFile(file, JSON.stringify(manifestData, null, 2))
+      } catch {
+        // skip malformed manifests
+        // (read and write failures are skipped the same way)
+      }
+    }
+  }
+
+  // Re-upload rewritten manifests + index
+  // Every .json file from the snapshot is sent again, not only the ones that
+  // were modified above.
+  const jsonFiles = files.filter(
+    (f) => f.endsWith('.json') && !f.includes('node_modules'),
+  )
+  for (const filePath of jsonFiles) {
+    const key = toR2Key(prefix, outputDir, filePath)
+    const data = await readFile(filePath)
+    await client.send(
+      new PutObjectCommand({
+        Bucket: r2.bucket,
+        Key: key,
+        Body: data,
+        ContentType: 'application/json',
+      }),
+    )
+  }
+
+  return baseUrl
+}
--- a/packages/browseros-agent/apps/eval/scripts/upload-run.ts
+++ b/packages/browseros-agent/apps/eval/scripts/upload-run.ts
@@ -0,0 +1,349 @@
+/**
+ * Upload eval runs to R2.
+ *
+ * Two modes:
+ *   bun scripts/upload-run.ts results/browseros-agent-weekly/2026-03-21-1730
+ *       → uploads that specific run
+ *
+ *   bun scripts/upload-run.ts results/browseros-agent-weekly
+ *       → finds all timestamped subfolders, uploads any not yet in R2
+ *
+ * Env vars: EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID, EVAL_R2_SECRET_ACCESS_KEY
+ *           EVAL_R2_BUCKET (default: browseros-eval)
+ *           EVAL_R2_CDN_BASE_URL (default: https://eval.browseros.com)
+ */
+
+import { readdir, readFile, stat } from 'node:fs/promises'
+import { basename, dirname, extname, join } from 'node:path'
+import {
+  GetObjectCommand,
+  PutObjectCommand,
+  S3Client,
+} from '@aws-sdk/client-s3'
+
+// Maximum number of file uploads in flight at once (passed to runPool).
+const CONCURRENCY = 20
+
+// Content-Type by file extension. The lookup key is the raw extname() of the
+// file, so it is case-sensitive; extensions not listed here are uploaded as
+// 'application/octet-stream'.
+const CONTENT_TYPES: Record<string, string> = {
+  '.json': 'application/json',
+  '.jsonl': 'application/x-ndjson',
+  '.png': 'image/png',
+}
+
+/** R2 connection settings for eval uploads, as built by loadConfig(). */
+interface R2Config {
+  // Used to build the `<accountId>.r2.cloudflarestorage.com` endpoint host.
+  accountId: string
+  accessKeyId: string
+  secretAccessKey: string
+  // Destination bucket name.
+  bucket: string
+  // Public base URL used to print the viewer link; stored without trailing slashes.
+  cdnBaseUrl: string
+}
+
+/**
+ * Read the R2 settings from the environment.
+ *
+ * EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID and EVAL_R2_SECRET_ACCESS_KEY are
+ * mandatory: when one is missing or empty an error is printed and the process
+ * exits with status 1. EVAL_R2_BUCKET and EVAL_R2_CDN_BASE_URL fall back to
+ * their defaults when unset or empty; trailing slashes are stripped from the
+ * CDN base URL.
+ */
+function loadConfig(): R2Config {
+  const env = process.env
+  const accountId = env.EVAL_R2_ACCOUNT_ID
+  const accessKeyId = env.EVAL_R2_ACCESS_KEY_ID
+  const secretAccessKey = env.EVAL_R2_SECRET_ACCESS_KEY
+
+  if (!accountId || !accessKeyId || !secretAccessKey) {
+    console.error(
+      'Missing required env vars: EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID, EVAL_R2_SECRET_ACCESS_KEY',
+    )
+    process.exit(1)
+  }
+
+  const bucket = env.EVAL_R2_BUCKET || 'browseros-eval'
+  const rawCdnBase = env.EVAL_R2_CDN_BASE_URL || 'https://eval.browseros.com'
+  const cdnBaseUrl = rawCdnBase.replace(/\/+$/, '')
+
+  return { accountId, accessKeyId, secretAccessKey, bucket, cdnBaseUrl }
+}
+
+/** Create an S3 client that talks to the account's R2 endpoint. */
+function createClient(config: R2Config): S3Client {
+  const { accountId, accessKeyId, secretAccessKey } = config
+  const endpoint = `https://${accountId}.r2.cloudflarestorage.com`
+  const credentials = { accessKeyId, secretAccessKey }
+  return new S3Client({ region: 'auto', endpoint, credentials })
+}
+
+/** PUT a single object into `bucket` at `key` with the given Content-Type. */
+async function upload(
+  client: S3Client,
+  bucket: string,
+  key: string,
+  body: Buffer,
+  contentType: string,
+) {
+  const putRequest = new PutObjectCommand({
+    Bucket: bucket,
+    Key: key,
+    Body: body,
+    ContentType: contentType,
+  })
+  await client.send(putRequest)
+}
+
+/**
+ * Recursively gather the paths of all non-directory entries below `dir`,
+ * depth-first in readdir() order.
+ */
+async function collectFiles(dir: string): Promise<string[]> {
+  const dirents = await readdir(dir, { withFileTypes: true })
+  const found: string[] = []
+  for (const dirent of dirents) {
+    const entryPath = join(dir, dirent.name)
+    const below = dirent.isDirectory()
+      ? await collectFiles(entryPath)
+      : [entryPath]
+    found.push(...below)
+  }
+  return found
+}
+
+/**
+ * Run `fn` over every item with at most `concurrency` calls in flight.
+ *
+ * Workers pull the next unclaimed index from a shared counter, so items are
+ * started in order but may finish in any order. The returned promise rejects
+ * with the first error thrown by `fn`.
+ *
+ * @param items - Work items; an empty array resolves immediately.
+ * @param concurrency - Desired number of parallel workers. Values below 1,
+ *   fractional values and NaN are clamped so that at least one worker runs;
+ *   the count never exceeds `items.length`.
+ * @param fn - Async handler invoked once per item.
+ */
+async function runPool<T>(
+  items: T[],
+  concurrency: number,
+  fn: (item: T) => Promise<void>,
+) {
+  // A zero, negative or NaN concurrency used to spawn no workers at all and
+  // silently skip every item; fall back to serial processing instead. Capping
+  // at items.length also avoids creating workers that have nothing to do.
+  const requested = Math.floor(concurrency) || 1
+  const workerCount = Math.min(items.length, Math.max(1, requested))
+
+  let next = 0
+  const workers = Array.from({ length: workerCount }, async () => {
+    while (next < items.length) {
+      // Claiming the index is synchronous, so two workers never share an item.
+      const idx = next++
+      await fn(items[idx])
+    }
+  })
+  await Promise.all(workers)
+}
+
+// Check if a run has already been uploaded to R2.
+// The run's manifest.json (written by uploadSingleRun after the task files)
+// is the marker. Only a "not found" response means "not uploaded"; any other
+// failure (credentials, network, ...) is rethrown so that it is not mistaken
+// for a missing run and answered with a full re-upload.
+async function isUploaded(
+  client: S3Client,
+  bucket: string,
+  runId: string,
+): Promise<boolean> {
+  try {
+    const res = await client.send(
+      new GetObjectCommand({
+        Bucket: bucket,
+        Key: `runs/${runId}/manifest.json`,
+      }),
+    )
+    // Consume the (small) body so the response stream does not stay open.
+    await res.Body?.transformToString()
+    return true
+  } catch (err) {
+    const failure = err as {
+      name?: string
+      $metadata?: { httpStatusCode?: number }
+    } | null
+    const notFound =
+      failure?.name === 'NoSuchKey' ||
+      failure?.name === 'NotFound' ||
+      failure?.$metadata?.httpStatusCode === 404
+    if (notFound) return false
+    throw err
+  }
+}
+
+// Tell a run dir apart from a config dir: a run dir has at least one
+// subdirectory containing a metadata.json file, whereas a config dir only
+// holds timestamped run subdirs such as 2026-03-21-1730/.
+async function isRunDir(dir: string): Promise<boolean> {
+  for (const entry of await readdir(dir, { withFileTypes: true })) {
+    if (!entry.isDirectory()) continue
+    try {
+      const info = await stat(join(dir, entry.name, 'metadata.json'))
+      if (info.isFile()) return true
+    } catch {
+      // no readable metadata.json in this subdirectory; check the next one
+    }
+  }
+  return false
+}
+
+/**
+ * Upload one run directory to R2 under `runs/<runId>/` and publish its manifest.
+ *
+ * Every subdirectory of `runDir` with a parseable metadata.json is treated as
+ * a task: all files below it are uploaded (up to CONCURRENCY at a time) to
+ * `runs/<runId>/<taskId>/...`. Afterwards a manifest summarising the tasks is
+ * written to `runs/<runId>/manifest.json` and viewer.html is uploaded to the
+ * bucket root.
+ *
+ * Returns early without uploading anything when `runDir` has no
+ * subdirectories or none of them has readable metadata.
+ *
+ * @param runDir - Local run directory containing one subdirectory per task.
+ * @param runId - Identifier used in the object keys and in the manifest.
+ * @param r2Config - Bucket name and CDN base URL.
+ * @param client - S3 client used for all uploads.
+ */
+async function uploadSingleRun(
+  runDir: string,
+  runId: string,
+  r2Config: R2Config,
+  client: S3Client,
+): Promise<void> {
+  const taskDirs = await readdir(runDir, { withFileTypes: true })
+  const taskEntries = taskDirs.filter((d) => d.isDirectory())
+
+  if (taskEntries.length === 0) {
+    console.warn(`  No task subdirectories in ${runId}, skipping`)
+    return
+  }
+
+  const manifestTasks: Record<string, unknown>[] = []
+  const jobs: { key: string; filePath: string; contentType: string }[] = []
+
+  // Extract agent config from first task
+  // (more precisely: from the first task whose metadata provides the value)
+  let agentConfig: Record<string, unknown> | undefined
+  let dataset: string | undefined
+
+  for (const taskDir of taskEntries) {
+    const taskId = taskDir.name
+    const taskPath = join(runDir, taskId)
+    const metaPath = join(taskPath, 'metadata.json')
+
+    let meta: Record<string, unknown> = {}
+    try {
+      meta = JSON.parse(await readFile(metaPath, 'utf-8'))
+    } catch {
+      // Missing or unparseable metadata: the task is left out of both the
+      // upload jobs and the manifest.
+      continue
+    }
+
+    if (!agentConfig && meta.agent_config)
+      agentConfig = meta.agent_config as Record<string, unknown>
+    if (!dataset && meta.dataset) dataset = meta.dataset as string
+
+    const files = await collectFiles(taskPath)
+    let screenshotCount = 0
+
+    for (const file of files) {
+      // Path of the file relative to the task directory (drops `taskPath`
+      // plus one separator character).
+      // NOTE(review): this keeps the platform path separator, so the object
+      // keys and the 'screenshots/' check below assume POSIX-style paths.
+      const relative = file.slice(taskPath.length + 1)
+      const ext = extname(file)
+      if (relative.startsWith('screenshots/') && ext === '.png')
+        screenshotCount++
+
+      jobs.push({
+        key: `runs/${runId}/${taskId}/${relative}`,
+        filePath: file,
+        contentType: CONTENT_TYPES[ext] || 'application/octet-stream',
+      })
+    }
+
+    manifestTasks.push({
+      queryId: meta.query_id || taskId,
+      query: meta.query || '',
+      startUrl: meta.start_url || '',
+      // Both branches yield termination_reason when it is set, so this is
+      // equivalent to `meta.termination_reason || 'unknown'`.
+      status:
+        meta.termination_reason === 'completed'
+          ? 'completed'
+          : meta.termination_reason || 'unknown',
+      durationMs: meta.total_duration_ms || 0,
+      // The count recorded in metadata wins; the number of PNGs found under
+      // screenshots/ is only the fallback.
+      screenshotCount: (meta.screenshot_count as number) || screenshotCount,
+      graderResults: meta.grader_results || {},
+    })
+  }
+
+  if (manifestTasks.length === 0) {
+    console.warn(`  No completed tasks in ${runId}, skipping`)
+    return
+  }
+
+  console.log(
+    `  Uploading ${jobs.length} files across ${manifestTasks.length} tasks...`,
+  )
+
+  let uploaded = 0
+  await runPool(jobs, CONCURRENCY, async (job) => {
+    const body = await readFile(job.filePath)
+    await upload(client, r2Config.bucket, job.key, body, job.contentType)
+    uploaded++
+    // Progress line every 50 files and once more at the end.
+    if (uploaded % 50 === 0 || uploaded === jobs.length) {
+      console.log(`    ${uploaded}/${jobs.length}`)
+    }
+  })
+
+  // Read summary.json if it exists
+  let summaryData: Record<string, unknown> | undefined
+  try {
+    summaryData = JSON.parse(
+      await readFile(join(runDir, 'summary.json'), 'utf-8'),
+    )
+  } catch {}
+
+  // Upload manifest
+  // It is written only after all task files, and isUploaded() looks for this
+  // exact key to decide whether a run is already in R2.
+  const manifest = {
+    runId,
+    uploadedAt: new Date().toISOString(),
+    agentConfig,
+    dataset,
+    summary: summaryData
+      ? {
+          passRate: summaryData.passRate,
+          avgDurationMs: summaryData.avgDurationMs,
+        }
+      : undefined,
+    tasks: manifestTasks,
+  }
+  const manifestBody = Buffer.from(JSON.stringify(manifest, null, 2))
+  await upload(
+    client,
+    r2Config.bucket,
+    `runs/${runId}/manifest.json`,
+    manifestBody,
+    'application/json',
+  )
+
+  // Upload viewer.html to bucket root
+  const viewerPath = join(
+    import.meta.dir,
+    '..',
+    'src',
+    'dashboard',
+    'viewer.html',
+  )
+  const viewerBody = await readFile(viewerPath)
+  await upload(client, r2Config.bucket, 'viewer.html', viewerBody, 'text/html')
+
+  // "+ 2" accounts for manifest.json and viewer.html.
+  console.log(`  Uploaded ${uploaded + 2} files`)
+  console.log(`  ${r2Config.cdnBaseUrl}/viewer.html?run=${runId}`)
+}
+
+/**
+ * CLI entry point. Takes one directory argument and either uploads it as a
+ * single run (when isRunDir() says so) or treats it as a config directory and
+ * uploads each run subdirectory that has no manifest in R2 yet.
+ *
+ * Exits with status 1 on a missing argument, a non-directory argument, or a
+ * config directory without subdirectories.
+ */
+async function main() {
+  const inputDir = process.argv[2]
+  if (!inputDir) {
+    console.error(
+      'Usage:\n' +
+        '  bun scripts/upload-run.ts results/config-name/2026-03-21-1730  (specific run)\n' +
+        '  bun scripts/upload-run.ts results/config-name                   (all un-uploaded runs)',
+    )
+    process.exit(1)
+  }
+
+  const dirStat = await stat(inputDir).catch(() => null)
+  if (!dirStat?.isDirectory()) {
+    console.error(`Not a directory: ${inputDir}`)
+    process.exit(1)
+  }
+
+  const r2Config = loadConfig()
+  const client = createClient(r2Config)
+
+  if (await isRunDir(inputDir)) {
+    // Single run: results/config-name/2026-03-21-1730
+    // The run id is built from the last two path segments as given on the
+    // command line: `<config-name>-<timestamp>`.
+    const timestamp = basename(inputDir)
+    const configName = basename(dirname(inputDir))
+    const runId = `${configName}-${timestamp}`
+    console.log(`Uploading run: ${runId}`)
+    await uploadSingleRun(inputDir, runId, r2Config, client)
+  } else {
+    // Config dir: results/config-name/ — upload all un-uploaded runs
+    const configName = basename(inputDir)
+    const entries = await readdir(inputDir, { withFileTypes: true })
+    // Every subdirectory is treated as a run; sorted by name before upload.
+    const runDirs = entries
+      .filter((e) => e.isDirectory())
+      .map((e) => e.name)
+      .sort()
+
+    if (runDirs.length === 0) {
+      console.error('No run subdirectories found')
+      process.exit(1)
+    }
+
+    console.log(
+      `Found ${runDirs.length} runs for config "${configName}", checking R2...`,
+    )
+
+    let uploadedCount = 0
+    for (const dir of runDirs) {
+      const runId = `${configName}-${dir}`
+      const alreadyUploaded = await isUploaded(client, r2Config.bucket, runId)
+      if (alreadyUploaded) {
+        console.log(`  ${runId}: already uploaded, skipping`)
+        continue
+      }
+
+      console.log(`  ${runId}: uploading...`)
+      await uploadSingleRun(join(inputDir, dir), runId, r2Config, client)
+      // NOTE(review): uploadSingleRun() returns void and may return early
+      // without uploading (no task dirs / no readable metadata); such runs are
+      // still counted as uploaded here.
+      uploadedCount++
+    }
+
+    console.log(
+      `\nDone. Uploaded ${uploadedCount} new run(s), ${runDirs.length - uploadedCount} already in R2.`,
+    )
+  }
+}
+
+main()
--- a/packages/browseros-agent/apps/eval/scripts/weekly-report.ts
+++ b/packages/browseros-agent/apps/eval/scripts/weekly-report.ts
@@ -0,0 +1,590 @@
+/**
+ * Weekly Report Generator
+ *
+ * Reads all uploaded eval runs from R2, builds cumulative score history,
+ * and generates an HTML dashboard with:
+ *   - Config selector dropdown (groups runs by config/runId pattern)
+ *   - Config details card (architecture, model, dataset, grader)
+ *   - Interactive trend chart (filtered by selected config)
+ *   - Stat cards (latest, trend, best, duration)
+ *   - Searchable table of all runs
+ *
+ * Usage:
+ *   bun apps/eval/scripts/weekly-report.ts [local-output-path]
+ *
+ * Env vars required:
+ *   EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID, EVAL_R2_SECRET_ACCESS_KEY
+ *   EVAL_R2_BUCKET (default: browseros-eval)
+ */
+
+import { writeFile } from 'node:fs/promises'
+import {
+  GetObjectCommand,
+  ListObjectsV2Command,
+  PutObjectCommand,
+  S3Client,
+} from '@aws-sdk/client-s3'
+
+/** One task entry inside a run manifest downloaded from R2. */
+interface ManifestTask {
+  queryId: string
+  query: string
+  // Compared against 'completed', 'failed' and 'timeout' when counting.
+  status: string
+  durationMs: number
+  screenshotCount: number
+  // Keyed by grader name; see PASS_FAIL_GRADER_ORDER for which entry decides
+  // pass/fail.
+  graderResults: Record<string, { pass: boolean; score: number }>
+}
+
+/** Shape expected of each `runs/.../manifest.json` object read from R2. */
+interface Manifest {
+  runId: string
+  // ISO timestamp; its date and HH:MM parts become the run's display date.
+  uploadedAt: string
+  agentConfig?: { type?: string; model?: string }
+  dataset?: string
+  summary?: { passRate?: number; avgDurationMs?: number }
+  tasks: ManifestTask[]
+}
+
+/** Per-run aggregate derived from a Manifest; one row/point in the report. */
+interface RunSummary {
+  runId: string
+  // runId with a trailing `-YYYY-MM-DD-HHMM` removed (see extractConfigName).
+  configName: string
+  // Display/sort key, normally "YYYY-MM-DD HH:MM" taken from uploadedAt.
+  date: string
+  // Fraction in [0, 1]: passed tasks / tasks that had a recognised grader.
+  passRate: number
+  // Number of tasks in the manifest.
+  total: number
+  // Task counts by status.
+  completed: number
+  failed: number
+  timeout: number
+  // Mean over tasks with durationMs > 0; 0 when there are none.
+  avgDurationMs: number
+  model: string
+  dataset: string
+  agentType: string
+}
+
+// Grader names in priority order: for each task, the first of these that is
+// present in graderResults decides whether the task counts as passed; tasks
+// with none of them are not counted as graded.
+const PASS_FAIL_GRADER_ORDER = [
+  'performance_grader',
+  'webvoyager_grader',
+  'fara_combined',
+  'fara_grader',
+]
+
+/**
+ * Return the value of environment variable `name`; when it is unset or empty,
+ * print an error and exit the process with status 1.
+ */
+function requireEnv(name: string): string {
+  const found = process.env[name]
+  if (found) return found
+
+  console.error(`Missing required env var: ${name}`)
+  process.exit(1)
+}
+
+// R2 connection settings. The three credential variables are mandatory
+// (requireEnv exits the process when one is missing); the bucket name falls
+// back to 'browseros-eval' when EVAL_R2_BUCKET is unset or empty.
+const accountId = requireEnv('EVAL_R2_ACCOUNT_ID')
+const accessKeyId = requireEnv('EVAL_R2_ACCESS_KEY_ID')
+const secretAccessKey = requireEnv('EVAL_R2_SECRET_ACCESS_KEY')
+const bucket = process.env.EVAL_R2_BUCKET || 'browseros-eval'
+
+// S3 client pointed at the account's R2 endpoint.
+const client = new S3Client({
+  region: 'auto',
+  endpoint: `https://${accountId}.r2.cloudflarestorage.com`,
+  credentials: { accessKeyId, secretAccessKey },
+})
+
+// Step 1: List all manifest.json files in runs/
+console.log('Scanning R2 for eval runs...')
+
+// Runtime guard for downloaded manifests. Step 2 reads `tasks.length` and
+// calls string methods on `runId`, so an object lacking either would abort the
+// whole report; such objects are skipped with a warning instead.
+function isManifest(value: unknown): value is Manifest {
+  if (typeof value !== 'object' || value === null) return false
+  const candidate = value as { runId?: unknown; tasks?: unknown }
+  return typeof candidate.runId === 'string' && Array.isArray(candidate.tasks)
+}
+
+const manifests: Manifest[] = []
+let continuationToken: string | undefined
+
+// Page through the listing; each page's manifests are fetched one by one.
+do {
+  const listRes = await client.send(
+    new ListObjectsV2Command({
+      Bucket: bucket,
+      Prefix: 'runs/',
+      ContinuationToken: continuationToken,
+    }),
+  )
+
+  const manifestKeys =
+    listRes.Contents?.filter((obj) => obj.Key?.endsWith('/manifest.json')).map(
+      (obj) => obj.Key as string,
+    ) ?? []
+
+  for (const key of manifestKeys) {
+    try {
+      const res = await client.send(
+        new GetObjectCommand({ Bucket: bucket, Key: key }),
+      )
+      const body = await res.Body?.transformToString()
+      if (!body) continue
+      const parsed: unknown = JSON.parse(body)
+      if (isManifest(parsed)) {
+        manifests.push(parsed)
+      } else {
+        console.warn(`  Unexpected manifest shape in ${key}, skipping`)
+      }
+    } catch {
+      // Download or JSON parse failure: leave this run out of the report.
+      console.warn(`  Failed to read ${key}, skipping`)
+    }
+  }
+
+  continuationToken = listRes.NextContinuationToken
+} while (continuationToken)
+
+console.log(`Found ${manifests.length} runs`)
+
+if (manifests.length === 0) {
+  console.log('No runs found. Nothing to report.')
+  process.exit(0)
+}
+
+// Step 2: Build run summaries
+// One RunSummary per manifest, sorted ascending by display date so the last
+// element of any filtered list is the most recent run.
+const runs: RunSummary[] = manifests
+  .map((m) => {
+    const total = m.tasks.length
+    const completed = m.tasks.filter((t) => t.status === 'completed').length
+    const failed = m.tasks.filter((t) => t.status === 'failed').length
+    const timeout = m.tasks.filter((t) => t.status === 'timeout').length
+
+    // A task is "graded" when it has a result from one of the known
+    // pass/fail graders; only the first match in priority order is used.
+    let graded = 0
+    let passed = 0
+    for (const task of m.tasks) {
+      if (!task.graderResults) continue
+      for (const name of PASS_FAIL_GRADER_ORDER) {
+        if (task.graderResults[name]) {
+          graded++
+          if (task.graderResults[name].pass) passed++
+          break
+        }
+      }
+    }
+
+    // Pass rate is relative to graded tasks, not to all tasks.
+    const passRate = graded > 0 ? passed / graded : 0
+    const durations = m.tasks
+      .filter((t) => t.durationMs > 0)
+      .map((t) => t.durationMs)
+    const avgDurationMs =
+      durations.length > 0
+        ? durations.reduce((a, b) => a + b, 0) / durations.length
+        : 0
+
+    // Fallback when uploadedAt is missing: recover the run timestamp from the
+    // runId suffix ("...-2026-03-21-1730" or a bare "2026-03-21-1730") and
+    // format it like the uploadedAt-derived dates. Taking the first 15
+    // characters only worked for bare-timestamp ids; with a config-name prefix
+    // it produced e.g. "browseros-agent", which is not a date and sorts wrong.
+    const stamp = m.runId.match(/(\d{4}-\d{2}-\d{2})-(\d{2})(\d{2})$/)
+    const fallbackDate = stamp
+      ? `${stamp[1]} ${stamp[2]}:${stamp[3]}`
+      : m.runId.slice(0, 15)
+
+    const date = m.uploadedAt
+      ? `${m.uploadedAt.split('T')[0]} ${m.uploadedAt.split('T')[1]?.slice(0, 5) || ''}`
+      : fallbackDate
+
+    const model = m.agentConfig?.model || 'unknown'
+    const dataset = m.dataset || m.runId
+    const agentType = m.agentConfig?.type || 'unknown'
+
+    const configName = extractConfigName(m.runId)
+    return {
+      runId: m.runId,
+      configName,
+      date,
+      passRate,
+      total,
+      completed,
+      failed,
+      timeout,
+      avgDurationMs,
+      model,
+      dataset,
+      agentType,
+    }
+  })
+  .sort((a, b) => a.date.localeCompare(b.date))
+
+// Step 3: Identify unique config groups
+// runId can be "ci-weekly" (old) or "ci-weekly-2026-03-21-1730" (timestamped)
+// Extract config name by stripping the date-time suffix pattern
+/**
+ * Escape a string for interpolation into HTML text or a quoted attribute
+ * value (double- or single-quoted).
+ *
+ * `&` is replaced first so the entities produced by the later replacements
+ * are not escaped a second time.
+ */
+function escHtml(s: string): string {
+  return s
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;')
+}
+
+/**
+ * Derive the config name from a run id by dropping a trailing
+ * `-YYYY-MM-DD-HHMM` timestamp. Ids without that suffix are returned as-is.
+ *
+ *   "browseros-agent-weekly-2026-03-21-1730" -> "browseros-agent-weekly"
+ *   "ci-weekly"                              -> "ci-weekly"
+ */
+function extractConfigName(runId: string): string {
+  const timestampSuffix = /-\d{4}-\d{2}-\d{2}-\d{4}$/
+  return runId.replace(timestampSuffix, '')
+}
+
+// Distinct config names, in order of first appearance in the date-sorted runs.
+const configGroups = [...new Set(runs.map((r) => r.configName))]
+// Config preselected in the dropdown: 'ci-weekly' when present, otherwise the
+// first group.
+const defaultConfig = configGroups.includes('ci-weekly')
+  ? 'ci-weekly'
+  : configGroups[0]
+
+// Step 4: Generate HTML report
+const html = `<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>BrowserOS Eval Dashboard</title>
+  <style>
+    * { margin: 0; padding: 0; box-sizing: border-box; }
+    body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif; background: #0d1117; color: #e6edf3; padding: 2rem; max-width: 1400px; margin: 0 auto; }
+
+    /* Header */
+    .page-header { display: flex; align-items: center; gap: 16px; margin-bottom: 2rem; flex-wrap: wrap; }
+    .page-header h1 { font-size: 1.5rem; }
+    .page-header h1 span { color: #58a6ff; }
+    .page-header .gen-date { color: #6e7681; font-size: 12px; margin-left: auto; }
+
+    /* Config selector */
+    .config-bar { display: flex; align-items: center; gap: 16px; margin-bottom: 1.5rem; flex-wrap: wrap; }
+    .config-bar label { font-size: 13px; color: #8b949e; font-weight: 600; }
+    .config-bar select { background: #161b22; border: 1px solid #30363d; color: #e6edf3; padding: 8px 12px; border-radius: 6px; font-size: 13px; font-family: 'SF Mono', Consolas, monospace; cursor: pointer; min-width: 200px; }
+    .config-bar select:focus { outline: none; border-color: #58a6ff; }
+
+    /* Config details card */
+    .config-details { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px 20px; margin-bottom: 1.5rem; display: flex; gap: 32px; flex-wrap: wrap; }
+    .config-detail { display: flex; flex-direction: column; gap: 2px; }
+    .config-detail .cd-label { font-size: 10px; font-weight: 600; color: #6e7681; text-transform: uppercase; letter-spacing: 0.04em; }
+    .config-detail .cd-value { font-size: 13px; color: #e6edf3; font-family: 'SF Mono', Consolas, monospace; }
+
+    /* Stat cards */
+    .stats { display: flex; gap: 1rem; margin-bottom: 1.5rem; flex-wrap: wrap; }
+    .stat-card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 1.25rem; flex: 1; min-width: 140px; }
+    .stat-label { color: #8b949e; font-size: 0.8rem; margin-bottom: 0.25rem; }
+    .stat-value { font-size: 1.4rem; font-weight: 600; }
+    .stat-value.big { font-size: 2.5rem; font-weight: 700; }
+    .pass { color: #3fb950; }
+    .fail { color: #f85149; }
+    .neutral { color: #8b949e; }
+    .trend-up { color: #3fb950; }
+    .trend-down { color: #f85149; }
+    .trend-flat { color: #8b949e; }
+
+    /* Chart */
+    .chart-container { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 1.5rem; margin-bottom: 2rem; position: relative; }
+    canvas { width: 100%; height: 300px; }
+    #tooltip { display: none; position: absolute; background: #1c2128; border: 1px solid #30363d; border-radius: 6px; padding: 8px 12px; pointer-events: none; font-size: 12px; z-index: 10; box-shadow: 0 4px 12px rgba(0,0,0,0.4); }
+
+    /* Section headers */
+    .section-header { display: flex; align-items: center; gap: 12px; margin-bottom: 1rem; }
+    .section-header h2 { font-size: 1rem; font-weight: 600; }
+    .section-header .search-input { margin-left: auto; background: #0d1117; border: 1px solid #30363d; color: #e6edf3; padding: 6px 12px; border-radius: 6px; font-size: 12px; font-family: inherit; width: 220px; }
+    .section-header .search-input:focus { outline: none; border-color: #58a6ff; }
+    .section-header .search-input::placeholder { color: #484f58; }
+
+    /* Table */
+    table { width: 100%; border-collapse: collapse; background: #161b22; border: 1px solid #30363d; border-radius: 8px; overflow: hidden; }
+    th, td { padding: 0.65rem 1rem; text-align: left; border-bottom: 1px solid #21262d; }
+    th { background: #1c2128; color: #8b949e; font-weight: 600; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.03em; }
+    td { font-size: 0.85rem; }
+    td.mono { font-family: 'SF Mono', Consolas, monospace; font-size: 0.8rem; }
+    a.view-link { color: #58a6ff; text-decoration: none; font-weight: 500; }
+    a.view-link:hover { text-decoration: underline; }
+    tr.hidden { display: none; }
+  </style>
+</head>
+<body>
+
+<div class="page-header">
+  <h1>BrowserOS <span>Eval Dashboard</span></h1>
+  <span class="gen-date">Generated ${new Date().toISOString().split('T')[0]}</span>
+</div>
+
+<!-- Config selector -->
+<div class="config-bar">
+  <label>Config:</label>
+  <select id="config-select">
+    ${configGroups.map((c) => `<option value="${escHtml(c)}"${c === defaultConfig ? ' selected' : ''}>${escHtml(c)}</option>`).join('\n    ')}
+  </select>
+</div>
+
+<!-- Config details (populated by JS) -->
+<div class="config-details" id="config-details"></div>
+
+<!-- Stat cards (populated by JS) -->
+<div class="stats" id="stat-cards"></div>
+
+<!-- Chart -->
+<div class="chart-container">
+  <canvas id="chart"></canvas>
+  <div id="tooltip">
+    <div id="tt-date" style="color:#8b949e;margin-bottom:2px;"></div>
+    <div id="tt-score" style="font-size:16px;font-weight:700;"></div>
+    <div id="tt-detail" style="color:#8b949e;margin-top:2px;font-size:11px;"></div>
+  </div>
+</div>
+
+<!-- Recent runs table -->
+<div class="section-header">
+  <h2>All Runs</h2>
+  <input type="text" class="search-input" id="table-search" placeholder="Search runs..." autocomplete="off" spellcheck="false" />
+</div>
+<table id="runs-table">
+  <thead>
+    <tr>
+      <th>Date</th>
+      <th>Config</th>
+      <th>Model</th>
+      <th>Dataset</th>
+      <th>Architecture</th>
+      <th>Pass Rate</th>
+      <th>Tasks</th>
+      <th>Timeout</th>
+      <th>Avg Duration</th>
+      <th>View</th>
+    </tr>
+  </thead>
+  <tbody>
+    ${runs
+      .slice()
+      .reverse()
+      .map((r) => {
+        const viewerUrl = `viewer.html?run=${encodeURIComponent(r.runId)}`
+        const passed = Math.round(r.passRate * r.total)
+        const archLabel =
+          r.agentType === 'orchestrator-executor'
+            ? 'Orch-Exec'
+            : r.agentType === 'single'
+              ? 'Tool Loop'
+              : r.agentType === 'gemini-computer-use'
+                ? 'Gemini CU'
+                : r.agentType || '—'
+        return `<tr data-config="${escHtml(r.runId)}" data-search="${escHtml(`${r.date} ${r.runId} ${r.model} ${r.dataset} ${archLabel}`)}">
+      <td>${escHtml(r.date)}</td>
+      <td class="mono">${escHtml(r.runId)}</td>
+      <td class="mono" style="max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;" title="${escHtml(r.model)}">${escHtml(r.model)}</td>
+      <td>${escHtml(r.dataset)}</td>
+      <td>${escHtml(archLabel)}</td>
+      <td class="${r.passRate >= 0.7 ? 'pass' : r.passRate >= 0.4 ? 'neutral' : 'fail'}">${(r.passRate * 100).toFixed(1)}% <span style="color:#6e7681;font-size:11px;">(${passed}/${r.total})</span></td>
+      <td>${r.total}</td>
+      <td class="${r.timeout > 0 ? 'neutral' : ''}">${r.timeout}</td>
+      <td>${(r.avgDurationMs / 1000).toFixed(0)}s</td>
+      <td><a href="${viewerUrl}" class="view-link">View &rarr;</a></td>
+    </tr>`
+      })
+      .join('\n')}
+  </tbody>
+</table>
+
+<script>
+(function() {
+  'use strict';
+
+  var allRuns = ${JSON.stringify(runs)};
+  var configSelect = document.getElementById('config-select');
+  var canvas = document.getElementById('chart');
+  var ctx = canvas.getContext('2d');
+  var tooltip = document.getElementById('tooltip');
+  var dpr = window.devicePixelRatio || 1;
+  var dotPositions = [];
+
+  function getFilteredRuns() {
+    var cfg = configSelect.value;
+    return allRuns.filter(function(r) { return r.configName === cfg; });
+  }
+
+  function updateDashboard() {
+    var runs = getFilteredRuns();
+    renderConfigDetails(runs);
+    renderStatCards(runs);
+    drawChart(runs);
+  }
+
+  // Config details card
+  function renderConfigDetails(runs) {
+    var el = document.getElementById('config-details');
+    if (runs.length === 0) { el.innerHTML = '<span style="color:#6e7681;">No runs found for this config.</span>'; return; }
+    var latest = runs[runs.length - 1];
+    var archLabel = latest.agentType === 'orchestrator-executor' ? 'Orchestrator-Executor'
+      : latest.agentType === 'single' ? 'Single Agent (Tool Loop)'
+      : latest.agentType === 'gemini-computer-use' ? 'Gemini Computer Use'
+      : latest.agentType || 'Unknown';
+    el.innerHTML =
+      '<div class="config-detail"><span class="cd-label">Architecture</span><span class="cd-value">' + archLabel + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Model</span><span class="cd-value">' + (latest.model || 'unknown') + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Dataset</span><span class="cd-value">' + (latest.dataset || 'unknown') + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Tasks</span><span class="cd-value">' + latest.total + '</span></div>' +
+      '<div class="config-detail"><span class="cd-label">Runs</span><span class="cd-value">' + runs.length + '</span></div>';
+  }
+
+  // Stat cards
+  function renderStatCards(runs) {
+    var el = document.getElementById('stat-cards');
+    if (runs.length === 0) { el.innerHTML = ''; return; }
+    var latest = runs[runs.length - 1];
+    var prev = runs.length >= 2 ? runs[runs.length - 2] : null;
+    var best = Math.max.apply(null, runs.map(function(r) { return r.passRate; }));
+    var delta = prev ? latest.passRate - prev.passRate : 0;
+    var sign = delta > 0 ? '+' : '';
+    var trendCls = delta > 0 ? 'trend-up' : delta < 0 ? 'trend-down' : 'trend-flat';
+
+    el.innerHTML =
+      '<div class="stat-card"><div class="stat-label">Latest Pass Rate</div><div class="stat-value big ' + (latest.passRate >= 0.7 ? 'pass' : 'fail') + '">' + (latest.passRate * 100).toFixed(1) + '%</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Trend</div><div class="stat-value ' + trendCls + '">' + (prev ? sign + (delta * 100).toFixed(1) + ' pp' : 'N/A') + '</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Best Score</div><div class="stat-value pass">' + (best * 100).toFixed(1) + '%</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Avg Duration</div><div class="stat-value">' + (latest.avgDurationMs / 1000).toFixed(0) + 's</div></div>' +
+      '<div class="stat-card"><div class="stat-label">Runs</div><div class="stat-value">' + runs.length + '</div></div>';
+  }
+
+  // Chart
+  // Redraws the pass-rate line chart for the given runs onto the canvas and
+  // rebuilds dotPositions, which the mouse handlers use for hit testing.
+  // runs: array of run objects; this function reads passRate (0..1) and date.
+  function drawChart(runs) {
+    // Size the backing store to the CSS size times dpr, then scale so all
+    // drawing below uses CSS pixels. Assigning canvas.width/height resets the
+    // 2D context state, so the scale does not accumulate across redraws.
+    // NOTE(review): dpr is defined outside this function - presumably
+    // window.devicePixelRatio; confirm.
+    var rect = canvas.getBoundingClientRect();
+    canvas.width = rect.width * dpr;
+    canvas.height = rect.height * dpr;
+    ctx.scale(dpr, dpr);
+
+    var W = rect.width, H = rect.height;
+    // Margins leave room for the Y labels (left) and rotated dates (bottom).
+    var pad = { top: 20, right: 20, bottom: 50, left: 50 };
+    var plotW = W - pad.left - pad.right;
+    var plotH = H - pad.top - pad.bottom;
+    dotPositions = [];
+
+    ctx.clearRect(0, 0, W, H);
+
+    // Empty state: centered message, and dotPositions stays empty.
+    if (runs.length === 0) {
+      ctx.fillStyle = '#8b949e';
+      ctx.font = '14px sans-serif';
+      ctx.textAlign = 'center';
+      ctx.fillText('No data for this config', W / 2, H / 2);
+      return;
+    }
+
+    // Y range: snap the min/max score outward to a multiple of 10, add 10
+    // points of headroom, and clamp to 0..100. The last line widens the range
+    // if both ends collapse to the same value, avoiding division by zero below.
+    var scores = runs.map(function(r) { return r.passRate * 100; });
+    var minY = Math.max(0, Math.floor(Math.min.apply(null, scores) / 10) * 10 - 10);
+    var maxY = Math.min(100, Math.ceil(Math.max.apply(null, scores) / 10) * 10 + 10);
+    if (minY === maxY) { minY = Math.max(0, minY - 10); maxY = Math.min(100, maxY + 10); }
+
+    // Grid
+    // One horizontal line and percentage label every 10 points.
+    ctx.strokeStyle = '#21262d';
+    ctx.lineWidth = 1;
+    for (var y = minY; y <= maxY; y += 10) {
+      var py = pad.top + plotH - ((y - minY) / (maxY - minY)) * plotH;
+      ctx.beginPath(); ctx.moveTo(pad.left, py); ctx.lineTo(pad.left + plotW, py); ctx.stroke();
+      ctx.fillStyle = '#8b949e'; ctx.font = '11px sans-serif'; ctx.textAlign = 'right';
+      ctx.fillText(y + '%', pad.left - 8, py + 4);
+    }
+
+    // X labels
+    // Runs are spaced evenly by index; a single run is centered. Labels are
+    // rotated by -30 degrees.
+    ctx.fillStyle = '#8b949e'; ctx.font = '11px sans-serif'; ctx.textAlign = 'center';
+    runs.forEach(function(r, i) {
+      var px = pad.left + (runs.length === 1 ? plotW / 2 : (i / (runs.length - 1)) * plotW);
+      ctx.save(); ctx.translate(px, pad.top + plotH + 15); ctx.rotate(-Math.PI / 6);
+      ctx.fillText(r.date, 0, 0); ctx.restore();
+    });
+
+    // Line
+    ctx.strokeStyle = '#58a6ff'; ctx.lineWidth = 2; ctx.beginPath();
+    runs.forEach(function(r, i) {
+      var px = pad.left + (runs.length === 1 ? plotW / 2 : (i / (runs.length - 1)) * plotW);
+      var py2 = pad.top + plotH - ((r.passRate * 100 - minY) / (maxY - minY)) * plotH;
+      if (i === 0) ctx.moveTo(px, py2); else ctx.lineTo(px, py2);
+    });
+    ctx.stroke();
+
+    // Dots
+    // Drawn after the line so they sit on top; green at passRate >= 0.7,
+    // red otherwise. Each dot's CSS-pixel position is recorded for hit testing.
+    runs.forEach(function(r, i) {
+      var px = pad.left + (runs.length === 1 ? plotW / 2 : (i / (runs.length - 1)) * plotW);
+      var py2 = pad.top + plotH - ((r.passRate * 100 - minY) / (maxY - minY)) * plotH;
+      dotPositions.push({ x: px, y: py2, run: r });
+      ctx.beginPath(); ctx.arc(px, py2, 4, 0, Math.PI * 2);
+      ctx.fillStyle = r.passRate >= 0.7 ? '#3fb950' : '#f85149';
+      ctx.fill(); ctx.strokeStyle = '#0d1117'; ctx.lineWidth = 2; ctx.stroke();
+    });
+  }
+
+  // Tooltip
+  // Shows the tooltip for the dot nearest to the pointer (within 40px) and
+  // highlights that dot; hides the tooltip otherwise.
+  canvas.addEventListener('mousemove', function(e) {
+    var rect = canvas.getBoundingClientRect();
+    // Pointer position relative to the canvas, in the same CSS-pixel space
+    // that drawChart records in dotPositions.
+    var mx = e.clientX - rect.left, my = e.clientY - rect.top;
+    var closest = null, closestDist = Infinity;
+    dotPositions.forEach(function(dot) {
+      var d = Math.sqrt(Math.pow(mx - dot.x, 2) + Math.pow(my - dot.y, 2));
+      if (d < closestDist) { closestDist = d; closest = dot; }
+    });
+
+    if (closest && closestDist < 40) {
+      var r = closest.run;
+      // Passed count is derived from the rate, so it is rounded to an integer.
+      var passed = Math.round(r.passRate * r.total);
+      document.getElementById('tt-date').textContent = r.date;
+      document.getElementById('tt-score').textContent = (r.passRate * 100).toFixed(1) + '%';
+      document.getElementById('tt-score').style.color = r.passRate >= 0.7 ? '#3fb950' : '#f85149';
+      // NOTE(review): the escapes below are written with a doubled backslash,
+      // presumably because this script is emitted from a template literal and
+      // the browser must receive a single-backslash escape - confirm.
+      document.getElementById('tt-detail').textContent = passed + '/' + r.total + ' pass \\u00B7 ' + (r.avgDurationMs / 1000).toFixed(0) + 's avg \\u00B7 ' + r.model;
+      tooltip.style.display = 'block';
+
+      // Default placement is right of and above the dot; flip to the left
+      // when it would pass the right edge, and below when it would pass the top.
+      var tx = closest.x + 12, ty = closest.y - 50;
+      if (tx + 200 > rect.width) tx = closest.x - 210;
+      if (ty < 0) ty = closest.y + 12;
+      tooltip.style.left = tx + 'px'; tooltip.style.top = ty + 'px';
+
+      // Highlight dot
+      // Full redraw first to erase any previous highlight, then draw a
+      // translucent halo and an enlarged dot on top.
+      drawChart(getFilteredRuns());
+      ctx.beginPath(); ctx.arc(closest.x, closest.y, 7, 0, Math.PI * 2);
+      ctx.fillStyle = 'rgba(88, 166, 255, 0.3)'; ctx.fill();
+      ctx.beginPath(); ctx.arc(closest.x, closest.y, 5, 0, Math.PI * 2);
+      ctx.fillStyle = r.passRate >= 0.7 ? '#3fb950' : '#f85149'; ctx.fill();
+      ctx.strokeStyle = '#e6edf3'; ctx.lineWidth = 2; ctx.stroke();
+      canvas.style.cursor = 'pointer';
+    } else {
+      tooltip.style.display = 'none';
+      canvas.style.cursor = 'default';
+    }
+  });
+
+  canvas.addEventListener('mouseleave', function() {
+    tooltip.style.display = 'none';
+    drawChart(getFilteredRuns());
+  });
+
+  canvas.addEventListener('click', function(e) {
+    var rect = canvas.getBoundingClientRect();
+    var mx = e.clientX - rect.left, my = e.clientY - rect.top;
+    dotPositions.forEach(function(dot) {
+      if (Math.sqrt(Math.pow(mx - dot.x, 2) + Math.pow(my - dot.y, 2)) < 20) {
+        window.open('viewer.html?run=' + encodeURIComponent(dot.run.runId), '_blank');
+      }
+    });
+  });
+
+  // Config selector change
+  configSelect.addEventListener('change', function() {
+    tooltip.style.display = 'none';
+    updateDashboard();
+  });
+
+  // Table search
+  document.getElementById('table-search').addEventListener('input', function(e) {
+    var q = e.target.value.toLowerCase();
+    var rows = document.querySelectorAll('#runs-table tbody tr');
+    rows.forEach(function(row) {
+      var searchText = row.getAttribute('data-search') || '';
+      row.classList.toggle('hidden', q && searchText.toLowerCase().indexOf(q) === -1);
+    });
+  });
+
+  // Resize
+  window.addEventListener('resize', function() { tooltip.style.display = 'none'; drawChart(getFilteredRuns()); });
+
+  // Init
+  updateDashboard();
+})();
+</script>
+
+</body>
+</html>`
+
+// Step 5: Save locally and upload to R2
+// The local copy goes to the path given as the first CLI argument, or to
+// /tmp/eval-report.html when none is given.
+const localPath = process.argv[2] || '/tmp/eval-report.html'
+await writeFile(localPath, html)
+console.log(`Report saved locally: ${localPath}`)
+
+// Always written under the fixed key report.html, so each upload replaces the
+// previous report. CacheControl lets caches reuse it for 300 seconds.
+await client.send(
+  new PutObjectCommand({
+    Bucket: bucket,
+    Key: 'report.html',
+    Body: html,
+    ContentType: 'text/html',
+    CacheControl: 'public, max-age=300',
+  }),
+)
+
+// Trailing slashes are stripped so the printed URL has exactly one slash
+// before report.html regardless of how the env var is written.
+const cdnBaseUrl = (
+  process.env.EVAL_R2_CDN_BASE_URL || 'https://eval.browseros.com'
+).replace(/\/+$/, '')
+
+console.log(`Report uploaded to R2: ${bucket}/report.html`)
+console.log(`  View at: ${cdnBaseUrl}/report.html`)
+
+// Print summary
+// Last 10 entries of runs, one line each: date, whole-number percentage
+// padded to 3 characters, and a bar of block characters (20 at a 100% rate).
+console.log('\nScore trend:')
+for (const run of runs.slice(-10)) {
+  const bar = '\u2588'.repeat(Math.round(run.passRate * 20))
+  const pct = (run.passRate * 100).toFixed(0).padStart(3)
+  console.log(`  ${run.date}  ${pct}% ${bar}`)
+}
--- a/packages/browseros-agent/apps/eval/src/agents/single-agent.ts
+++ b/packages/browseros-agent/apps/eval/src/agents/single-agent.ts
@@ -90,7 +90,8 @@ export class SingleAgentEvaluator implements AgentEvaluator {
        timeoutMs,
        capture,
        async (signal) => {
-          const result = await agent!.toolLoopAgent.generate({
+          if (!agent) throw new Error('Agent was not initialized')
+          const result = await agent.toolLoopAgent.generate({
            prompt: task.query,
            abortSignal: signal,

--- a/packages/browseros-agent/apps/eval/src/capture/screenshot.ts
+++ b/packages/browseros-agent/apps/eval/src/capture/screenshot.ts
@@ -63,9 +63,9 @@ export class ScreenshotCapture {
  }

  private async captureDirect(pageId: number): Promise<string | null> {
+    if (!this.browser) return null
    try {
-      // browser is guaranteed non-null here — captureDirect is only called when this.browser is truthy
-      const result = await this.browser!.screenshot(pageId, {
+      const result = await this.browser.screenshot(pageId, {
        format: 'png',
        fullPage: false,
      })
@@ -74,9 +74,9 @@ export class ScreenshotCapture {
    } catch (error) {
      // If page ID is invalid, try listing pages and use the first one
      try {
-        const pages = await this.browser!.listPages()
+        const pages = await this.browser.listPages()
        if (pages.length > 0) {
-          const result = await this.browser!.screenshot(pages[0].pageId, {
+          const result = await this.browser.screenshot(pages[0].pageId, {
            format: 'png',
            fullPage: false,
          })
--- a/packages/browseros-agent/apps/eval/src/constants.ts
+++ b/packages/browseros-agent/apps/eval/src/constants.ts
@@ -2,7 +2,7 @@
 * Eval-specific constants shared across agents, runners, and capture modules.
 */

-export const DEFAULT_TIMEOUT_MS = 15 * 60 * 1000 // 15 minutes
+export const DEFAULT_TIMEOUT_MS = 30 * 60 * 1000 // 30 minutes
 export const SCREENSHOT_TIMEOUT_MS = 65_000 // 65s — ensures we get extension's error (60s)
 export const MAX_ACTIONS_PER_DELEGATION = 15
 export const CLADO_REQUEST_TIMEOUT_MS = 120_000
--- a/packages/browseros-agent/apps/eval/src/dashboard/index.html
+++ b/packages/browseros-agent/apps/eval/src/dashboard/index.html
@@ -354,7 +354,7 @@
          </div>
          <div class="config-field">
            <label>Timeout (ms)</label>
-            <input type="number" id="cfg-timeout" value="600000" min="30000" max="3600000">
+            <input type="number" id="cfg-timeout" value="1800000" min="30000" max="3600000">
          </div>
        </div>
        <div class="config-row" style="gap: 16px;">
@@ -454,6 +454,17 @@
        <button class="btn-run" id="btn-run" onclick="submitConfig()">Run Eval</button>
      </div>

+      <!-- Load previous run -->
+      <div class="config-actions">
+        <div class="load-config">
+          <label>Load run:</label>
+          <select id="cfg-run-select">
+            <option value="">-- select --</option>
+          </select>
+        </div>
+        <button class="btn-secondary" onclick="loadPreviousRun()">Load Run</button>
+      </div>
+
    </div>
  </div>
 </div>
@@ -529,6 +540,7 @@ async function init() {

  // Load saved configs into dropdown
  loadConfigList();
+  loadRunList();

  if (mode.configMode) {
    // Config mode — show panel expanded
@@ -656,6 +668,53 @@ async function loadConfigList() {
  } catch {}
 }

+// Populates the #cfg-run-select dropdown with the run names returned by
+// GET /api/runs. Each entry is used as both the option value and its label.
+async function loadRunList() {
+  try {
+    const res = await fetch('/api/runs');
+    const runs = await res.json();
+    const sel = document.getElementById('cfg-run-select');
+    // Reset to just the placeholder so repeated calls do not duplicate options.
+    sel.innerHTML = '<option value="">-- select --</option>';
+    runs.forEach(r => {
+      // Built via DOM APIs and textContent, so run names are not parsed as HTML.
+      const opt = document.createElement('option');
+      opt.value = r;
+      opt.textContent = r;
+      sel.appendChild(opt);
+    });
+  } catch {}
+  // Errors (network, non-JSON, non-array response) are swallowed: whatever
+  // options the dropdown had when the error was thrown are left as they are.
+}
+
+async function loadPreviousRun() {
+  const runName = document.getElementById('cfg-run-select').value;
+  if (!runName) return;
+  const errEl = document.getElementById('config-error');
+  errEl.textContent = '';
+  try {
+    const res = await fetch('/api/load-run', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ runName }),
+    });
+    const result = await res.json();
+    if (!res.ok) {
+      errEl.textContent = result.error || 'Failed to load run';
+      return;
+    }
+    const stateRes = await fetch('/api/state');
+    const state = await stateRes.json();
+    document.getElementById('config-name').textContent =
+      state.configName ? `${state.configName} \u00B7 ${state.agentType}` : '';
+    tasks = state.tasks;
+    setConfigPanelOpen(false);
+    updateConfigSummary(state.configName, state.agentType);
+    selectedTaskId = null;
+    renderTaskList();
+    updateProgress();
+    if (tasks.length > 0) selectTask(tasks[0].queryId);
+  } catch (e) {
+    errEl.textContent = `Network error: ${e.message}`;
+  }
+}
+
 async function loadSavedConfig(name) {
  if (!name) return;
  try {
@@ -1055,7 +1114,8 @@ function renderTaskList() {
    if (t.graderResults) {
      const primary = getPrimaryGrader(t.graderResults);
      if (primary) {
-        graderBadge = `<span class="grade-badge ${primary.pass ? 'pass' : 'fail'}">${primary.pass ? 'PASS' : 'FAIL'}</span>`;
+        const pct = typeof primary.score === 'number' ? `${(primary.score * 100).toFixed(0)}%` : (primary.pass ? 'PASS' : 'FAIL');
+        graderBadge = `<span class="grade-badge ${primary.pass ? 'pass' : 'fail'}">${pct}</span>`;
      }
    }

@@ -1144,12 +1204,35 @@ function toggleAutoplay() {
 // ============================================================================
 // Agent Stream
 // ============================================================================
-function renderStreamForTask(taskId) {
+async function renderStreamForTask(taskId) {
  const body = document.getElementById('stream-body');
  body.innerHTML = '';
  const events = streamEvents[taskId] || [];
-  events.forEach(e => appendStreamEntry(e, false));
-  body.scrollTop = body.scrollHeight;
+  if (events.length > 0) {
+    events.forEach(e => appendStreamEntry(e, false));
+    body.scrollTop = body.scrollHeight;
+    return;
+  }
+  const task = tasks.find(t => t.queryId === taskId);
+  if (!task || task.status === 'pending' || task.status === 'running') return;
+  body.innerHTML = '<div class="empty-state">Loading events...</div>';
+  try {
+    const res = await fetch(`/api/messages/${taskId}`);
+    if (!res.ok) {
+      body.innerHTML = '<div class="empty-state">No event log available</div>';
+      return;
+    }
+    const text = await res.text();
+    const parsed = text.trim().split('\n').filter(Boolean).map(line => {
+      try { return JSON.parse(line); } catch { return null; }
+    }).filter(Boolean);
+    streamEvents[taskId] = parsed;
+    body.innerHTML = '';
+    parsed.forEach(e => appendStreamEntry(e, false));
+    body.scrollTop = body.scrollHeight;
+  } catch {
+    body.innerHTML = '<div class="empty-state">Failed to load events</div>';
+  }
 }

 function appendStreamEntry(event, scroll = true) {
--- a/packages/browseros-agent/apps/eval/src/dashboard/server.ts
+++ b/packages/browseros-agent/apps/eval/src/dashboard/server.ts
@@ -1,4 +1,4 @@
-import { mkdir, readdir, readFile } from 'node:fs/promises'
+import { mkdir, readdir, readFile, stat } from 'node:fs/promises'
 import { join, resolve } from 'node:path'
 import { Hono } from 'hono'
 import { streamSSE } from 'hono/streaming'
@@ -199,6 +199,133 @@ app.get('/api/screenshots/:taskId/:index', async (c) => {
  }
 })

+// GET /api/messages/:taskId
+// Serves <outputDir>/<taskId>/messages.jsonl for the currently loaded run as
+// newline-delimited JSON. Responds 400 for a task id that could escape the
+// output directory and 404 when the file is missing or unreadable.
+app.get('/api/messages/:taskId', async (c) => {
+  const { taskId } = c.req.param()
+  // First line of defence against path traversal: no parent references and
+  // no path separators in the id.
+  if (taskId.includes('..') || taskId.includes('/')) {
+    return c.json({ error: 'Invalid parameters' }, 400)
+  }
+  const filepath = join(dashboardState.outputDir, taskId, 'messages.jsonl')
+  // Second check on the resolved absolute path.
+  const resolved = resolve(filepath)
+  if (!resolved.startsWith(resolve(dashboardState.outputDir))) {
+    return c.json({ error: 'Invalid path' }, 400)
+  }
+  try {
+    const file = Bun.file(filepath)
+    if (!(await file.exists())) return c.notFound()
+    const data = await file.arrayBuffer()
+    return c.body(data, 200, {
+      'Content-Type': 'application/x-ndjson',
+      'Cache-Control': 'no-cache',
+    })
+  } catch {
+    // Any read failure is reported as not found.
+    return c.notFound()
+  }
+})
+
+const resultsDir = join(import.meta.dir, '..', '..', 'results')
+
+// GET /api/runs
+// Lists the runs available under resultsDir as a JSON array of strings,
+// sorted in descending string order. Responds with an empty array on error.
+app.get('/api/runs', async (c) => {
+  try {
+    const runs: string[] = []
+    const entries = await readdir(resultsDir, { withFileTypes: true })
+    for (const entry of entries.filter((e) => e.isDirectory())) {
+      // An unreadable directory is treated as having no children.
+      const subEntries = await readdir(join(resultsDir, entry.name), {
+        withFileTypes: true,
+      }).catch(() => [] as import('node:fs').Dirent[])
+      // A directory is treated as a group of runs when at least one child
+      // directory is named NNNN-NN-NN-NNNN (digits only).
+      // NOTE(review): presumably a date plus time stamp - confirm the format.
+      const hasTimestampDirs = subEntries.some(
+        (s) => s.isDirectory() && /^\d{4}-\d{2}-\d{2}-\d{4}$/.test(s.name),
+      )
+      if (hasTimestampDirs) {
+        // Every child directory is listed as "<group>/<child>", including
+        // children whose names do not match the pattern above.
+        for (const sub of subEntries.filter((s) => s.isDirectory())) {
+          runs.push(`${entry.name}/${sub.name}`)
+        }
+      } else {
+        // Otherwise the directory itself is listed as a single run.
+        runs.push(entry.name)
+      }
+    }
+    runs.sort().reverse()
+    return c.json(runs)
+  } catch {
+    return c.json([])
+  }
+})
+
+// POST /api/load-run  { runName: string }
+// Replaces the dashboard state with a previously recorded run read from
+// <resultsDir>/<runName>. runName is either "<run>" or "<group>/<run>".
+// Responses: 409 while an eval is running, 400 for a malformed body or an
+// unsafe run name, 404 when the directory is missing or holds no readable
+// task metadata, otherwise a JSON summary of what was loaded.
+app.post('/api/load-run', async (c) => {
+  if (evalRunning)
+    return c.json({ error: 'Cannot load while eval is running' }, 409)
+  // The body is untrusted: valid JSON may still be null, an array, or carry
+  // a non-string runName, so nothing about its shape is assumed here.
+  let body: { runName?: unknown } | null
+  try {
+    body = await c.req.json()
+  } catch {
+    return c.json({ error: 'Invalid JSON body' }, 400)
+  }
+  const runName = body?.runName
+  // The type check comes first so the string methods below cannot throw
+  // (which would surface as a 500 instead of a 400).
+  if (typeof runName !== 'string' || !runName || runName.includes('..')) {
+    return c.json({ error: 'Invalid run name' }, 400)
+  }
+  // At most one separator: "<group>/<run>" is the deepest allowed form.
+  if ((runName.match(/\//g) || []).length > 1) {
+    return c.json({ error: 'Invalid run name' }, 400)
+  }
+  const outputDir = resolve(resultsDir, runName)
+  if (!outputDir.startsWith(resolve(resultsDir))) {
+    return c.json({ error: 'Invalid path' }, 400)
+  }
+  const dirStat = await stat(outputDir).catch(() => null)
+  if (!dirStat?.isDirectory()) {
+    return c.json({ error: 'Run directory not found' }, 404)
+  }
+  const entries = await readdir(outputDir, { withFileTypes: true })
+  const taskDirs = entries.filter((e) => e.isDirectory())
+  const loadedTasks: DashboardTask[] = []
+  let agentType = ''
+  for (const taskDir of taskDirs) {
+    const metaPath = join(outputDir, taskDir.name, 'metadata.json')
+    try {
+      const raw = JSON.parse(await readFile(metaPath, 'utf-8'))
+      // The agent type is taken from the first task that records one.
+      if (!agentType && raw.agent_config?.type) {
+        agentType = raw.agent_config.type
+      }
+      const screenshotDir = join(outputDir, taskDir.name, 'screenshots')
+      let screenshotCount = raw.screenshot_count ?? 0
+      // Fall back to counting PNG files when the metadata has no count.
+      if (!screenshotCount) {
+        try {
+          const files = await readdir(screenshotDir)
+          screenshotCount = files.filter((f: string) =>
+            f.endsWith('.png'),
+          ).length
+        } catch {}
+      }
+      loadedTasks.push({
+        queryId: raw.query_id || taskDir.name,
+        query: raw.query || '',
+        startUrl: raw.start_url,
+        // Any termination reason other than completed/timeout is shown as failed.
+        status:
+          raw.termination_reason === 'completed'
+            ? 'completed'
+            : raw.termination_reason === 'timeout'
+              ? 'timeout'
+              : 'failed',
+        durationMs: raw.total_duration_ms,
+        graderResults: raw.grader_results,
+        screenshotCount,
+      })
+    } catch {}
+    // Task directories with missing or unparsable metadata are skipped.
+  }
+  if (loadedTasks.length === 0) {
+    return c.json({ error: 'No completed tasks found in this run' }, 404)
+  }
+  // State is only replaced once the run is known to contain tasks.
+  dashboardState.configName = runName
+  dashboardState.agentType = agentType
+  dashboardState.outputDir = outputDir
+  dashboardState.tasks = loadedTasks
+  return c.json({
+    status: 'loaded',
+    configName: runName,
+    agentType,
+    taskCount: loadedTasks.length,
+  })
+})
+
 // ============================================================================
 // Config & Run API
 // ============================================================================
--- a/packages/browseros-agent/apps/eval/src/dashboard/viewer.html
+++ b/packages/browseros-agent/apps/eval/src/dashboard/viewer.html
--- a/packages/browseros-agent/apps/eval/src/graders/performance/axes.ts
+++ b/packages/browseros-agent/apps/eval/src/graders/performance/axes.ts
@@ -53,8 +53,8 @@ The raw event stream — one JSON object per line with a "type" field.
 - "tool-output-error" / "tool-input-error" — Tool call failed. Fields: toolCallId, error.
 - "text-delta" — Agent's reasoning text. Field: delta (small text chunk).

-**Event types to AVOID reading:**
- "tool-output-available" — Tool output. The "output" field contains FULL PAGE DOM CONTENT — hundreds of interactive elements, entire page text, etc. These lines are 5-50KB each. NEVER read them. The tool-input-available lines already tell you what the agent did. Screenshots show you the visual result.
+**Event types to handle carefully:**
+- "tool-output-available" — Tool output. The "output" field contains FULL PAGE DOM CONTENT — hundreds of interactive elements, entire page text, etc. These lines are 5-50KB each. NEVER read them in bulk. However, you CAN and SHOULD use Grep to search within these lines for specific keywords when screenshots alone can't verify a claim. For example, if the task asks "find the price of X" and the screenshot is unclear, grep messages.jsonl for the product name or price value to confirm the agent actually saw it in the DOM.

 ### 2. screenshots/ directory
 Numbered PNG screenshots (1.png, 2.png, ...) captured after each tool execution.
@@ -95,6 +95,13 @@ Grep for "tool-output-error" or "tool-input-error". If none found, zero errors.
 **Step 3: Sample reasoning (only if needed for reasoning_quality)**
 Grep for "text-delta" but LIMIT to the first 10 and last 10 results. Don't read all reasoning text.

+**Step 4: Verify claims from DOM content (critical for task_completion)**
+When the agent's final answer contains specific data (prices, names, dates, counts, etc.) that you can't confirm from screenshots alone, use Grep to search messages.jsonl for those specific values or keywords. This searches the tool-output-available lines which contain DOM content the agent actually saw. For example:
+- Task asks "find cheapest flight price" → grep for the dollar amount from the final answer
+- Task asks "list the top 3 articles" → grep for the article titles mentioned in the answer
+- Task asks "extract the email address" → grep for the email pattern
+This is the most reliable way to verify whether the agent actually found the data it claims, since screenshots may be blurry, truncated, or missing the relevant section.
+
 ## How to View Screenshots

 You have {screenshot_count} screenshots. View 3-5 strategically:
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`{"query_id":"showcase-amazon-order","dataset":"showcase","query":"Open amazon.com and order Sensodyne toothpaste","start_url":"https://www.amazon.com","metadata":{"original_task_id":"showcase-amazon-order"}}`