mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-13 23:53:25 +00:00
Compare commits
50 Commits
fix/dofgoo
...
fix/nerdct
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5297fe6b67 | ||
|
|
3397c4fbce | ||
|
|
70c41ff298 | ||
|
|
2c4626a339 | ||
|
|
97f0274853 | ||
|
|
d59203708c | ||
|
|
2982447487 | ||
|
|
620f8eb2ef | ||
|
|
455f8f1489 | ||
|
|
9250698bfc | ||
|
|
1145ec3c26 | ||
|
|
4a7fc77bc6 | ||
|
|
65cea3064e | ||
|
|
044a7801b4 | ||
|
|
8712f89f18 | ||
|
|
ba60bf466f | ||
|
|
26afb826c6 | ||
|
|
b2340c8afa | ||
|
|
790a270f47 | ||
|
|
84a79ba0a1 | ||
|
|
6e3306f5e5 | ||
|
|
c244462b29 | ||
|
|
ebf97f74f6 | ||
|
|
561f2baf97 | ||
|
|
df0f45dd29 | ||
|
|
edfc5c751c | ||
|
|
471256f31c | ||
|
|
4c90ca696b | ||
|
|
f2ac87d7c3 | ||
|
|
231bd6821d | ||
|
|
a228c278c6 | ||
|
|
e2ec1991cf | ||
|
|
0c84547e8f | ||
|
|
2ff5c12840 | ||
|
|
d87422eea1 | ||
|
|
1946ca0cf8 | ||
|
|
754f7d0e1d | ||
|
|
85bb3f7b42 | ||
|
|
cb32b8191d | ||
|
|
7a92654abc | ||
|
|
91d3285aa0 | ||
|
|
7bb6dac949 | ||
|
|
d9c254053e | ||
|
|
6b9945f933 | ||
|
|
6a5a7775a9 | ||
|
|
af48a2110c | ||
|
|
c5ff8d75bc | ||
|
|
445a6a6c45 | ||
|
|
72d39b9a0f | ||
|
|
3b47f330f5 |
157
.github/workflows/build-agent.yml
vendored
157
.github/workflows/build-agent.yml
vendored
@@ -1,157 +0,0 @@
|
||||
name: build-agent
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
agent:
|
||||
description: "Agent name from bundle.json"
|
||||
required: true
|
||||
type: string
|
||||
default: openclaw
|
||||
publish:
|
||||
description: "Upload to R2 and merge manifest slice"
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
pull_request:
|
||||
paths:
|
||||
- "packages/browseros-agent/packages/build-tools/**"
|
||||
- ".github/workflows/build-agent.yml"
|
||||
|
||||
env:
|
||||
BUN_VERSION: "1.3.6"
|
||||
PKG_DIR: packages/browseros-agent/packages/build-tools
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: oven-sh/setup-bun@v2
|
||||
with:
|
||||
bun-version: ${{ env.BUN_VERSION }}
|
||||
- working-directory: packages/browseros-agent
|
||||
run: bun install --frozen-lockfile
|
||||
- working-directory: packages/browseros-agent
|
||||
run: bun run --filter @browseros/build-tools typecheck
|
||||
- working-directory: packages/browseros-agent
|
||||
run: bun run --filter @browseros/build-tools test
|
||||
|
||||
build:
|
||||
needs: check
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- arch: arm64
|
||||
runner: ubuntu-24.04-arm
|
||||
runs-on: ${{ matrix.runner }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: oven-sh/setup-bun@v2
|
||||
with:
|
||||
bun-version: ${{ env.BUN_VERSION }}
|
||||
- name: Install podman
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y podman
|
||||
- working-directory: packages/browseros-agent
|
||||
run: bun install --frozen-lockfile
|
||||
- name: Build tarball
|
||||
working-directory: ${{ env.PKG_DIR }}
|
||||
env:
|
||||
AGENT: ${{ inputs.agent || 'openclaw' }}
|
||||
OUT: ${{ github.workspace }}/dist/images
|
||||
run: bun run build:tarball -- --agent "$AGENT" --arch "${{ matrix.arch }}" --output-dir "$OUT"
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: tarball-${{ inputs.agent || 'openclaw' }}-${{ matrix.arch }}
|
||||
path: dist/images/
|
||||
retention-days: 7
|
||||
|
||||
smoke:
|
||||
needs: build
|
||||
runs-on: ubuntu-24.04-arm
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: oven-sh/setup-bun@v2
|
||||
with:
|
||||
bun-version: ${{ env.BUN_VERSION }}
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: tarball-${{ inputs.agent || 'openclaw' }}-arm64
|
||||
path: dist/images
|
||||
- name: Install podman
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y podman
|
||||
- working-directory: packages/browseros-agent
|
||||
run: bun install --frozen-lockfile
|
||||
- name: Smoke test tarball
|
||||
working-directory: ${{ env.PKG_DIR }}
|
||||
env:
|
||||
AGENT: ${{ inputs.agent || 'openclaw' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
tarball="$(find "$GITHUB_WORKSPACE/dist/images" -name "${AGENT}-*-arm64.tar.gz" -print -quit)"
|
||||
if [ -z "$tarball" ]; then
|
||||
echo "missing arm64 tarball artifact for ${AGENT}" >&2
|
||||
exit 1
|
||||
fi
|
||||
bun run smoke:tarball -- --agent "$AGENT" --arch arm64 --tarball "$tarball"
|
||||
|
||||
publish:
|
||||
needs: [build, smoke]
|
||||
if: ${{ github.event_name == 'workflow_dispatch' && inputs.publish == true }}
|
||||
runs-on: ubuntu-24.04
|
||||
environment: release
|
||||
concurrency:
|
||||
group: r2-manifest-publish
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: oven-sh/setup-bun@v2
|
||||
with:
|
||||
bun-version: ${{ env.BUN_VERSION }}
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
pattern: tarball-*
|
||||
path: dist/images
|
||||
merge-multiple: true
|
||||
- working-directory: packages/browseros-agent
|
||||
run: bun install --frozen-lockfile
|
||||
- name: Upload tarballs to R2
|
||||
working-directory: ${{ env.PKG_DIR }}
|
||||
env:
|
||||
R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}
|
||||
R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
|
||||
R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
|
||||
R2_BUCKET: ${{ secrets.R2_BUCKET }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
for file in "$GITHUB_WORKSPACE"/dist/images/*.tar.gz; do
|
||||
base="$(basename "$file")"
|
||||
bun run upload -- --file "$file" --key "vm/images/$base" --content-type "application/gzip" --sidecar-sha
|
||||
done
|
||||
- name: Merge agent slice into manifest
|
||||
working-directory: ${{ env.PKG_DIR }}
|
||||
env:
|
||||
AGENT: ${{ inputs.agent || 'openclaw' }}
|
||||
R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}
|
||||
R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
|
||||
R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
|
||||
R2_BUCKET: ${{ secrets.R2_BUCKET }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
mkdir -p dist/images
|
||||
cp -R "$GITHUB_WORKSPACE"/dist/images/* dist/images/
|
||||
bun run download -- --key vm/manifest.json --out dist/baseline-manifest.json
|
||||
bun run emit-manifest -- \
|
||||
--slice "agents:${AGENT}" \
|
||||
--dist-dir dist \
|
||||
--merge-from dist/baseline-manifest.json \
|
||||
--out dist/manifest.json
|
||||
bun run upload -- --file dist/manifest.json --key vm/manifest.json --content-type "application/json"
|
||||
45
.github/workflows/eval-weekly.yml
vendored
45
.github/workflows/eval-weekly.yml
vendored
@@ -14,7 +14,7 @@ on:
|
||||
config:
|
||||
description: 'Eval config file (relative to apps/eval/)'
|
||||
required: false
|
||||
default: 'configs/browseros-agent-weekly.json'
|
||||
default: 'configs/legacy/browseros-agent-weekly.json'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -30,8 +30,9 @@ jobs:
|
||||
|
||||
- name: Install BrowserOS
|
||||
run: |
|
||||
wget -q https://github.com/browseros-ai/BrowserOS/releases/download/v0.44.0.1/BrowserOS_v0.44.0.1_amd64.deb
|
||||
sudo dpkg -i BrowserOS_v0.44.0.1_amd64.deb
|
||||
# Rolling stable channel — see https://cdn.browseros.com/download/BrowserOS.deb
|
||||
wget -q -O BrowserOS.deb https://cdn.browseros.com/download/BrowserOS.deb
|
||||
sudo dpkg -i BrowserOS.deb
|
||||
browseros --version || echo "BrowserOS installed at $(which browseros)"
|
||||
|
||||
- name: Install Bun
|
||||
@@ -41,10 +42,12 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: packages/browseros-agent
|
||||
run: bun install --ignore-scripts && bun run build:agent-sdk
|
||||
run: bun install --ignore-scripts
|
||||
|
||||
- name: Install Python eval dependencies
|
||||
run: pip install agisdk requests
|
||||
# agisdk pinned so silent upstream releases can't shift task definitions
|
||||
# or grader behavior. Bump intentionally with a documented re-baseline.
|
||||
run: pip install agisdk==0.3.5 requests
|
||||
|
||||
- name: Clone WebArena-Infinity
|
||||
run: git clone --depth 1 https://github.com/web-arena-x/webarena-infinity.git /tmp/webarena-infinity
|
||||
@@ -59,33 +62,27 @@ jobs:
|
||||
curl -sL -o /tmp/nopecha.zip https://github.com/NopeCHALLC/nopecha-extension/releases/latest/download/chromium_automation.zip
|
||||
unzip -qo /tmp/nopecha.zip -d extensions/nopecha
|
||||
|
||||
- name: Run eval
|
||||
- name: Run eval and publish to R2
|
||||
working-directory: packages/browseros-agent/apps/eval
|
||||
env:
|
||||
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
|
||||
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
NOPECHA_API_KEY: ${{ secrets.NOPECHA_API_KEY }}
|
||||
BROWSEROS_BINARY: /usr/bin/browseros
|
||||
WEBARENA_INFINITY_DIR: /tmp/webarena-infinity
|
||||
EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}
|
||||
run: |
|
||||
echo "Running eval with config: $EVAL_CONFIG"
|
||||
xvfb-run --auto-servernum --server-args="-screen 0 1440x900x24" bun run src/index.ts -c "$EVAL_CONFIG"
|
||||
|
||||
- name: Upload runs to R2
|
||||
if: success()
|
||||
working-directory: packages/browseros-agent/apps/eval
|
||||
env:
|
||||
EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
|
||||
EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
|
||||
EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
|
||||
EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
|
||||
EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
|
||||
EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}
|
||||
BROWSEROS_BINARY: /usr/bin/browseros
|
||||
WEBARENA_INFINITY_DIR: /tmp/webarena-infinity
|
||||
# OpenClaw container runtime is macOS-only; opt the Linux runner
|
||||
# into the no-op stub so the server can boot and the eval can run.
|
||||
BROWSEROS_SKIP_OPENCLAW: '1'
|
||||
EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/legacy/browseros-agent-weekly.json' }}
|
||||
run: |
|
||||
CONFIG_NAME=$(basename "$EVAL_CONFIG" .json)
|
||||
bun scripts/upload-run.ts "results/$CONFIG_NAME"
|
||||
echo "Running eval with config: $EVAL_CONFIG"
|
||||
xvfb-run --auto-servernum --server-args="-screen 0 1440x900x24" bun run src/index.ts suite --config "$EVAL_CONFIG" --publish r2
|
||||
|
||||
- name: Generate trend report
|
||||
if: success()
|
||||
@@ -106,3 +103,11 @@ jobs:
|
||||
with:
|
||||
name: eval-report-${{ github.run_id }}
|
||||
path: /tmp/eval-report.html
|
||||
|
||||
- name: Upload server stderr logs (for post-mortem on startup failures)
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: browseros-server-logs-${{ github.run_id }}
|
||||
path: /tmp/browseros-server-logs/
|
||||
if-no-files-found: ignore
|
||||
|
||||
165
.github/workflows/release-agent-sdk.yml
vendored
165
.github/workflows/release-agent-sdk.yml
vendored
@@ -1,168 +1,11 @@
|
||||
name: Release BrowserOS Agent SDK
|
||||
name: Release BrowserOS Agent SDK (disabled)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: release-agent-sdk
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
if: github.ref == 'refs/heads/main'
|
||||
disabled:
|
||||
if: ${{ false }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
defaults:
|
||||
run:
|
||||
working-directory: packages/browseros-agent/packages/agent-sdk
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: oven-sh/setup-bun@v2
|
||||
|
||||
- uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: "20"
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
|
||||
- name: Install dependencies
|
||||
run: bun ci
|
||||
working-directory: packages/browseros-agent
|
||||
|
||||
- name: Build
|
||||
run: bun run build
|
||||
|
||||
- name: Test
|
||||
run: bun test
|
||||
|
||||
- name: Get version
|
||||
id: version
|
||||
run: |
|
||||
echo "version=$(node -p "require('./package.json').version")" >> "$GITHUB_OUTPUT"
|
||||
echo "release_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Generate release notes
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
SDK_PATH="packages/browseros-agent/packages/agent-sdk"
|
||||
CURRENT_TAG="agent-sdk-v${{ steps.version.outputs.version }}"
|
||||
# Find the previous tag, excluding the current version's tag
|
||||
# (which may already exist from a prior failed run)
|
||||
PREV_TAG=$(git tag -l "agent-sdk-v*" --sort=-v:refname | grep -v "^${CURRENT_TAG}$" | head -n 1)
|
||||
|
||||
if [ -z "$PREV_TAG" ]; then
|
||||
echo "Initial release" > /tmp/release-notes.md
|
||||
else
|
||||
# Get commits scoped to the SDK directory
|
||||
COMMITS=$(git log "$PREV_TAG"..HEAD --pretty=format:"%H" -- "$SDK_PATH")
|
||||
|
||||
if [ -z "$COMMITS" ]; then
|
||||
echo "No notable changes." > /tmp/release-notes.md
|
||||
else
|
||||
echo "## What's Changed" > /tmp/release-notes.md
|
||||
echo "" >> /tmp/release-notes.md
|
||||
|
||||
# For each commit, find the associated PR and format with author
|
||||
CONTRIBUTORS=""
|
||||
while IFS= read -r SHA; do
|
||||
# Get commit subject and author
|
||||
SUBJECT=$(git log -1 --pretty=format:"%s" "$SHA")
|
||||
AUTHOR=$(git log -1 --pretty=format:"%an" "$SHA")
|
||||
GITHUB_USER=$(gh api "/repos/${{ github.repository }}/commits/${SHA}" --jq '.author.login // empty' 2>/dev/null)
|
||||
|
||||
# Find associated PR number
|
||||
PR_NUM=$(gh api "/repos/${{ github.repository }}/commits/${SHA}/pulls" --jq '.[0].number // empty' 2>/dev/null)
|
||||
|
||||
# Format line: skip PR number if already in the commit subject
|
||||
# (squash merges include "(#123)" in the subject automatically)
|
||||
if [ -n "$PR_NUM" ] && ! echo "$SUBJECT" | grep -qF "(#${PR_NUM})"; then
|
||||
echo "- ${SUBJECT} (#${PR_NUM})" >> /tmp/release-notes.md
|
||||
else
|
||||
echo "- ${SUBJECT}" >> /tmp/release-notes.md
|
||||
fi
|
||||
done <<< "$COMMITS"
|
||||
fi
|
||||
fi
|
||||
working-directory: ${{ github.workspace }}
|
||||
|
||||
- name: Publish
|
||||
run: npm publish --access public
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
|
||||
- name: Create GitHub release
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
TAG="agent-sdk-v${{ steps.version.outputs.version }}"
|
||||
RELEASE_SHA="${{ steps.version.outputs.release_sha }}"
|
||||
TITLE="BrowserOS Agent SDK - v${{ steps.version.outputs.version }}"
|
||||
|
||||
# Create or reuse tag (idempotent for re-runs)
|
||||
if git rev-parse "$TAG" >/dev/null 2>&1; then
|
||||
echo "Tag $TAG already exists, skipping tag creation"
|
||||
else
|
||||
git tag "$TAG" "$RELEASE_SHA"
|
||||
fi
|
||||
|
||||
# Push tag (skip if already on remote)
|
||||
if git ls-remote --tags origin "$TAG" | grep -q "$TAG"; then
|
||||
echo "Tag $TAG already on remote, skipping push"
|
||||
else
|
||||
git push origin "$TAG"
|
||||
fi
|
||||
|
||||
# Create or update release
|
||||
if gh release view "$TAG" >/dev/null 2>&1; then
|
||||
echo "Release $TAG already exists, updating"
|
||||
gh release edit "$TAG" --title "$TITLE" --notes-file /tmp/release-notes.md
|
||||
else
|
||||
gh release create "$TAG" --title "$TITLE" --notes-file /tmp/release-notes.md
|
||||
fi
|
||||
working-directory: ${{ github.workspace }}
|
||||
|
||||
- name: Update CHANGELOG.md via PR
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
DATE=$(date -u +"%Y-%m-%d")
|
||||
BRANCH="docs/agent-sdk-changelog-v${VERSION}"
|
||||
CHANGELOG="packages/browseros-agent/packages/agent-sdk/CHANGELOG.md"
|
||||
|
||||
# Return to main before branching
|
||||
git checkout main
|
||||
|
||||
# Use head/tail to safely insert without sed quoting issues
|
||||
{
|
||||
head -n 1 "$CHANGELOG"
|
||||
echo ""
|
||||
echo "## v${VERSION} (${DATE})"
|
||||
echo ""
|
||||
cat /tmp/release-notes.md
|
||||
echo ""
|
||||
tail -n +2 "$CHANGELOG"
|
||||
} > /tmp/new-changelog.md
|
||||
mv /tmp/new-changelog.md "$CHANGELOG"
|
||||
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git checkout -b "$BRANCH"
|
||||
git add "$CHANGELOG"
|
||||
git commit -m "docs: update agent-sdk changelog for v${VERSION}"
|
||||
git push origin "$BRANCH"
|
||||
|
||||
gh pr create \
|
||||
--title "docs: update agent-sdk changelog for v${VERSION}" \
|
||||
--body "Auto-generated changelog update for BrowserOS Agent SDK v${VERSION}." \
|
||||
--base main \
|
||||
--head "$BRANCH"
|
||||
|
||||
gh pr merge "$BRANCH" --squash --auto || true
|
||||
working-directory: ${{ github.workspace }}
|
||||
- run: echo "Agent SDK publishing is disabled."
|
||||
|
||||
12
.github/workflows/test.yml
vendored
12
.github/workflows/test.yml
vendored
@@ -54,10 +54,10 @@ jobs:
|
||||
command: (cd apps/server && bun run test:integration)
|
||||
junit_path: test-results/server-integration.xml
|
||||
needs_browser: true
|
||||
- suite: server-sdk
|
||||
command: (cd apps/server && bun run test:sdk)
|
||||
junit_path: test-results/server-sdk.xml
|
||||
needs_browser: true
|
||||
- suite: server-lib
|
||||
command: (cd apps/server && bun run test:lib)
|
||||
junit_path: test-results/server-lib.xml
|
||||
needs_browser: false
|
||||
- suite: server-root
|
||||
command: (cd apps/server && bun run test:root)
|
||||
junit_path: test-results/server-root.xml
|
||||
@@ -70,10 +70,6 @@ jobs:
|
||||
command: bun run test:eval
|
||||
junit_path: test-results/eval.xml
|
||||
needs_browser: false
|
||||
- suite: agent-sdk
|
||||
command: bun run test:agent-sdk
|
||||
junit_path: test-results/agent-sdk.xml
|
||||
needs_browser: false
|
||||
- suite: build
|
||||
command: bun run test:build
|
||||
junit_path: test-results/build.xml
|
||||
|
||||
15
README.md
15
README.md
@@ -188,6 +188,21 @@ We'd love your help making BrowserOS better! See our [Contributing Guide](CONTRI
|
||||
- [ungoogled-chromium](https://github.com/ungoogled-software/ungoogled-chromium) — BrowserOS uses some patches for enhanced privacy. Thanks to everyone behind this project!
|
||||
- [The Chromium Project](https://www.chromium.org/) — at the core of BrowserOS, making it possible to exist in the first place.
|
||||
|
||||
## Citation
|
||||
|
||||
If you use BrowserOS in your research or project, please cite:
|
||||
|
||||
```bibtex
|
||||
@software{browseros2025,
|
||||
author = {Nithin Sonti and Nikhil Sonti and {BrowserOS-team}},
|
||||
title = {BrowserOS: The open-source Agentic browser},
|
||||
url = {https://github.com/browseros-ai/BrowserOS},
|
||||
year = {2025},
|
||||
publisher = {GitHub},
|
||||
license = {AGPL-3.0},
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
BrowserOS is open source under the [AGPL-3.0 license](LICENSE).
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# BrowserOS Agent
|
||||
|
||||
The agent platform powering [BrowserOS](https://github.com/browseros-ai/BrowserOS) — contains the MCP server, agent UI, CLI, evaluation framework, and SDK.
|
||||
The agent platform powering [BrowserOS](https://github.com/browseros-ai/BrowserOS) — contains the MCP server, agent UI, CLI, and evaluation framework.
|
||||
|
||||
## Monorepo Structure
|
||||
|
||||
@@ -12,7 +12,6 @@ apps/
|
||||
eval/ # Evaluation framework for benchmarking agents
|
||||
|
||||
packages/
|
||||
agent-sdk/ # Node.js SDK (@browseros-ai/agent-sdk)
|
||||
cdp-protocol/ # Type-safe Chrome DevTools Protocol bindings
|
||||
shared/ # Shared constants (ports, timeouts, limits)
|
||||
```
|
||||
@@ -23,7 +22,6 @@ packages/
|
||||
| `apps/agent` | Agent UI — Chrome extension for the chat interface |
|
||||
| `apps/cli` | Go CLI — control BrowserOS from the terminal or AI coding agents |
|
||||
| `apps/eval` | Benchmark framework — WebVoyager, Mind2Web evaluation |
|
||||
| `packages/agent-sdk` | Node.js SDK for browser automation with natural language |
|
||||
| `packages/cdp-protocol` | Auto-generated CDP type bindings used by the server |
|
||||
| `packages/shared` | Shared constants used across packages |
|
||||
|
||||
@@ -81,14 +79,15 @@ cp apps/server/.env.example apps/server/.env.development
|
||||
cp apps/agent/.env.example apps/agent/.env.development
|
||||
cp apps/server/.env.production.example apps/server/.env.production
|
||||
|
||||
# Install deps, generate agent code, and sync the VM cache
|
||||
# Install deps and generate agent code
|
||||
bun run dev:setup
|
||||
|
||||
# Start the full dev environment
|
||||
bun run dev:watch
|
||||
```
|
||||
|
||||
`dev:watch` exits when the VM cache manifest is missing, but setup stays in `dev:setup`.
|
||||
`dev:watch` starts the server immediately. OpenClaw VM/image prewarm runs from
|
||||
the server startup path and pulls the configured GHCR image on demand.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
import type { Provider } from './chatComponentTypes'
|
||||
|
||||
export interface ProviderOptionGroup {
|
||||
key: 'llm' | 'acp'
|
||||
label: string
|
||||
options: Provider[]
|
||||
}
|
||||
|
||||
export function groupProviderOptions(
|
||||
providers: Provider[],
|
||||
): ProviderOptionGroup[] {
|
||||
const llm = providers.filter((provider) => provider.kind !== 'acp')
|
||||
const acp = providers.filter((provider) => provider.kind === 'acp')
|
||||
|
||||
return [
|
||||
...(llm.length
|
||||
? [{ key: 'llm' as const, label: 'AI Providers', options: llm }]
|
||||
: []),
|
||||
...(acp.length
|
||||
? [{ key: 'acp' as const, label: 'Agents', options: acp }]
|
||||
: []),
|
||||
]
|
||||
}
|
||||
|
||||
export function getProviderSearchValue(
|
||||
provider: Provider,
|
||||
groupLabel: string,
|
||||
): string {
|
||||
return [
|
||||
provider.id,
|
||||
provider.name,
|
||||
provider.type,
|
||||
groupLabel,
|
||||
provider.adapterName,
|
||||
provider.modelLabel,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(' ')
|
||||
}
|
||||
|
||||
export function getProviderSubtitle(provider: Provider): string | undefined {
|
||||
if (provider.kind !== 'acp') return undefined
|
||||
return [
|
||||
provider.adapterName,
|
||||
provider.modelLabel,
|
||||
provider.modelControl === 'best-effort' ? 'best effort' : undefined,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(' · ')
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import {
|
||||
getProviderSearchValue,
|
||||
getProviderSubtitle,
|
||||
groupProviderOptions,
|
||||
} from './ChatProviderSelector.helpers'
|
||||
import type { Provider } from './chatComponentTypes'
|
||||
|
||||
const options: Provider[] = [
|
||||
{ kind: 'llm', id: 'browseros', name: 'BrowserOS', type: 'browseros' },
|
||||
{
|
||||
kind: 'llm',
|
||||
id: 'anthropic-sonnet',
|
||||
name: 'Anthropic Sonnet',
|
||||
type: 'anthropic',
|
||||
},
|
||||
{
|
||||
kind: 'acp',
|
||||
id: 'agent-claude-review',
|
||||
name: 'Review Bot',
|
||||
type: 'acp',
|
||||
adapterName: 'Claude Code',
|
||||
modelLabel: 'Haiku',
|
||||
modelControl: 'best-effort',
|
||||
},
|
||||
{
|
||||
kind: 'acp',
|
||||
id: 'agent-codex-browser',
|
||||
name: 'Browser Driver',
|
||||
type: 'acp',
|
||||
adapterName: 'Codex',
|
||||
modelLabel: 'GPT-5.5',
|
||||
modelControl: 'runtime-supported',
|
||||
},
|
||||
]
|
||||
|
||||
describe('groupProviderOptions', () => {
|
||||
it('groups normal providers separately from created agents', () => {
|
||||
expect(groupProviderOptions(options)).toEqual([
|
||||
{
|
||||
key: 'llm',
|
||||
label: 'AI Providers',
|
||||
options: [options[0], options[1]],
|
||||
},
|
||||
{
|
||||
key: 'acp',
|
||||
label: 'Agents',
|
||||
options: [options[2], options[3]],
|
||||
},
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
describe('getProviderSearchValue', () => {
|
||||
it('matches created-agent group labels and item labels', () => {
|
||||
expect(getProviderSearchValue(options[2], 'Agents')).toContain('Agents')
|
||||
expect(getProviderSearchValue(options[2], 'Agents')).toContain('Review Bot')
|
||||
expect(getProviderSearchValue(options[2], 'Agents')).toContain(
|
||||
'Claude Code',
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('getProviderSubtitle', () => {
|
||||
it('describes created-agent runtime context without model-target copy', () => {
|
||||
expect(getProviderSubtitle(options[2])).toBe(
|
||||
'Claude Code · Haiku · best effort',
|
||||
)
|
||||
expect(getProviderSubtitle(options[3])).toBe('Codex · GPT-5.5')
|
||||
expect(getProviderSubtitle(options[0])).toBeUndefined()
|
||||
})
|
||||
})
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Check, Plus } from 'lucide-react'
|
||||
import { Bot, Check, Plus } from 'lucide-react'
|
||||
import type { FC, PropsWithChildren } from 'react'
|
||||
import { useState } from 'react'
|
||||
import {
|
||||
@@ -17,6 +17,11 @@ import {
|
||||
import { BrowserOSIcon, ProviderIcon } from '@/lib/llm-providers/providerIcons'
|
||||
import type { ProviderType } from '@/lib/llm-providers/types'
|
||||
import { cn } from '@/lib/utils'
|
||||
import {
|
||||
getProviderSearchValue,
|
||||
getProviderSubtitle,
|
||||
groupProviderOptions,
|
||||
} from './ChatProviderSelector.helpers'
|
||||
import type { Provider } from './chatComponentTypes'
|
||||
|
||||
interface ChatProviderSelectorProps {
|
||||
@@ -29,54 +34,58 @@ export const ChatProviderSelector: FC<
|
||||
PropsWithChildren<ChatProviderSelectorProps>
|
||||
> = ({ children, providers, selectedProvider, onSelectProvider }) => {
|
||||
const [open, setOpen] = useState(false)
|
||||
const groups = groupProviderOptions(providers)
|
||||
|
||||
return (
|
||||
<Popover open={open} onOpenChange={setOpen}>
|
||||
<PopoverTrigger asChild>{children}</PopoverTrigger>
|
||||
<PopoverContent side="bottom" align="start" className="w-48 p-0">
|
||||
<PopoverContent side="bottom" align="start" className="w-64 p-0">
|
||||
<Command>
|
||||
<CommandInput placeholder="Search providers..." className="h-9" />
|
||||
<CommandInput
|
||||
placeholder="Search providers or agents..."
|
||||
className="h-9"
|
||||
/>
|
||||
<CommandList>
|
||||
<div className="my-2 px-2 font-semibold text-muted-foreground text-xs uppercase tracking-wide">
|
||||
AI Provider
|
||||
</div>
|
||||
<CommandEmpty>No provider found</CommandEmpty>
|
||||
<CommandGroup>
|
||||
{providers.map((provider) => {
|
||||
const isSelected = selectedProvider.id === provider.id
|
||||
return (
|
||||
<CommandItem
|
||||
key={provider.id}
|
||||
value={`${provider.id} ${provider.name}`}
|
||||
onSelect={() => {
|
||||
onSelectProvider(provider)
|
||||
setOpen(false)
|
||||
}}
|
||||
className={cn(
|
||||
'flex w-full items-center gap-3 rounded-md p-2 transition-colors',
|
||||
isSelected && 'bg-[var(--accent-orange)]/10',
|
||||
)}
|
||||
>
|
||||
<span className="text-muted-foreground">
|
||||
{provider.type === 'browseros' ? (
|
||||
<BrowserOSIcon size={18} />
|
||||
) : (
|
||||
<ProviderIcon
|
||||
type={provider.type as ProviderType}
|
||||
size={18}
|
||||
/>
|
||||
{groups.map((group) => (
|
||||
<CommandGroup key={group.key} heading={group.label}>
|
||||
{group.options.map((provider) => {
|
||||
const isSelected = selectedProvider.id === provider.id
|
||||
const subtitle = getProviderSubtitle(provider)
|
||||
return (
|
||||
<CommandItem
|
||||
key={provider.id}
|
||||
value={getProviderSearchValue(provider, group.label)}
|
||||
onSelect={() => {
|
||||
onSelectProvider(provider)
|
||||
setOpen(false)
|
||||
}}
|
||||
className={cn(
|
||||
'flex w-full items-center gap-3 rounded-md p-2 transition-colors',
|
||||
isSelected && 'bg-[var(--accent-orange)]/10',
|
||||
)}
|
||||
</span>
|
||||
<span className="flex-1 text-left text-sm">
|
||||
{provider.name}
|
||||
</span>
|
||||
{isSelected && (
|
||||
<Check className="h-3.5 w-3.5 text-[var(--accent-orange)]" />
|
||||
)}
|
||||
</CommandItem>
|
||||
)
|
||||
})}
|
||||
</CommandGroup>
|
||||
>
|
||||
<span className="text-muted-foreground">
|
||||
<ProviderOptionIcon provider={provider} />
|
||||
</span>
|
||||
<span className="min-w-0 flex-1 text-left">
|
||||
<span className="block truncate text-sm">
|
||||
{provider.name}
|
||||
</span>
|
||||
{subtitle && (
|
||||
<span className="block truncate text-muted-foreground text-xs">
|
||||
{subtitle}
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
{isSelected && (
|
||||
<Check className="h-3.5 w-3.5 text-[var(--accent-orange)]" />
|
||||
)}
|
||||
</CommandItem>
|
||||
)
|
||||
})}
|
||||
</CommandGroup>
|
||||
))}
|
||||
<div className="border-border border-t p-1">
|
||||
<button
|
||||
type="button"
|
||||
@@ -96,3 +105,9 @@ export const ChatProviderSelector: FC<
|
||||
</Popover>
|
||||
)
|
||||
}
|
||||
|
||||
function ProviderOptionIcon({ provider }: { provider: Provider }) {
|
||||
if (provider.kind === 'acp') return <Bot size={18} />
|
||||
if (provider.type === 'browseros') return <BrowserOSIcon size={18} />
|
||||
return <ProviderIcon type={provider.type as ProviderType} size={18} />
|
||||
}
|
||||
|
||||
@@ -1,7 +1,14 @@
|
||||
import type { ProviderType } from '@/lib/llm-providers/types'
|
||||
|
||||
export type ChatProviderType = ProviderType | 'acp'
|
||||
|
||||
export interface Provider {
|
||||
id: string
|
||||
name: string
|
||||
type: ProviderType
|
||||
type: ChatProviderType
|
||||
kind: 'llm' | 'acp'
|
||||
agentId?: string
|
||||
adapterName?: string
|
||||
modelLabel?: string
|
||||
modelControl?: 'runtime-supported' | 'best-effort'
|
||||
}
|
||||
|
||||
@@ -1,136 +0,0 @@
|
||||
import { Bot, Loader2, Wrench } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import type { AgentCardData } from '@/lib/agent-conversations/types'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
interface AgentCardProps {
|
||||
agent: AgentCardData
|
||||
onClick: () => void
|
||||
active?: boolean
|
||||
}
|
||||
|
||||
function formatTimestamp(timestamp?: number): string {
|
||||
if (!timestamp) return 'No activity yet'
|
||||
const diff = Date.now() - timestamp
|
||||
const minutes = Math.floor(diff / 60000)
|
||||
if (minutes < 1) return 'just now'
|
||||
if (minutes < 60) return `${minutes}m ago`
|
||||
const hours = Math.floor(minutes / 60)
|
||||
if (hours < 24) return `${hours}h ago`
|
||||
return `${Math.floor(hours / 24)}d ago`
|
||||
}
|
||||
|
||||
function getStatusLabel(status: AgentCardData['status']): string {
|
||||
if (status === 'working') return 'Working'
|
||||
if (status === 'error') return 'Error'
|
||||
return 'Ready'
|
||||
}
|
||||
|
||||
function getStatusTone(status: AgentCardData['status']): string {
|
||||
if (status === 'working') return 'bg-amber-500'
|
||||
if (status === 'error') return 'bg-destructive'
|
||||
return 'bg-emerald-500'
|
||||
}
|
||||
|
||||
function formatCost(usd: number): string {
|
||||
if (usd < 0.005) return `$${usd.toFixed(4)}`
|
||||
return `$${usd.toFixed(2)}`
|
||||
}
|
||||
|
||||
export const AgentCardExpanded: FC<AgentCardProps> = ({
|
||||
agent,
|
||||
onClick,
|
||||
active,
|
||||
}) => (
|
||||
<button
|
||||
type="button"
|
||||
onClick={onClick}
|
||||
className={cn(
|
||||
'group flex min-h-32 w-full min-w-0 flex-col rounded-2xl border p-4 text-left shadow-sm transition-all duration-200',
|
||||
active
|
||||
? 'border-border/80 bg-card shadow-md ring-1 ring-[var(--accent-orange)]/20'
|
||||
: 'border-border/60 bg-card/85 hover:border-border hover:bg-card hover:shadow-md',
|
||||
)}
|
||||
>
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
<div className="flex min-w-0 items-center gap-3">
|
||||
<div
|
||||
className={cn(
|
||||
'flex size-10 shrink-0 items-center justify-center rounded-xl',
|
||||
active
|
||||
? 'bg-[var(--accent-orange)]/10 text-[var(--accent-orange)]'
|
||||
: 'bg-muted text-muted-foreground',
|
||||
)}
|
||||
>
|
||||
<Bot className="size-5" />
|
||||
</div>
|
||||
<div className="min-w-0">
|
||||
<div className="truncate font-semibold text-sm">{agent.name}</div>
|
||||
<div className="truncate text-muted-foreground text-xs">
|
||||
{agent.model ?? 'OpenClaw agent'}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 rounded-full border border-border/60 bg-background/70 px-2.5 py-1 text-[11px] text-muted-foreground">
|
||||
<span
|
||||
className={cn('size-2 rounded-full', getStatusTone(agent.status))}
|
||||
/>
|
||||
<span>{getStatusLabel(agent.status)}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="mt-4 flex-1">
|
||||
<p className="line-clamp-2 text-foreground/90 text-sm">
|
||||
{agent.lastMessage ??
|
||||
'Start a conversation to see recent work and summaries.'}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="mt-4 space-y-1.5 text-muted-foreground text-xs">
|
||||
<div className="flex items-center justify-between gap-3">
|
||||
<span>{formatTimestamp(agent.lastMessageTimestamp)}</span>
|
||||
{agent.costUsd ? (
|
||||
<span className="tabular-nums opacity-70">
|
||||
{formatCost(agent.costUsd)}
|
||||
</span>
|
||||
) : null}
|
||||
</div>
|
||||
{agent.status === 'working' && agent.currentTool ? (
|
||||
<div className="flex items-center gap-1.5 text-[var(--accent-orange)]/70">
|
||||
<Loader2 className="size-3 shrink-0 animate-spin" />
|
||||
<span className="truncate">{agent.currentTool}</span>
|
||||
</div>
|
||||
) : agent.activitySummary ? (
|
||||
<div className="flex items-center gap-1.5 text-muted-foreground/60">
|
||||
<Wrench className="size-3 shrink-0" />
|
||||
<span className="truncate">{agent.activitySummary}</span>
|
||||
</div>
|
||||
) : null}
|
||||
</div>
|
||||
</button>
|
||||
)
|
||||
|
||||
export const AgentCardCompact: FC<AgentCardProps> = ({
|
||||
agent,
|
||||
onClick,
|
||||
active,
|
||||
}) => (
|
||||
<button
|
||||
type="button"
|
||||
onClick={onClick}
|
||||
className={cn(
|
||||
'inline-flex items-center gap-2 rounded-full border px-3 py-2 text-sm transition-colors',
|
||||
active
|
||||
? 'border-border bg-card shadow-sm ring-1 ring-[var(--accent-orange)]/20'
|
||||
: 'border-border/60 bg-card/85 text-foreground hover:border-border hover:bg-card',
|
||||
)}
|
||||
>
|
||||
<span
|
||||
className={cn(
|
||||
'size-2 rounded-full',
|
||||
active ? 'bg-[var(--accent-orange)]' : getStatusTone(agent.status),
|
||||
)}
|
||||
/>
|
||||
<span className="truncate">{agent.name}</span>
|
||||
</button>
|
||||
)
|
||||
@@ -1,70 +1,71 @@
|
||||
import { Plus } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import type { AgentCardData } from '@/lib/agent-conversations/types'
|
||||
import type {
|
||||
HarnessAdapterDescriptor,
|
||||
HarnessAdapterHealth,
|
||||
HarnessAgent,
|
||||
HarnessAgentAdapter,
|
||||
} from '@/entrypoints/app/agents/agent-harness-types'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { AgentCardCompact, AgentCardExpanded } from './AgentCard'
|
||||
import { HomeAgentCard } from './HomeAgentCard'
|
||||
|
||||
interface AgentCardDockProps {
|
||||
agents: AgentCardData[]
|
||||
agents: HarnessAgent[]
|
||||
adapters: HarnessAdapterDescriptor[]
|
||||
activeAgentId?: string
|
||||
onSelectAgent: (agentId: string) => void
|
||||
onCreateAgent?: () => void
|
||||
compact?: boolean
|
||||
}
|
||||
|
||||
function CreateAgentButton({
|
||||
compact,
|
||||
onCreateAgent,
|
||||
}: {
|
||||
compact?: boolean
|
||||
onCreateAgent: () => void
|
||||
}) {
|
||||
function CreateAgentButton({ onCreateAgent }: { onCreateAgent: () => void }) {
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
onClick={onCreateAgent}
|
||||
className={cn(
|
||||
'flex shrink-0 items-center justify-center gap-2 border border-dashed text-muted-foreground transition-colors hover:border-[var(--accent-orange)] hover:text-[var(--accent-orange)]',
|
||||
compact
|
||||
? 'rounded-full px-3 py-2 text-sm'
|
||||
: 'min-h-32 rounded-2xl px-5 py-4',
|
||||
'flex min-h-32 shrink-0 items-center justify-center gap-2 rounded-2xl border border-dashed px-5 py-4 text-muted-foreground transition-colors',
|
||||
'hover:border-[var(--accent-orange)] hover:text-[var(--accent-orange)]',
|
||||
)}
|
||||
>
|
||||
<Plus className={compact ? 'size-3.5' : 'size-5'} />
|
||||
<span>{compact ? 'New' : 'Create agent'}</span>
|
||||
<Plus className="size-5" />
|
||||
<span>Create agent</span>
|
||||
</button>
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* 3-column grid of HomeAgentCards plus a trailing "Create agent"
|
||||
* tile. The previous `compact` mode (rendered a horizontal pill rail)
|
||||
* had no callers and was dropped along with the legacy AgentCard.
|
||||
*/
|
||||
export const AgentCardDock: FC<AgentCardDockProps> = ({
|
||||
agents,
|
||||
adapters,
|
||||
activeAgentId,
|
||||
onSelectAgent,
|
||||
onCreateAgent,
|
||||
compact,
|
||||
}) => {
|
||||
if (agents.length === 0 && !onCreateAgent) return null
|
||||
|
||||
const Card = compact ? AgentCardCompact : AgentCardExpanded
|
||||
const adapterHealth = new Map<HarnessAgentAdapter, HarnessAdapterHealth>()
|
||||
for (const descriptor of adapters) {
|
||||
if (descriptor.health) adapterHealth.set(descriptor.id, descriptor.health)
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
compact
|
||||
? 'flex items-center gap-2 overflow-x-auto pb-1'
|
||||
: 'grid gap-4 md:grid-cols-3',
|
||||
)}
|
||||
>
|
||||
<div className="grid gap-4 md:grid-cols-3">
|
||||
{agents.map((agent) => (
|
||||
<Card
|
||||
key={agent.agentId}
|
||||
<HomeAgentCard
|
||||
key={agent.id}
|
||||
agent={agent}
|
||||
active={agent.agentId === activeAgentId}
|
||||
onClick={() => onSelectAgent(agent.agentId)}
|
||||
adapter={agent.adapter}
|
||||
adapterHealth={adapterHealth.get(agent.adapter) ?? null}
|
||||
active={agent.id === activeAgentId}
|
||||
onClick={() => onSelectAgent(agent.id)}
|
||||
/>
|
||||
))}
|
||||
{onCreateAgent ? (
|
||||
<CreateAgentButton compact={compact} onCreateAgent={onCreateAgent} />
|
||||
<CreateAgentButton onCreateAgent={onCreateAgent} />
|
||||
) : null}
|
||||
</div>
|
||||
)
|
||||
|
||||
@@ -1,7 +1,13 @@
|
||||
import { ArrowLeft, Bot, Home } from 'lucide-react'
|
||||
import { type FC, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { type FC, useEffect, useMemo, useRef } from 'react'
|
||||
import { Navigate, useNavigate, useParams, useSearchParams } from 'react-router'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import {
|
||||
cancelHarnessTurn,
|
||||
useEnqueueHarnessMessage,
|
||||
useHarnessAgents,
|
||||
useRemoveHarnessQueuedMessage,
|
||||
} from '@/entrypoints/app/agents/useAgents'
|
||||
import {
|
||||
type AgentEntry,
|
||||
getModelDisplayName,
|
||||
@@ -12,11 +18,12 @@ import { ClawChat } from './ClawChat'
|
||||
import { ConversationInput } from './ConversationInput'
|
||||
import {
|
||||
buildChatHistoryFromClawMessages,
|
||||
filterTurnsPersistedInHistory,
|
||||
flattenHistoryPages,
|
||||
} from './claw-chat-types'
|
||||
import { QueuePanel } from './QueuePanel'
|
||||
import { useAgentConversation } from './useAgentConversation'
|
||||
import { useClawChatHistory } from './useClawChatHistory'
|
||||
import { useOutboundQueue } from './useOutboundQueue'
|
||||
import { useHarnessChatHistory } from './useHarnessChatHistory'
|
||||
|
||||
function StatusBadge({ status }: { status: string }) {
|
||||
return (
|
||||
@@ -132,7 +139,7 @@ function AgentRailList({
|
||||
<div className="styled-scrollbar min-h-0 flex-1 space-y-2 overflow-y-auto px-3 py-3">
|
||||
{agents.map((entry) => {
|
||||
const active = entry.agentId === activeAgentId
|
||||
const modelName = getModelDisplayName(entry.model) ?? 'OpenClaw agent'
|
||||
const modelName = getAgentEntryMeta(entry)
|
||||
|
||||
return (
|
||||
<button
|
||||
@@ -167,19 +174,17 @@ function AgentRailList({
|
||||
)
|
||||
}
|
||||
|
||||
function getConversationStatusCopy(status: string | undefined): string {
|
||||
if (status === 'running') return 'Ready'
|
||||
if (status === 'starting') return 'Connecting'
|
||||
if (status === 'error') return 'Attention'
|
||||
if (status === 'stopped') return 'Offline'
|
||||
return 'Setup'
|
||||
function getAgentEntryMeta(agent: AgentEntry | undefined): string {
|
||||
if (agent?.source === 'agent-harness') {
|
||||
return getModelDisplayName(agent.model) ?? 'ACP agent'
|
||||
}
|
||||
return getModelDisplayName(agent?.model) ?? 'OpenClaw agent'
|
||||
}
|
||||
|
||||
function AgentConversationController({
|
||||
agentId,
|
||||
initialMessage,
|
||||
onInitialMessageConsumed,
|
||||
status,
|
||||
agents,
|
||||
agentPathPrefix,
|
||||
createAgentPath,
|
||||
@@ -187,7 +192,6 @@ function AgentConversationController({
|
||||
agentId: string
|
||||
initialMessage: string | null
|
||||
onInitialMessageConsumed: () => void
|
||||
status: ReturnType<typeof useAgentCommandData>['status']
|
||||
agents: AgentEntry[]
|
||||
agentPathPrefix: string
|
||||
createAgentPath: string
|
||||
@@ -195,80 +199,69 @@ function AgentConversationController({
|
||||
const navigate = useNavigate()
|
||||
const initialMessageSentRef = useRef<string | null>(null)
|
||||
const onInitialMessageConsumedRef = useRef(onInitialMessageConsumed)
|
||||
const [streamSessionKey, setStreamSessionKey] = useState<string | null>(null)
|
||||
const agent = agents.find((entry) => entry.agentId === agentId)
|
||||
const agentName = agent?.name || agentId || 'Agent'
|
||||
// Single source of truth: the history endpoint resolves the session itself
|
||||
// when sessionKey is null. Once a chat creates a new session, streamSessionKey
|
||||
// overrides it and the history queryKey rotates to refetch for that session.
|
||||
const historyQuery = useClawChatHistory({
|
||||
agentId,
|
||||
sessionKey: streamSessionKey,
|
||||
})
|
||||
// Routing is now harness-only. Every OpenClaw agent has a harness
|
||||
// record post the gateway → harness backfill, so the chat panel
|
||||
// always talks to /agents/<id>/chat. The legacy ClawChat surface
|
||||
// was deleted with the /claw/agents/:id/chat server route.
|
||||
const harnessHistoryQuery = useHarnessChatHistory(agentId, Boolean(agent))
|
||||
|
||||
const historyMessages = useMemo(
|
||||
() => flattenHistoryPages(historyQuery.data?.pages ?? []),
|
||||
[historyQuery.data?.pages],
|
||||
() =>
|
||||
flattenHistoryPages(
|
||||
harnessHistoryQuery.data ? [harnessHistoryQuery.data] : [],
|
||||
),
|
||||
[harnessHistoryQuery.data],
|
||||
)
|
||||
const chatHistory = useMemo(
|
||||
() => buildChatHistoryFromClawMessages(historyMessages),
|
||||
[historyMessages],
|
||||
)
|
||||
const resolvedSessionKey =
|
||||
streamSessionKey ?? historyQuery.data?.pages?.[0]?.sessionKey ?? null
|
||||
|
||||
const { turns, streaming } = useAgentConversation(agentId, {
|
||||
sessionKey: resolvedSessionKey,
|
||||
// Listing query feeds queue + active-turn state for this agent. We
|
||||
// already poll it every 5s for the rail; reusing the same cache
|
||||
// keeps cross-tab queue state in sync without a second poll.
|
||||
const { harnessAgents } = useHarnessAgents()
|
||||
const harnessAgent = harnessAgents.find((entry) => entry.id === agentId)
|
||||
const queue = harnessAgent?.queue ?? []
|
||||
const activeTurnId = harnessAgent?.activeTurnId ?? null
|
||||
|
||||
const { turns, streaming, send } = useAgentConversation(agentId, {
|
||||
runtime: 'agent-harness',
|
||||
sessionKey: null,
|
||||
history: chatHistory,
|
||||
onSessionKeyChange: (sessionKey) => {
|
||||
setStreamSessionKey(sessionKey)
|
||||
activeTurnId,
|
||||
onComplete: () => {
|
||||
void harnessHistoryQuery.refetch()
|
||||
},
|
||||
onSessionKeyChange: () => {},
|
||||
})
|
||||
const outboundQueue = useOutboundQueue({
|
||||
agentId,
|
||||
sessionKey: resolvedSessionKey,
|
||||
})
|
||||
const enqueueMessage = useEnqueueHarnessMessage()
|
||||
const removeQueuedMessage = useRemoveHarnessQueuedMessage()
|
||||
|
||||
const handleStop = () => {
|
||||
void cancelHarnessTurn(agentId, {
|
||||
turnId: activeTurnId ?? undefined,
|
||||
reason: 'user pressed stop',
|
||||
})
|
||||
}
|
||||
const visibleTurns = useMemo(
|
||||
() => filterTurnsPersistedInHistory(turns, historyMessages),
|
||||
[historyMessages, turns],
|
||||
)
|
||||
onInitialMessageConsumedRef.current = onInitialMessageConsumed
|
||||
|
||||
// Refetch history whenever a server-dispatched queue item completes.
|
||||
// The server worker streams the queued turn into OpenClaw directly, so
|
||||
// the client never observes the live tokens — we only see the new
|
||||
// assistant turn once the JSONL is updated. Watching the queue for
|
||||
// any 'sending' item dropping out is the cleanest "turn finalized"
|
||||
// signal we have without exposing per-turn SSE.
|
||||
const previousSendingIdsRef = useRef<Set<string>>(new Set())
|
||||
useEffect(() => {
|
||||
const currentSending = new Set(
|
||||
outboundQueue.queue
|
||||
.filter((item) => item.status === 'sending')
|
||||
.map((item) => item.id),
|
||||
)
|
||||
const dropped = [...previousSendingIdsRef.current].filter(
|
||||
(id) => !currentSending.has(id),
|
||||
)
|
||||
previousSendingIdsRef.current = currentSending
|
||||
if (dropped.length > 0) {
|
||||
void historyQuery.refetch()
|
||||
}
|
||||
}, [outboundQueue.queue, historyQuery])
|
||||
|
||||
const disabled = status?.status !== 'running'
|
||||
// Two-part gate: cover both "still fetching" AND "just got enabled but
|
||||
// hasn't started fetching yet". When `enabled` flips true (baseUrl
|
||||
// resolves), there's a render frame where React Query reports
|
||||
// isLoading=false but hasn't run the queryFn yet — `isFetched` is still
|
||||
// false. Without this we render EmptyState during that one frame.
|
||||
const isInitialLoading =
|
||||
historyQuery.isLoading || (!historyQuery.isFetched && !historyQuery.isError)
|
||||
|
||||
const historyReady = historyQuery.isFetched || historyQuery.isError
|
||||
const disabled = !agent
|
||||
const historyReady =
|
||||
harnessHistoryQuery.isFetched || harnessHistoryQuery.isError
|
||||
const initialMessageKey = initialMessage
|
||||
? `${agentId}:${initialMessage}`
|
||||
: null
|
||||
const error = historyQuery.error ?? null
|
||||
const error = harnessHistoryQuery.error ?? null
|
||||
|
||||
const enqueueRef = useRef(outboundQueue.enqueue)
|
||||
enqueueRef.current = outboundQueue.enqueue
|
||||
const sendRef = useRef(send)
|
||||
sendRef.current = send
|
||||
|
||||
useEffect(() => {
|
||||
const query = initialMessage?.trim()
|
||||
@@ -277,11 +270,6 @@ function AgentConversationController({
|
||||
return
|
||||
}
|
||||
|
||||
// The initial-message handoff (home composer → conversation page via
|
||||
// ?q=) goes through the outbound queue too, so it inherits the same
|
||||
// single-flight serialization. We no longer need to gate on
|
||||
// `streaming` — the queue worker drains as soon as the agent is
|
||||
// free.
|
||||
if (
|
||||
!query ||
|
||||
initialMessageSentRef.current === initialMessageKey ||
|
||||
@@ -293,7 +281,7 @@ function AgentConversationController({
|
||||
|
||||
initialMessageSentRef.current = initialMessageKey
|
||||
onInitialMessageConsumedRef.current()
|
||||
enqueueRef.current({ text: query })
|
||||
void sendRef.current({ text: query })
|
||||
}, [disabled, historyReady, initialMessage, initialMessageKey])
|
||||
|
||||
const handleSelectAgent = (entry: AgentEntry) => {
|
||||
@@ -305,49 +293,67 @@ function AgentConversationController({
|
||||
<ClawChat
|
||||
agentName={agentName}
|
||||
historyMessages={historyMessages}
|
||||
turns={turns}
|
||||
turns={visibleTurns}
|
||||
streaming={streaming}
|
||||
isInitialLoading={isInitialLoading}
|
||||
isInitialLoading={harnessHistoryQuery.isLoading}
|
||||
error={error}
|
||||
hasNextPage={Boolean(historyQuery.hasNextPage)}
|
||||
isFetchingNextPage={historyQuery.isFetchingNextPage}
|
||||
onFetchNextPage={() => {
|
||||
void historyQuery.fetchNextPage()
|
||||
}}
|
||||
hasNextPage={false}
|
||||
isFetchingNextPage={false}
|
||||
onFetchNextPage={() => {}}
|
||||
onRetry={() => {
|
||||
void historyQuery.refetch()
|
||||
void harnessHistoryQuery.refetch()
|
||||
}}
|
||||
/>
|
||||
|
||||
<div className="border-border/50 border-t bg-background/88 px-4 py-3 backdrop-blur-md">
|
||||
<div className="mx-auto max-w-3xl">
|
||||
<div className="mx-auto max-w-3xl space-y-3">
|
||||
{queue.length > 0 ? (
|
||||
<QueuePanel
|
||||
queue={queue}
|
||||
onRemove={(messageId) =>
|
||||
removeQueuedMessage.mutate({ agentId, messageId })
|
||||
}
|
||||
/>
|
||||
) : null}
|
||||
<ConversationInput
|
||||
variant="conversation"
|
||||
agents={agents}
|
||||
selectedAgentId={agentId}
|
||||
onSelectAgent={handleSelectAgent}
|
||||
onSend={(input) => {
|
||||
outboundQueue.enqueue({
|
||||
text: input.text,
|
||||
attachments: input.attachments.map((a) => a.payload),
|
||||
attachmentPreviews: input.attachments.map((a) => ({
|
||||
id: a.id,
|
||||
kind: a.kind,
|
||||
mediaType: a.mediaType,
|
||||
name: a.name,
|
||||
dataUrl: a.dataUrl,
|
||||
})),
|
||||
history: chatHistory,
|
||||
})
|
||||
const attachments = input.attachments.map((a) => a.payload)
|
||||
const attachmentPreviews = input.attachments.map((a) => ({
|
||||
id: a.id,
|
||||
kind: a.kind,
|
||||
mediaType: a.mediaType,
|
||||
name: a.name,
|
||||
dataUrl: a.dataUrl,
|
||||
}))
|
||||
// When the agent already has an in-flight turn, route
|
||||
// the new message into the durable queue instead of
|
||||
// starting a parallel turn. Drains automatically as
|
||||
// soon as the active turn ends.
|
||||
if (streaming || activeTurnId) {
|
||||
enqueueMessage.mutate({
|
||||
agentId,
|
||||
message: input.text,
|
||||
attachments,
|
||||
})
|
||||
return
|
||||
}
|
||||
void send({ text: input.text, attachments, attachmentPreviews })
|
||||
}}
|
||||
onCreateAgent={() => navigate(createAgentPath)}
|
||||
onStop={handleStop}
|
||||
streaming={streaming}
|
||||
disabled={disabled}
|
||||
status={status?.status}
|
||||
placeholder={`Message ${agentName}...`}
|
||||
outboundQueue={outboundQueue.queue}
|
||||
onCancelQueued={outboundQueue.cancel}
|
||||
onRetryQueued={outboundQueue.retry}
|
||||
status="running"
|
||||
attachmentsEnabled={true}
|
||||
placeholder={
|
||||
streaming
|
||||
? `Type to queue another message for ${agentName}...`
|
||||
: `Message ${agentName}...`
|
||||
}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
@@ -371,12 +377,12 @@ export const AgentCommandConversation: FC<AgentCommandConversationProps> = ({
|
||||
const { agentId } = useParams<{ agentId: string }>()
|
||||
const [searchParams, setSearchParams] = useSearchParams()
|
||||
const navigate = useNavigate()
|
||||
const { status, agents } = useAgentCommandData()
|
||||
const { agents } = useAgentCommandData()
|
||||
const shouldRedirectHome = !agentId
|
||||
const resolvedAgentId = agentId ?? ''
|
||||
const agent = agents.find((entry) => entry.agentId === resolvedAgentId)
|
||||
const agentName = agent?.name || resolvedAgentId || 'Agent'
|
||||
const agentMeta = getModelDisplayName(agent?.model) ?? 'OpenClaw agent'
|
||||
const agentMeta = getAgentEntryMeta(agent)
|
||||
const initialMessage = searchParams.get('q')
|
||||
const isPageVariant = variant === 'page'
|
||||
const backLabel = isPageVariant ? 'Back to agents' : 'Back to home'
|
||||
@@ -389,7 +395,11 @@ export const AgentCommandConversation: FC<AgentCommandConversationProps> = ({
|
||||
navigate(`${agentPathPrefix}/${entry.agentId}`)
|
||||
}
|
||||
|
||||
const statusCopy = getConversationStatusCopy(status?.status)
|
||||
// Every visible agent runs through the harness now, so per-agent
|
||||
// runtime status doesn't gate chat the way OpenClaw's legacy
|
||||
// gateway lifecycle did. Show "Ready" once the agent record is
|
||||
// resolved from the rail, "Setup" otherwise.
|
||||
const statusCopy = agent ? 'Ready' : 'Setup'
|
||||
|
||||
return (
|
||||
<div className="absolute inset-0 overflow-hidden bg-background md:pl-[theme(spacing.14)]">
|
||||
@@ -415,7 +425,6 @@ export const AgentCommandConversation: FC<AgentCommandConversationProps> = ({
|
||||
key={resolvedAgentId}
|
||||
agentId={resolvedAgentId}
|
||||
agents={agents}
|
||||
status={status}
|
||||
initialMessage={initialMessage}
|
||||
onInitialMessageConsumed={() =>
|
||||
setSearchParams({}, { replace: true })
|
||||
|
||||
@@ -1,46 +1,25 @@
|
||||
import { ArrowRight, Bot, Plus, Settings2 } from 'lucide-react'
|
||||
import { type FC, useEffect, useState } from 'react'
|
||||
import { Plus } from 'lucide-react'
|
||||
import { type FC, useEffect, useMemo, useState } from 'react'
|
||||
import { useNavigate } from 'react-router'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Card, CardContent } from '@/components/ui/card'
|
||||
import { Separator } from '@/components/ui/separator'
|
||||
import type {
|
||||
HarnessAdapterDescriptor,
|
||||
HarnessAgent,
|
||||
} from '@/entrypoints/app/agents/agent-harness-types'
|
||||
import {
|
||||
useAgentAdapters,
|
||||
useHarnessAgents,
|
||||
} from '@/entrypoints/app/agents/useAgents'
|
||||
import type { AgentEntry } from '@/entrypoints/app/agents/useOpenClaw'
|
||||
import { ImportDataHint } from '@/entrypoints/newtab/index/ImportDataHint'
|
||||
import { SignInHint } from '@/entrypoints/newtab/index/SignInHint'
|
||||
import { useActiveHint } from '@/entrypoints/newtab/index/useActiveHint'
|
||||
import type { AgentCardData } from '@/lib/agent-conversations/types'
|
||||
import { AgentCardDock } from './AgentCardDock'
|
||||
import { useAgentCommandData } from './agent-command-layout'
|
||||
import { ConversationInput } from './ConversationInput'
|
||||
import { buildAgentCardData } from './useAgentCardData'
|
||||
import { useAgentDashboard } from './useAgentDashboard'
|
||||
|
||||
function AgentCommandSetupState({
|
||||
onOpenAgents,
|
||||
}: {
|
||||
onOpenAgents: () => void
|
||||
}) {
|
||||
return (
|
||||
<Card className="border-border/60 bg-card/90 shadow-sm">
|
||||
<CardContent className="flex flex-col items-center gap-4 p-8 text-center">
|
||||
<div className="flex size-12 items-center justify-center rounded-2xl bg-muted text-muted-foreground">
|
||||
<Bot className="size-5" />
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<h2 className="font-semibold text-lg">Set up your first agent</h2>
|
||||
<p className="max-w-md text-muted-foreground text-sm leading-6">
|
||||
Connect OpenClaw and create an agent before using the new tab as
|
||||
your workspace.
|
||||
</p>
|
||||
</div>
|
||||
<Button onClick={onOpenAgents} className="gap-2 rounded-xl">
|
||||
Open Agent Setup
|
||||
<ArrowRight className="size-4" />
|
||||
</Button>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)
|
||||
}
|
||||
import { orderHomeAgents } from './home-agent-card.helpers'
|
||||
|
||||
function EmptyAgentsState({ onOpenAgents }: { onOpenAgents: () => void }) {
|
||||
return (
|
||||
@@ -63,41 +42,16 @@ function EmptyAgentsState({ onOpenAgents }: { onOpenAgents: () => void }) {
|
||||
)
|
||||
}
|
||||
|
||||
function OpenClawUnavailableState({
|
||||
onOpenAgents,
|
||||
}: {
|
||||
onOpenAgents: () => void
|
||||
}) {
|
||||
return (
|
||||
<Card className="border-border/60 bg-card/90 shadow-sm">
|
||||
<CardContent className="flex flex-col items-center gap-4 p-8 text-center">
|
||||
<div className="flex size-12 items-center justify-center rounded-2xl bg-muted text-muted-foreground">
|
||||
<Settings2 className="size-5" />
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<h2 className="font-semibold text-lg">OpenClaw is unavailable</h2>
|
||||
<p className="max-w-md text-muted-foreground text-sm leading-6">
|
||||
Review your agent setup to restart the gateway or reconnect the
|
||||
local service.
|
||||
</p>
|
||||
</div>
|
||||
<Button onClick={onOpenAgents} className="gap-2 rounded-xl">
|
||||
Open Agent Setup
|
||||
<ArrowRight className="size-4" />
|
||||
</Button>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)
|
||||
}
|
||||
|
||||
function RecentThreads({
|
||||
activeAgentId,
|
||||
agents,
|
||||
adapters,
|
||||
onOpenAgents,
|
||||
onSelectAgent,
|
||||
}: {
|
||||
activeAgentId?: string | null
|
||||
agents: AgentCardData[]
|
||||
agents: HarnessAgent[]
|
||||
adapters: HarnessAdapterDescriptor[]
|
||||
onOpenAgents: () => void
|
||||
onSelectAgent: (agentId: string) => void
|
||||
}) {
|
||||
@@ -123,6 +77,7 @@ function RecentThreads({
|
||||
</div>
|
||||
<AgentCardDock
|
||||
agents={agents}
|
||||
adapters={adapters}
|
||||
activeAgentId={activeAgentId ?? undefined}
|
||||
onSelectAgent={onSelectAgent}
|
||||
onCreateAgent={onOpenAgents}
|
||||
@@ -134,34 +89,35 @@ function RecentThreads({
|
||||
export const AgentCommandHome: FC = () => {
|
||||
const navigate = useNavigate()
|
||||
const activeHint = useActiveHint()
|
||||
const { status, agents } = useAgentCommandData()
|
||||
// The conversation input still consumes the merged AgentEntry list
|
||||
// from the layout context (handles legacy /claw/agents entries that
|
||||
// haven't yet been backfilled into the harness store). The Recent
|
||||
// Agents grid below reads the richer harness payload directly.
|
||||
const { agents: legacyAgents, status } = useAgentCommandData()
|
||||
const { harnessAgents } = useHarnessAgents()
|
||||
const { adapters } = useAgentAdapters()
|
||||
const [selectedAgentId, setSelectedAgentId] = useState<string | null>(null)
|
||||
const { data: dashboard } = useAgentDashboard(status?.status === 'running')
|
||||
const cardData = buildAgentCardData(agents, status?.status, dashboard?.agents)
|
||||
|
||||
const orderedAgents = useMemo(
|
||||
() => orderHomeAgents(harnessAgents),
|
||||
[harnessAgents],
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
if (agents.length === 0) {
|
||||
if (selectedAgentId) {
|
||||
setSelectedAgentId(null)
|
||||
}
|
||||
if (legacyAgents.length === 0) {
|
||||
if (selectedAgentId) setSelectedAgentId(null)
|
||||
return
|
||||
}
|
||||
|
||||
if (
|
||||
!selectedAgentId ||
|
||||
!agents.some((agent) => agent.agentId === selectedAgentId)
|
||||
!legacyAgents.some((agent) => agent.agentId === selectedAgentId)
|
||||
) {
|
||||
setSelectedAgentId(agents[0].agentId)
|
||||
setSelectedAgentId(legacyAgents[0].agentId)
|
||||
}
|
||||
}, [agents, selectedAgentId])
|
||||
}, [legacyAgents, selectedAgentId])
|
||||
|
||||
const handleSend = (input: { text: string }) => {
|
||||
if (!selectedAgentId) return
|
||||
// Home composer navigates to the conversation page with the prompt in
|
||||
// the query string. Attachments are dropped at this boundary in v1 —
|
||||
// the conversation page (where staging UX is most useful anyway) is
|
||||
// where users can attach. A future iteration can stash staged files
|
||||
// in chrome.storage.session and replay them on first mount there.
|
||||
navigate(
|
||||
`/home/agents/${selectedAgentId}?q=${encodeURIComponent(input.text)}`,
|
||||
)
|
||||
@@ -171,71 +127,68 @@ export const AgentCommandHome: FC = () => {
|
||||
setSelectedAgentId(agent.agentId)
|
||||
}
|
||||
|
||||
const openClawStatus = status?.status
|
||||
const isSetup = openClawStatus != null && openClawStatus !== 'uninitialized'
|
||||
const shouldShowUnavailableState =
|
||||
openClawStatus != null &&
|
||||
openClawStatus !== 'running' &&
|
||||
openClawStatus !== 'uninitialized' &&
|
||||
cardData.length === 0
|
||||
const selectedCard =
|
||||
cardData.find((agent) => agent.agentId === selectedAgentId) ?? cardData[0]
|
||||
const selectedAgent = legacyAgents.find(
|
||||
(agent) => agent.agentId === selectedAgentId,
|
||||
)
|
||||
const selectedAgentReady = selectedAgent
|
||||
? selectedAgent.source === 'agent-harness' || status?.status === 'running'
|
||||
: false
|
||||
const selectedAgentStatus =
|
||||
selectedAgent?.source === 'agent-harness' ? 'running' : status?.status
|
||||
const selectedAgentName =
|
||||
selectedAgent?.name ?? orderedAgents[0]?.name ?? 'your agent'
|
||||
|
||||
const hasAgents = legacyAgents.length > 0
|
||||
|
||||
return (
|
||||
<div className="min-h-full px-4 py-6">
|
||||
<div className="mx-auto flex w-full max-w-5xl flex-col gap-8">
|
||||
{isSetup ? (
|
||||
shouldShowUnavailableState ? (
|
||||
<OpenClawUnavailableState
|
||||
onOpenAgents={() => navigate('/agents')}
|
||||
/>
|
||||
) : cardData.length > 0 ? (
|
||||
<>
|
||||
<div className="flex flex-col items-center gap-5 pt-[max(10vh,24px)] text-center">
|
||||
<div className="space-y-3">
|
||||
<h1 className="font-semibold text-[clamp(2rem,4vw,3.25rem)] leading-tight tracking-tight">
|
||||
What should your agent work on next?
|
||||
</h1>
|
||||
<p className="mx-auto max-w-2xl text-muted-foreground text-sm leading-6">
|
||||
Start with a task, continue a thread, or switch to another
|
||||
agent without leaving the new tab.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="w-full max-w-3xl">
|
||||
<ConversationInput
|
||||
variant="home"
|
||||
agents={agents}
|
||||
selectedAgentId={selectedAgentId}
|
||||
onSelectAgent={handleSelectAgent}
|
||||
onSend={handleSend}
|
||||
onCreateAgent={() => navigate('/agents')}
|
||||
streaming={false}
|
||||
disabled={status?.status !== 'running'}
|
||||
status={status?.status}
|
||||
placeholder={
|
||||
status?.status === 'running'
|
||||
? `Ask ${selectedCard?.name ?? 'your agent'} to handle a task...`
|
||||
: 'OpenClaw is not running...'
|
||||
}
|
||||
/>
|
||||
</div>
|
||||
{hasAgents ? (
|
||||
<>
|
||||
<div className="flex flex-col items-center gap-5 pt-[max(10vh,24px)] text-center">
|
||||
<div className="space-y-3">
|
||||
<h1 className="font-semibold text-[clamp(2rem,4vw,3.25rem)] leading-tight tracking-tight">
|
||||
What should your agent work on next?
|
||||
</h1>
|
||||
<p className="mx-auto max-w-2xl text-muted-foreground text-sm leading-6">
|
||||
Start with a task, continue a thread, or switch to another
|
||||
agent without leaving the new tab.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
<div className="w-full max-w-3xl">
|
||||
<ConversationInput
|
||||
variant="home"
|
||||
agents={legacyAgents}
|
||||
selectedAgentId={selectedAgentId}
|
||||
onSelectAgent={handleSelectAgent}
|
||||
onSend={handleSend}
|
||||
onCreateAgent={() => navigate('/agents')}
|
||||
streaming={false}
|
||||
disabled={!selectedAgentReady}
|
||||
status={selectedAgentStatus}
|
||||
attachmentsEnabled={false}
|
||||
placeholder={
|
||||
selectedAgentReady
|
||||
? `Ask ${selectedAgentName} to handle a task...`
|
||||
: 'Agent runtime is not running...'
|
||||
}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<RecentThreads
|
||||
activeAgentId={selectedAgentId}
|
||||
agents={cardData}
|
||||
onOpenAgents={() => navigate('/agents')}
|
||||
onSelectAgent={(agentId) => navigate(`/home/agents/${agentId}`)}
|
||||
/>
|
||||
</>
|
||||
) : (
|
||||
<EmptyAgentsState onOpenAgents={() => navigate('/agents')} />
|
||||
)
|
||||
<Separator />
|
||||
|
||||
<RecentThreads
|
||||
activeAgentId={selectedAgentId}
|
||||
agents={orderedAgents}
|
||||
adapters={adapters}
|
||||
onOpenAgents={() => navigate('/agents')}
|
||||
onSelectAgent={(agentId) => navigate(`/home/agents/${agentId}`)}
|
||||
/>
|
||||
</>
|
||||
) : (
|
||||
<AgentCommandSetupState onOpenAgents={() => navigate('/agents')} />
|
||||
<EmptyAgentsState onOpenAgents={() => navigate('/agents')} />
|
||||
)}
|
||||
</div>
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import {
|
||||
AlertTriangle,
|
||||
ArrowRight,
|
||||
Bot,
|
||||
ChevronDown,
|
||||
@@ -9,7 +8,6 @@ import {
|
||||
Loader2,
|
||||
Mic,
|
||||
Paperclip,
|
||||
RefreshCw,
|
||||
Square,
|
||||
X,
|
||||
} from 'lucide-react'
|
||||
@@ -38,7 +36,6 @@ import { cn } from '@/lib/utils'
|
||||
import { useVoiceInput } from '@/lib/voice/useVoiceInput'
|
||||
import { useWorkspace } from '@/lib/workspace/use-workspace'
|
||||
import { AgentSelector } from './AgentSelector'
|
||||
import type { OutboundMessage } from './useOutboundQueue'
|
||||
|
||||
export interface ConversationInputSendInput {
|
||||
text: string
|
||||
@@ -55,35 +52,42 @@ interface ConversationInputProps {
|
||||
disabled?: boolean
|
||||
status?: string
|
||||
placeholder?: string
|
||||
attachmentsEnabled?: boolean
|
||||
variant?: 'home' | 'conversation'
|
||||
// Outbound queue: when present, the composer renders the queue strip
|
||||
// above the textarea and lets the user keep sending while a previous
|
||||
// turn is in flight. Optional so non-conversation variants (the home
|
||||
// page) can opt out — the queue only makes sense in the conversation
|
||||
// page where each enqueued message will eventually be delivered to the
|
||||
// active agent.
|
||||
outboundQueue?: OutboundMessage[]
|
||||
onCancelQueued?: (id: string) => void
|
||||
onRetryQueued?: (id: string) => void
|
||||
/**
|
||||
* When set, a Stop button surfaces to the left of the voice mic
|
||||
* while `streaming === true`. Click cancels the active turn
|
||||
* server-side via the chat-cancel endpoint. Absent → no Stop
|
||||
* button (legacy behaviour for the home composer).
|
||||
*/
|
||||
onStop?: () => void
|
||||
}
|
||||
|
||||
function InputActionButton({
|
||||
disabled,
|
||||
onClick,
|
||||
streaming,
|
||||
hasContent,
|
||||
}: {
|
||||
disabled: boolean
|
||||
onClick: () => void
|
||||
streaming: boolean
|
||||
hasContent: boolean
|
||||
}) {
|
||||
// Show the spinner while streaming only when there's nothing to
|
||||
// send — once the user types something, the icon flips back to the
|
||||
// paper-plane so it reads as "queue this message" instead of
|
||||
// "still working".
|
||||
const showSpinner = streaming && !hasContent
|
||||
return (
|
||||
<Button
|
||||
onClick={onClick}
|
||||
size="icon"
|
||||
disabled={disabled}
|
||||
title={streaming && hasContent ? 'Queue message' : undefined}
|
||||
className="h-10 w-10 flex-shrink-0 rounded-xl bg-primary text-primary-foreground hover:bg-primary/90"
|
||||
>
|
||||
{streaming ? (
|
||||
{showSpinner ? (
|
||||
<Loader2 className="h-5 w-5 animate-spin" />
|
||||
) : (
|
||||
<ArrowRight className="h-5 w-5" />
|
||||
@@ -92,6 +96,22 @@ function InputActionButton({
|
||||
)
|
||||
}
|
||||
|
||||
function StopButton({ onStop }: { onStop: () => void }) {
|
||||
return (
|
||||
<Button
|
||||
type="button"
|
||||
size="icon"
|
||||
variant="ghost"
|
||||
onClick={onStop}
|
||||
title="Stop current turn — queued messages will start next."
|
||||
aria-label="Stop current turn"
|
||||
className="h-8 w-8 flex-shrink-0 rounded-lg bg-destructive/10 text-destructive transition-colors hover:bg-destructive/15 hover:text-destructive"
|
||||
>
|
||||
<Square className="h-3.5 w-3.5 fill-current" />
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
|
||||
function VoiceButton({
|
||||
isRecording,
|
||||
isTranscribing,
|
||||
@@ -155,6 +175,7 @@ function ContextControls({
|
||||
status,
|
||||
onAttachClick,
|
||||
attachDisabled,
|
||||
attachmentsEnabled,
|
||||
}: {
|
||||
agents: AgentEntry[]
|
||||
onCreateAgent?: () => void
|
||||
@@ -166,6 +187,7 @@ function ContextControls({
|
||||
status?: string
|
||||
onAttachClick: () => void
|
||||
attachDisabled: boolean
|
||||
attachmentsEnabled: boolean
|
||||
}) {
|
||||
const { supports } = useCapabilities()
|
||||
const { selectedFolder } = useWorkspace()
|
||||
@@ -229,7 +251,7 @@ function ContextControls({
|
||||
type="button"
|
||||
variant="ghost"
|
||||
onClick={onAttachClick}
|
||||
disabled={attachDisabled}
|
||||
disabled={attachDisabled || !attachmentsEnabled}
|
||||
title="Attach files"
|
||||
className={cn(
|
||||
'flex items-center gap-2 rounded-lg px-3 py-1.5 font-medium text-sm transition-all',
|
||||
@@ -306,10 +328,9 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
disabled,
|
||||
status,
|
||||
placeholder,
|
||||
attachmentsEnabled = true,
|
||||
variant = 'conversation',
|
||||
outboundQueue,
|
||||
onCancelQueued,
|
||||
onRetryQueued,
|
||||
onStop,
|
||||
}) => {
|
||||
const [input, setInput] = useState('')
|
||||
const [selectedTabs, setSelectedTabs] = useState<chrome.tabs.Tab[]>([])
|
||||
@@ -328,6 +349,10 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
|
||||
const stageFiles = async (files: File[]) => {
|
||||
if (files.length === 0) return
|
||||
if (!attachmentsEnabled) {
|
||||
setAttachmentError('Attachments are not supported for this agent yet.')
|
||||
return
|
||||
}
|
||||
setIsStaging(true)
|
||||
setAttachmentError(null)
|
||||
try {
|
||||
@@ -369,6 +394,12 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
}
|
||||
}, [voice.transcript, voice.isTranscribing, voice])
|
||||
|
||||
useEffect(() => {
|
||||
if (attachmentsEnabled) return
|
||||
setAttachments([])
|
||||
setAttachmentError(null)
|
||||
}, [attachmentsEnabled])
|
||||
|
||||
const toggleTab = (tab: chrome.tabs.Tab) => {
|
||||
setSelectedTabs((prev) => {
|
||||
const isSelected = prev.some((selected) => selected.id === tab.id)
|
||||
@@ -380,15 +411,17 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
}
|
||||
|
||||
const hasContent = input.trim().length > 0 || attachments.length > 0
|
||||
const queueEnabled = outboundQueue !== undefined
|
||||
// Queue-aware composers (the conversation panel passes `onStop`)
|
||||
// accept input while streaming — the parent decides whether the
|
||||
// submission opens a new turn or enqueues onto the active one.
|
||||
// Surfaces without a Stop hook (home) keep the legacy behaviour
|
||||
// and block input until the current turn finishes.
|
||||
const queueAware = Boolean(onStop)
|
||||
|
||||
const handleSend = () => {
|
||||
const text = input.trim()
|
||||
// The outbound queue accepts new messages while streaming; legacy
|
||||
// direct-send callers (e.g., the home composer) keep the original
|
||||
// streaming-blocks-send semantic.
|
||||
if (disabled || isStaging) return
|
||||
if (!queueEnabled && streaming) return
|
||||
if (streaming && !queueAware) return
|
||||
if (!text && attachments.length === 0) return
|
||||
onSend({ text, attachments })
|
||||
setInput('')
|
||||
@@ -435,6 +468,10 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
}
|
||||
|
||||
const openFilePicker = () => {
|
||||
if (!attachmentsEnabled) {
|
||||
setAttachmentError('Attachments are not supported for this agent yet.')
|
||||
return
|
||||
}
|
||||
fileInputRef.current?.click()
|
||||
}
|
||||
|
||||
@@ -476,13 +513,6 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
error={attachmentError}
|
||||
/>
|
||||
) : null}
|
||||
{queueEnabled && outboundQueue && outboundQueue.length > 0 ? (
|
||||
<OutboundQueueStrip
|
||||
messages={outboundQueue}
|
||||
onCancel={onCancelQueued}
|
||||
onRetry={onRetryQueued}
|
||||
/>
|
||||
) : null}
|
||||
<div
|
||||
className={cn(
|
||||
'flex gap-3',
|
||||
@@ -521,6 +551,7 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
{streaming && onStop ? <StopButton onStop={onStop} /> : null}
|
||||
<VoiceButton
|
||||
isRecording={voice.isRecording}
|
||||
isTranscribing={voice.isTranscribing}
|
||||
@@ -538,15 +569,13 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
!!disabled ||
|
||||
voice.isRecording ||
|
||||
voice.isTranscribing ||
|
||||
// Only block on `streaming` for the legacy direct-send path
|
||||
// (no queue). With the queue active the press always
|
||||
// succeeds — it just enqueues instead of dispatching.
|
||||
(!queueEnabled && streaming)
|
||||
(streaming && !queueAware)
|
||||
}
|
||||
onClick={handleSend}
|
||||
// Spinner stays the user-facing "agent is busy" hint; with the
|
||||
// queue active we still spin while a turn is in flight.
|
||||
streaming={streaming}
|
||||
hasContent={hasContent}
|
||||
/>
|
||||
</div>
|
||||
{voice.error ? (
|
||||
@@ -565,6 +594,7 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
status={status}
|
||||
onAttachClick={openFilePicker}
|
||||
attachDisabled={attachments.length >= 10 || isStaging || !!disabled}
|
||||
attachmentsEnabled={attachmentsEnabled}
|
||||
/>
|
||||
{isDragOver ? (
|
||||
<div className="pointer-events-none absolute inset-0 flex items-center justify-center rounded-[inherit] bg-background/80 font-medium text-foreground text-sm backdrop-blur-sm">
|
||||
@@ -576,117 +606,6 @@ export const ConversationInput: FC<ConversationInputProps> = ({
|
||||
)
|
||||
}
|
||||
|
||||
function OutboundQueueStrip({
|
||||
messages,
|
||||
onCancel,
|
||||
onRetry,
|
||||
}: {
|
||||
messages: OutboundMessage[]
|
||||
onCancel?: (id: string) => void
|
||||
onRetry?: (id: string) => void
|
||||
}) {
|
||||
return (
|
||||
<div className="border-border/40 border-b px-4 pt-3 pb-2">
|
||||
<ul className="flex flex-col gap-1">
|
||||
{messages.map((message) => (
|
||||
<OutboundQueueItem
|
||||
key={message.id}
|
||||
message={message}
|
||||
onCancel={onCancel}
|
||||
onRetry={onRetry}
|
||||
/>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
function OutboundQueueItem({
|
||||
message,
|
||||
onCancel,
|
||||
onRetry,
|
||||
}: {
|
||||
message: OutboundMessage
|
||||
onCancel?: (id: string) => void
|
||||
onRetry?: (id: string) => void
|
||||
}) {
|
||||
const preview = message.text.trim() || '(attachments only)'
|
||||
return (
|
||||
<li className="flex items-center gap-2 rounded-md px-2 py-1 text-xs">
|
||||
<OutboundQueueStatusIcon status={message.status} />
|
||||
<span className="min-w-0 flex-1 truncate text-muted-foreground">
|
||||
{preview}
|
||||
</span>
|
||||
{message.attachmentPreviews.length > 0 ? (
|
||||
<span className="inline-flex items-center gap-1 text-muted-foreground/70">
|
||||
<Paperclip className="size-3" />
|
||||
<span className="tabular-nums">
|
||||
{message.attachmentPreviews.length}
|
||||
</span>
|
||||
</span>
|
||||
) : null}
|
||||
{message.status === 'queued' && onCancel ? (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => onCancel(message.id)}
|
||||
className="ml-1 inline-flex size-5 items-center justify-center rounded-full text-muted-foreground hover:bg-accent hover:text-foreground"
|
||||
aria-label="Cancel queued message"
|
||||
title="Cancel"
|
||||
>
|
||||
<X className="size-3" />
|
||||
</button>
|
||||
) : null}
|
||||
{message.status === 'failed' ? (
|
||||
<span className="ml-1 inline-flex items-center gap-2 text-destructive">
|
||||
<span className="max-w-[160px] truncate" title={message.error}>
|
||||
{message.error ?? 'Failed'}
|
||||
</span>
|
||||
{onRetry ? (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => onRetry(message.id)}
|
||||
className="inline-flex size-5 items-center justify-center rounded-full hover:bg-accent hover:text-foreground"
|
||||
aria-label="Retry failed message"
|
||||
title="Retry"
|
||||
>
|
||||
<RefreshCw className="size-3" />
|
||||
</button>
|
||||
) : null}
|
||||
{onCancel ? (
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => onCancel(message.id)}
|
||||
className="inline-flex size-5 items-center justify-center rounded-full hover:bg-accent hover:text-foreground"
|
||||
aria-label="Discard failed message"
|
||||
title="Discard"
|
||||
>
|
||||
<X className="size-3" />
|
||||
</button>
|
||||
) : null}
|
||||
</span>
|
||||
) : null}
|
||||
</li>
|
||||
)
|
||||
}
|
||||
|
||||
function OutboundQueueStatusIcon({
|
||||
status,
|
||||
}: {
|
||||
status: OutboundMessage['status']
|
||||
}) {
|
||||
if (status === 'sending') {
|
||||
return (
|
||||
<Loader2 className="size-3.5 shrink-0 animate-spin text-muted-foreground" />
|
||||
)
|
||||
}
|
||||
if (status === 'failed') {
|
||||
return <AlertTriangle className="size-3.5 shrink-0 text-destructive" />
|
||||
}
|
||||
return (
|
||||
<span className="inline-block size-2 shrink-0 rounded-full bg-muted-foreground/40" />
|
||||
)
|
||||
}
|
||||
|
||||
function AttachmentStrip({
|
||||
attachments,
|
||||
onRemove,
|
||||
|
||||
@@ -0,0 +1,243 @@
|
||||
import { Quote, TriangleAlert } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import {
|
||||
HoverCard,
|
||||
HoverCardContent,
|
||||
HoverCardTrigger,
|
||||
} from '@/components/ui/hover-card'
|
||||
import { adapterLabel } from '@/entrypoints/app/agents/AdapterIcon'
|
||||
import { formatRelativeTime } from '@/entrypoints/app/agents/agent-display.helpers'
|
||||
import type {
|
||||
HarnessAdapterHealth,
|
||||
HarnessAgent,
|
||||
HarnessAgentAdapter,
|
||||
} from '@/entrypoints/app/agents/agent-harness-types'
|
||||
import { AgentTile } from '@/entrypoints/app/agents/agent-row/AgentTile'
|
||||
import {
|
||||
firstNonBlankLine,
|
||||
truncate,
|
||||
} from '@/entrypoints/app/agents/agent-row/agent-row.helpers'
|
||||
import type { AgentLiveness } from '@/entrypoints/app/agents/LivenessDot'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
interface HomeAgentCardProps {
|
||||
agent: HarnessAgent
|
||||
adapter: HarnessAgentAdapter | 'unknown'
|
||||
/** Per-adapter health snapshot, shared across cards rendering the
|
||||
* same adapter. `null` when the /adapters response hasn't surfaced
|
||||
* health yet (we treat that as healthy until proven otherwise). */
|
||||
adapterHealth: HarnessAdapterHealth | null
|
||||
/** Highlights the card with an accent ring; tells the user which
|
||||
* agent the conversation input is bound to. */
|
||||
active?: boolean
|
||||
onClick: () => void
|
||||
}
|
||||
|
||||
const PREVIEW_CHARS = 100
|
||||
|
||||
/**
|
||||
* Grid-shaped card for the /home Recent agents section. Composition
|
||||
* mirrors the rail's `AgentRowCard` but the layout is a vertical
|
||||
* column sized for a 1/3-width tile rather than a full-width row.
|
||||
*
|
||||
* Reuses `<AgentTile>`, `<LivenessDot>`, `livenessDetail`,
|
||||
* `formatRelativeTime`, `firstNonBlankLine`, `truncate`, and the
|
||||
* inline `Unavailable` chip pattern so the visual language is
|
||||
* continuous between rail and grid.
|
||||
*/
|
||||
export const HomeAgentCard: FC<HomeAgentCardProps> = ({
|
||||
agent,
|
||||
adapter,
|
||||
adapterHealth,
|
||||
active,
|
||||
onClick,
|
||||
}) => {
|
||||
const status = agent.status ?? 'unknown'
|
||||
const lastUsedAt = agent.lastUsedAt ?? null
|
||||
const isWorking = status === 'working'
|
||||
const isAsleep = status === 'asleep'
|
||||
const isError = status === 'error'
|
||||
const hasActiveTurn = Boolean(agent.activeTurnId)
|
||||
|
||||
return (
|
||||
<button
|
||||
type="button"
|
||||
onClick={onClick}
|
||||
className={cn(
|
||||
'group flex min-h-32 w-full min-w-0 flex-col rounded-2xl border bg-card p-4 text-left shadow-sm transition-colors',
|
||||
active && 'ring-1 ring-[var(--accent-orange)]/30',
|
||||
isWorking
|
||||
? 'border-[var(--accent-orange)]/40'
|
||||
: isError
|
||||
? 'border-destructive/30'
|
||||
: 'border-border/60 hover:border-[var(--accent-orange)]/30',
|
||||
)}
|
||||
>
|
||||
<div className="flex items-start gap-3">
|
||||
<AgentTile adapter={adapter} status={status} lastUsedAt={lastUsedAt} />
|
||||
<div className="min-w-0 flex-1">
|
||||
<div className="flex items-center gap-1.5">
|
||||
<span className="truncate font-semibold text-sm">
|
||||
{displayName(agent)}
|
||||
</span>
|
||||
{isWorking && (
|
||||
<Badge
|
||||
variant="secondary"
|
||||
className="ml-auto bg-amber-50 text-amber-900 hover:bg-amber-50"
|
||||
>
|
||||
Working
|
||||
</Badge>
|
||||
)}
|
||||
</div>
|
||||
<SummaryLine
|
||||
adapter={adapter}
|
||||
modelId={agent.modelId ?? null}
|
||||
reasoningEffort={agent.reasoningEffort ?? null}
|
||||
adapterHealth={adapterHealth}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<LastMessage message={agent.lastUserMessage ?? null} />
|
||||
|
||||
<div className="mt-3 flex items-center justify-between gap-2 text-muted-foreground text-xs">
|
||||
<span>{statusFootnote(status, lastUsedAt)}</span>
|
||||
{hasActiveTurn ? (
|
||||
<ResumeChip />
|
||||
) : isAsleep ? (
|
||||
<Badge variant="outline" className="text-muted-foreground">
|
||||
Asleep
|
||||
</Badge>
|
||||
) : isError ? (
|
||||
<ErrorChip lastError={agent.lastError ?? null} />
|
||||
) : null}
|
||||
</div>
|
||||
</button>
|
||||
)
|
||||
}
|
||||
|
||||
const SummaryLine: FC<{
|
||||
adapter: HarnessAgentAdapter | 'unknown'
|
||||
modelId: string | null
|
||||
reasoningEffort: string | null
|
||||
adapterHealth: HarnessAdapterHealth | null
|
||||
}> = ({ adapter, modelId, reasoningEffort, adapterHealth }) => {
|
||||
const parts = [adapterLabel(adapter)]
|
||||
if (modelId) parts.push(modelId)
|
||||
if (reasoningEffort) parts.push(reasoningEffort)
|
||||
const unhealthy = adapterHealth?.healthy === false
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'mt-0.5 flex items-center gap-1.5 text-muted-foreground text-xs',
|
||||
unhealthy && 'text-muted-foreground/70',
|
||||
)}
|
||||
>
|
||||
<span className="truncate">{parts.join(' · ')}</span>
|
||||
{unhealthy && (
|
||||
<HoverCard openDelay={200}>
|
||||
<HoverCardTrigger asChild>
|
||||
<Badge
|
||||
variant="outline"
|
||||
className="h-5 cursor-default gap-1 border-amber-500/40 bg-amber-50 px-1.5 text-amber-900 hover:bg-amber-50"
|
||||
>
|
||||
<TriangleAlert className="size-2.5" />
|
||||
<span className="font-normal">Unavailable</span>
|
||||
</Badge>
|
||||
</HoverCardTrigger>
|
||||
<HoverCardContent side="right" className="w-72 text-sm">
|
||||
<div className="font-medium">
|
||||
{adapterLabel(adapter)} CLI not available
|
||||
</div>
|
||||
<div className="mt-1 text-muted-foreground text-xs">
|
||||
{adapterHealth?.reason ??
|
||||
'Adapter binary missing on $PATH. Install it from the adapter docs to use this agent.'}
|
||||
</div>
|
||||
</HoverCardContent>
|
||||
</HoverCard>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const LastMessage: FC<{ message: string | null }> = ({ message }) => {
|
||||
if (!message) {
|
||||
return (
|
||||
<p className="mt-3 flex-1 text-muted-foreground/70 text-xs italic">
|
||||
No messages yet — start a chat
|
||||
</p>
|
||||
)
|
||||
}
|
||||
return (
|
||||
<p className="mt-3 line-clamp-2 flex flex-1 items-start gap-1.5 text-foreground/85 text-sm italic leading-snug">
|
||||
<Quote
|
||||
className="mt-1 size-3 shrink-0 text-muted-foreground/60"
|
||||
aria-hidden
|
||||
/>
|
||||
<span className="line-clamp-2">
|
||||
{truncate(firstNonBlankLine(message), PREVIEW_CHARS)}
|
||||
</span>
|
||||
</p>
|
||||
)
|
||||
}
|
||||
|
||||
const ResumeChip: FC = () => (
|
||||
<span className="inline-flex items-center gap-1.5 rounded-full bg-[var(--accent-orange)] px-2.5 py-0.5 font-medium text-[11px] text-white shadow-sm">
|
||||
<span className="relative flex size-1.5">
|
||||
<span className="absolute inline-flex h-full w-full animate-ping rounded-full bg-white/70 opacity-75" />
|
||||
<span className="relative inline-flex size-1.5 rounded-full bg-white" />
|
||||
</span>
|
||||
Resume
|
||||
</span>
|
||||
)
|
||||
|
||||
const ErrorChip: FC<{ lastError: string | null }> = ({ lastError }) => {
|
||||
if (!lastError) {
|
||||
return <Badge variant="destructive">Attention</Badge>
|
||||
}
|
||||
return (
|
||||
<HoverCard openDelay={200}>
|
||||
<HoverCardTrigger asChild>
|
||||
<Badge variant="destructive" className="cursor-default">
|
||||
Attention
|
||||
</Badge>
|
||||
</HoverCardTrigger>
|
||||
<HoverCardContent
|
||||
side="left"
|
||||
className="max-w-xs whitespace-pre-wrap font-mono text-xs"
|
||||
>
|
||||
{lastError}
|
||||
</HoverCardContent>
|
||||
</HoverCard>
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Footer left side: relative time on every state EXCEPT working,
|
||||
* which shows `now` (the dot is already pulsing — restating it as
|
||||
* "Working" would duplicate the pill in the title row).
|
||||
*/
|
||||
function statusFootnote(
|
||||
status: AgentLiveness,
|
||||
lastUsedAt: number | null,
|
||||
): string {
|
||||
if (status === 'working') return 'now'
|
||||
return formatRelativeTime(lastUsedAt)
|
||||
}
|
||||
|
||||
const UUID_PATTERN =
|
||||
/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
|
||||
const OC_UUID_PATTERN =
|
||||
/^oc-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
|
||||
|
||||
function displayName(agent: HarnessAgent): string {
|
||||
const name = agent.name?.trim()
|
||||
const id = agent.id
|
||||
if (!name || name === id) {
|
||||
if (OC_UUID_PATTERN.test(id)) return id.slice(0, 11)
|
||||
if (UUID_PATTERN.test(id)) return id.slice(0, 8)
|
||||
return id
|
||||
}
|
||||
return name
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
import { ListPlus, X } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import {
|
||||
Queue,
|
||||
QueueItem,
|
||||
QueueItemAction,
|
||||
QueueItemActions,
|
||||
QueueItemAttachment,
|
||||
QueueItemContent,
|
||||
QueueItemFile,
|
||||
QueueItemImage,
|
||||
QueueList,
|
||||
QueueSection,
|
||||
QueueSectionContent,
|
||||
QueueSectionLabel,
|
||||
QueueSectionTrigger,
|
||||
} from '@/components/ai-elements/queue'
|
||||
import type {
|
||||
HarnessQueuedMessage,
|
||||
HarnessQueuedMessageAttachment,
|
||||
} from '@/entrypoints/app/agents/agent-harness-types'
|
||||
import { firstNonBlankLine } from '@/entrypoints/app/agents/agent-row/agent-row.helpers'
|
||||
|
||||
interface QueuePanelProps {
|
||||
queue: HarnessQueuedMessage[]
|
||||
onRemove: (messageId: string) => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Renders the agent's pending message queue using the shared AI
|
||||
* Elements `Queue` primitives. Caller is expected to gate render on
|
||||
* `queue.length > 0` — when empty, this returns null so the panel
|
||||
* disappears cleanly between turns.
|
||||
*/
|
||||
export const QueuePanel: FC<QueuePanelProps> = ({ queue, onRemove }) => {
|
||||
if (queue.length === 0) return null
|
||||
return (
|
||||
<Queue>
|
||||
<QueueSection>
|
||||
<QueueSectionTrigger>
|
||||
<QueueSectionLabel
|
||||
count={queue.length}
|
||||
label={queue.length === 1 ? 'queued message' : 'queued messages'}
|
||||
icon={<ListPlus className="size-3.5" />}
|
||||
/>
|
||||
</QueueSectionTrigger>
|
||||
<QueueSectionContent>
|
||||
<QueueList>
|
||||
{queue.map((entry) => (
|
||||
<QueueItem key={entry.id}>
|
||||
<div className="flex items-center gap-2">
|
||||
<QueueItemContent>
|
||||
{firstNonBlankLine(entry.message)}
|
||||
</QueueItemContent>
|
||||
<QueueItemActions>
|
||||
<QueueItemAction
|
||||
aria-label="Remove from queue"
|
||||
onClick={() => onRemove(entry.id)}
|
||||
>
|
||||
<X className="size-3" />
|
||||
</QueueItemAction>
|
||||
</QueueItemActions>
|
||||
</div>
|
||||
{entry.attachments && entry.attachments.length > 0 ? (
|
||||
<QueueItemAttachment>
|
||||
{entry.attachments.map((attachment, idx) =>
|
||||
renderAttachment(entry.id, attachment, idx),
|
||||
)}
|
||||
</QueueItemAttachment>
|
||||
) : null}
|
||||
</QueueItem>
|
||||
))}
|
||||
</QueueList>
|
||||
</QueueSectionContent>
|
||||
</QueueSection>
|
||||
</Queue>
|
||||
)
|
||||
}
|
||||
|
||||
function renderAttachment(
|
||||
messageId: string,
|
||||
attachment: HarnessQueuedMessageAttachment,
|
||||
idx: number,
|
||||
) {
|
||||
if (attachment.mediaType.startsWith('image/')) {
|
||||
const src = `data:${attachment.mediaType};base64,${attachment.data}`
|
||||
return <QueueItemImage key={`${messageId}-${idx}`} src={src} />
|
||||
}
|
||||
return (
|
||||
<QueueItemFile key={`${messageId}-${idx}`}>
|
||||
{attachment.mediaType}
|
||||
</QueueItemFile>
|
||||
)
|
||||
}
|
||||
@@ -1,8 +1,11 @@
|
||||
import type { FC } from 'react'
|
||||
import { Outlet, useOutletContext } from 'react-router'
|
||||
import { useHarnessAgents } from '@/entrypoints/app/agents/useAgents'
|
||||
import type {
|
||||
AgentEntry,
|
||||
OpenClawStatus,
|
||||
} from '@/entrypoints/app/agents/useOpenClaw'
|
||||
import {
|
||||
type AgentEntry,
|
||||
type OpenClawStatus,
|
||||
useOpenClawAgents,
|
||||
useOpenClawStatus,
|
||||
} from '@/entrypoints/app/agents/useOpenClaw'
|
||||
@@ -16,16 +19,32 @@ interface AgentCommandContextValue {
|
||||
|
||||
export const AgentCommandLayout: FC = () => {
|
||||
const { status, loading: statusLoading } = useOpenClawStatus(5000)
|
||||
const { agents, loading: agentsLoading } = useOpenClawAgents(
|
||||
status?.status === 'running' && status.controlPlaneStatus === 'connected',
|
||||
const openClawEnabled =
|
||||
status?.status === 'running' && status.controlPlaneStatus === 'connected'
|
||||
const { agents: openClawAgents, loading: openClawAgentsLoading } =
|
||||
useOpenClawAgents(openClawEnabled)
|
||||
const { agents: harnessAgents, loading: harnessAgentsLoading } =
|
||||
useHarnessAgents()
|
||||
const visibleOpenClawAgents = openClawEnabled ? openClawAgents : []
|
||||
// Dual-created OpenClaw agents appear in both `/claw/agents` (gateway
|
||||
// record) and `/agents` (harness record) under the same id. Prefer the
|
||||
// harness entry so the chat panel can route through the harness path
|
||||
// and the rail doesn't show duplicates.
|
||||
const harnessAgentIds = new Set(harnessAgents.map((entry) => entry.agentId))
|
||||
const dedupedOpenClawAgents = visibleOpenClawAgents.filter(
|
||||
(entry) => !harnessAgentIds.has(entry.agentId),
|
||||
)
|
||||
const agents = [...dedupedOpenClawAgents, ...harnessAgents]
|
||||
|
||||
return (
|
||||
<Outlet
|
||||
context={
|
||||
{
|
||||
agents,
|
||||
agentsLoading,
|
||||
agentsLoading:
|
||||
harnessAgentsLoading ||
|
||||
statusLoading ||
|
||||
(openClawEnabled && openClawAgentsLoading),
|
||||
status,
|
||||
statusLoading,
|
||||
} satisfies AgentCommandContextValue
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { mapAgentHarnessToolStatus } from './agent-stream-events'
|
||||
|
||||
describe('mapAgentHarnessToolStatus', () => {
|
||||
it('normalizes ACP tool statuses for the chat renderer', () => {
|
||||
expect(mapAgentHarnessToolStatus('running')).toBe('running')
|
||||
expect(mapAgentHarnessToolStatus('completed')).toBe('completed')
|
||||
expect(mapAgentHarnessToolStatus('failed')).toBe('error')
|
||||
expect(mapAgentHarnessToolStatus('incomplete')).toBe('running')
|
||||
expect(mapAgentHarnessToolStatus(undefined)).toBe('running')
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,19 @@
|
||||
import type { ToolEntry } from '@/lib/agent-conversations/types'
|
||||
|
||||
export function mapAgentHarnessToolStatus(
|
||||
status: string | undefined,
|
||||
): ToolEntry['status'] {
|
||||
if (!status) return 'running'
|
||||
const normalized = status.toLowerCase()
|
||||
if (['error', 'failed', 'failure', 'denied'].includes(normalized)) {
|
||||
return 'error'
|
||||
}
|
||||
if (
|
||||
['complete', 'completed', 'done', 'success', 'succeeded'].includes(
|
||||
normalized,
|
||||
)
|
||||
) {
|
||||
return 'completed'
|
||||
}
|
||||
return 'running'
|
||||
}
|
||||
@@ -1,8 +1,10 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import type { AgentConversationTurn } from '@/lib/agent-conversations/types'
|
||||
import {
|
||||
type AgentHistoryPageResponse,
|
||||
type BrowserOSChatHistoryItem,
|
||||
buildChatHistoryFromClawMessages,
|
||||
filterTurnsPersistedInHistory,
|
||||
flattenHistoryPages,
|
||||
mapHistoryItemToClawMessage,
|
||||
} from './claw-chat-types'
|
||||
@@ -118,4 +120,64 @@ describe('claw-chat-types', () => {
|
||||
{ role: 'assistant', content: 'Assistant answer' },
|
||||
])
|
||||
})
|
||||
|
||||
it('hides completed live turns once harness history contains the same turn', () => {
|
||||
const turn: AgentConversationTurn = {
|
||||
id: 'live-turn',
|
||||
userText: 'hello',
|
||||
parts: [{ kind: 'text', text: 'hi there' }],
|
||||
done: true,
|
||||
timestamp: 1_000,
|
||||
}
|
||||
|
||||
const visible = filterTurnsPersistedInHistory(
|
||||
[turn],
|
||||
[
|
||||
{
|
||||
id: 'history-user',
|
||||
role: 'user',
|
||||
sessionKey: 'main',
|
||||
timestamp: 1_050,
|
||||
status: 'historical',
|
||||
parts: [{ type: 'text', text: 'hello' }],
|
||||
},
|
||||
{
|
||||
id: 'history-assistant',
|
||||
role: 'assistant',
|
||||
sessionKey: 'main',
|
||||
timestamp: 1_100,
|
||||
status: 'historical',
|
||||
parts: [{ type: 'text', text: 'hi there' }],
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
expect(visible).toEqual([])
|
||||
})
|
||||
|
||||
it('keeps completed live turns until matching assistant history arrives', () => {
|
||||
const turn: AgentConversationTurn = {
|
||||
id: 'live-turn',
|
||||
userText: 'hello',
|
||||
parts: [{ kind: 'text', text: 'hi there' }],
|
||||
done: true,
|
||||
timestamp: 1_000,
|
||||
}
|
||||
|
||||
const visible = filterTurnsPersistedInHistory(
|
||||
[turn],
|
||||
[
|
||||
{
|
||||
id: 'history-user',
|
||||
role: 'user',
|
||||
sessionKey: 'main',
|
||||
timestamp: 1_050,
|
||||
status: 'historical',
|
||||
parts: [{ type: 'text', text: 'hello' }],
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
expect(visible).toEqual([turn])
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { OpenClawChatHistoryMessage } from '@/entrypoints/app/agents/useOpenClaw'
|
||||
import type { AgentConversationTurn } from '@/lib/agent-conversations/types'
|
||||
|
||||
export type ClawChatRole = 'user' | 'assistant'
|
||||
|
||||
@@ -22,9 +23,9 @@ export interface BrowserOSChatHistoryToolCall {
|
||||
toolName: string
|
||||
label: string
|
||||
subject?: string
|
||||
status: 'completed' | 'failed'
|
||||
input?: Record<string, unknown>
|
||||
output?: string
|
||||
status: 'pending' | 'running' | 'completed' | 'failed'
|
||||
input?: unknown
|
||||
output?: unknown
|
||||
error?: string
|
||||
durationMs?: number
|
||||
}
|
||||
@@ -221,3 +222,66 @@ export function buildChatHistoryFromClawMessages(
|
||||
Boolean(message),
|
||||
)
|
||||
}
|
||||
|
||||
const TURN_HISTORY_MATCH_WINDOW_MS = 5_000
|
||||
|
||||
export function filterTurnsPersistedInHistory(
|
||||
turns: AgentConversationTurn[],
|
||||
historyMessages: ClawChatMessage[],
|
||||
): AgentConversationTurn[] {
|
||||
return turns.filter(
|
||||
(turn) => !isTurnPersistedInHistory(turn, historyMessages),
|
||||
)
|
||||
}
|
||||
|
||||
function isTurnPersistedInHistory(
|
||||
turn: AgentConversationTurn,
|
||||
historyMessages: ClawChatMessage[],
|
||||
): boolean {
|
||||
if (!turn.done) return false
|
||||
|
||||
const assistantText = getTurnAssistantText(turn)
|
||||
if (!assistantText) return false
|
||||
|
||||
const minTimestamp = turn.timestamp - TURN_HISTORY_MATCH_WINDOW_MS
|
||||
const userText = turn.userText.trim()
|
||||
const userPersisted =
|
||||
!userText ||
|
||||
historyMessages.some(
|
||||
(message) =>
|
||||
message.role === 'user' &&
|
||||
isHistoryMessageAfter(message, minTimestamp) &&
|
||||
getClawMessageText(message) === userText,
|
||||
)
|
||||
const assistantPersisted = historyMessages.some(
|
||||
(message) =>
|
||||
message.role === 'assistant' &&
|
||||
isHistoryMessageAfter(message, minTimestamp) &&
|
||||
getClawMessageText(message) === assistantText,
|
||||
)
|
||||
|
||||
return userPersisted && assistantPersisted
|
||||
}
|
||||
|
||||
function isHistoryMessageAfter(
|
||||
message: ClawChatMessage,
|
||||
minTimestamp: number,
|
||||
): boolean {
|
||||
return message.timestamp == null || message.timestamp >= minTimestamp
|
||||
}
|
||||
|
||||
function getTurnAssistantText(turn: AgentConversationTurn): string {
|
||||
return turn.parts
|
||||
.filter((part) => part.kind === 'text')
|
||||
.map((part) => part.text)
|
||||
.join('')
|
||||
.trim()
|
||||
}
|
||||
|
||||
function getClawMessageText(message: ClawChatMessage): string {
|
||||
return message.parts
|
||||
.filter((part) => part.type === 'text')
|
||||
.map((part) => part.text)
|
||||
.join('')
|
||||
.trim()
|
||||
}
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
import { buildToolLabel } from '../../../lib/tool-labels'
|
||||
import type { HarnessAgentHistoryPage } from '../agents/agent-harness-types'
|
||||
import type {
|
||||
AgentHistoryPageResponse,
|
||||
BrowserOSChatHistoryItem,
|
||||
BrowserOSChatHistoryToolCall,
|
||||
} from './claw-chat-types'
|
||||
|
||||
export function mapHarnessHistoryPage(
|
||||
page: HarnessAgentHistoryPage,
|
||||
): AgentHistoryPageResponse {
|
||||
const items: BrowserOSChatHistoryItem[] = page.items.map((item, index) => {
|
||||
const toolCalls = item.toolCalls?.map(
|
||||
(tool): BrowserOSChatHistoryToolCall => {
|
||||
const input = asRecord(tool.input)
|
||||
const { label, subject } = buildToolLabel(tool.toolName, input)
|
||||
return {
|
||||
toolName: tool.toolName,
|
||||
label,
|
||||
status: tool.status,
|
||||
...(tool.toolCallId ? { toolCallId: tool.toolCallId } : {}),
|
||||
...(subject ? { subject } : {}),
|
||||
...(tool.input !== undefined ? { input: tool.input } : {}),
|
||||
...(tool.output !== undefined ? { output: tool.output } : {}),
|
||||
...(tool.error ? { error: tool.error } : {}),
|
||||
...(tool.durationMs != null ? { durationMs: tool.durationMs } : {}),
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
return {
|
||||
id: item.id,
|
||||
role: item.role,
|
||||
text: item.text,
|
||||
timestamp: item.createdAt,
|
||||
messageSeq: index + 1,
|
||||
sessionKey: 'main',
|
||||
source: 'user-chat',
|
||||
...(item.reasoning ? { reasoning: item.reasoning } : {}),
|
||||
...(toolCalls && toolCalls.length > 0 ? { toolCalls } : {}),
|
||||
}
|
||||
})
|
||||
const updatedAt =
|
||||
page.items.length > 0
|
||||
? Math.max(...page.items.map((item) => item.createdAt))
|
||||
: Date.now()
|
||||
|
||||
return {
|
||||
agentId: page.agentId,
|
||||
sessionKey: 'main',
|
||||
session: {
|
||||
key: 'main',
|
||||
updatedAt,
|
||||
sessionId: 'main',
|
||||
agentId: page.agentId,
|
||||
kind: 'agent-harness',
|
||||
source: 'user-chat',
|
||||
},
|
||||
items,
|
||||
page: {
|
||||
hasMore: false,
|
||||
limit: items.length,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
function asRecord(value: unknown): Record<string, unknown> | undefined {
|
||||
return value && typeof value === 'object' && !Array.isArray(value)
|
||||
? (value as Record<string, unknown>)
|
||||
: undefined
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import type { HarnessAgent } from '@/entrypoints/app/agents/agent-harness-types'
|
||||
import { orderHomeAgents } from './home-agent-card.helpers'
|
||||
|
||||
function agent(overrides: Partial<HarnessAgent>): HarnessAgent {
|
||||
return {
|
||||
id: overrides.id ?? 'agent-x',
|
||||
name: overrides.name ?? overrides.id ?? 'agent-x',
|
||||
adapter: overrides.adapter ?? 'codex',
|
||||
permissionMode: 'approve-all',
|
||||
sessionKey: `agent:${overrides.id ?? 'agent-x'}:main`,
|
||||
createdAt: 1000,
|
||||
updatedAt: 1000,
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
describe('orderHomeAgents', () => {
|
||||
it('places active-turn agents before everyone else', () => {
|
||||
const sorted = orderHomeAgents([
|
||||
agent({ id: 'a', lastUsedAt: 5000 }),
|
||||
agent({ id: 'b', lastUsedAt: 9000, activeTurnId: 'turn-1' }),
|
||||
agent({ id: 'c', lastUsedAt: 7000 }),
|
||||
])
|
||||
expect(sorted.map((a) => a.id)).toEqual(['b', 'c', 'a'])
|
||||
})
|
||||
|
||||
it('orders non-active agents by lastUsedAt desc', () => {
|
||||
const sorted = orderHomeAgents([
|
||||
agent({ id: 'old', lastUsedAt: 1000 }),
|
||||
agent({ id: 'new', lastUsedAt: 9000 }),
|
||||
agent({ id: 'mid', lastUsedAt: 5000 }),
|
||||
])
|
||||
expect(sorted.map((a) => a.id)).toEqual(['new', 'mid', 'old'])
|
||||
})
|
||||
|
||||
it('puts the gateway `main` seed agent above other never-used agents', () => {
|
||||
const sorted = orderHomeAgents([
|
||||
agent({ id: 'oc-aaaaaa', lastUsedAt: null }),
|
||||
agent({ id: 'main', lastUsedAt: null }),
|
||||
agent({ id: 'oc-bbbbbb', lastUsedAt: null }),
|
||||
])
|
||||
expect(sorted.map((a) => a.id)).toEqual(['main', 'oc-aaaaaa', 'oc-bbbbbb'])
|
||||
})
|
||||
|
||||
it('sends never-used agents to the bottom even when `main` is among them', () => {
|
||||
const sorted = orderHomeAgents([
|
||||
agent({ id: 'main', lastUsedAt: null }),
|
||||
agent({ id: 'used', lastUsedAt: 5000 }),
|
||||
])
|
||||
expect(sorted.map((a) => a.id)).toEqual(['used', 'main'])
|
||||
})
|
||||
|
||||
it('does NOT sort by pinned — pinned agents are treated like any other', () => {
|
||||
const sorted = orderHomeAgents([
|
||||
agent({ id: 'unpinned-recent', lastUsedAt: 9000, pinned: false }),
|
||||
agent({ id: 'pinned-old', lastUsedAt: 1000, pinned: true }),
|
||||
])
|
||||
expect(sorted.map((a) => a.id)).toEqual(['unpinned-recent', 'pinned-old'])
|
||||
})
|
||||
|
||||
it('falls back to id-stable ordering when lastUsedAt ties', () => {
|
||||
const sorted = orderHomeAgents([
|
||||
agent({ id: 'b', lastUsedAt: 5000 }),
|
||||
agent({ id: 'a', lastUsedAt: 5000 }),
|
||||
])
|
||||
expect(sorted.map((a) => a.id)).toEqual(['a', 'b'])
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,42 @@
|
||||
import type { HarnessAgent } from '@/entrypoints/app/agents/agent-harness-types'
|
||||
|
||||
/**
|
||||
* Order for the /home Recent agents grid.
|
||||
*
|
||||
* 1. Active turn first — agents mid-turn float to the top so the
|
||||
* Resume affordance is the first thing the user sees on /home.
|
||||
* 2. The protected gateway-side `main` agent stays pinned-to-top in
|
||||
* the never-used group on a fresh install (mirrors the rail).
|
||||
* 3. Recency (`lastUsedAt` desc).
|
||||
* 4. `id` tiebreaker for stability so the grid doesn't reshuffle on
|
||||
* every 5-second poll.
|
||||
*
|
||||
* Pin is NOT a sort key. The home grid is action-oriented and trusts
|
||||
* recency + active-turn to surface the right agent; pinning is an
|
||||
* organisation tool that lives on the rail at /agents.
|
||||
*/
|
||||
export function orderHomeAgents(agents: HarnessAgent[]): HarnessAgent[] {
|
||||
return [...agents].sort((a, b) => {
|
||||
const aActive = a.activeTurnId != null
|
||||
const bActive = b.activeTurnId != null
|
||||
if (aActive !== bActive) return aActive ? -1 : 1
|
||||
|
||||
// Recency wins outright. Never-used agents (`lastUsedAt == null`)
|
||||
// both fall to the same `-Infinity` bucket and the seed/id rules
|
||||
// below decide their order — but a used agent always beats any
|
||||
// never-used agent regardless of id.
|
||||
const aValue = a.lastUsedAt ?? Number.NEGATIVE_INFINITY
|
||||
const bValue = b.lastUsedAt ?? Number.NEGATIVE_INFINITY
|
||||
if (aValue !== bValue) return bValue - aValue
|
||||
|
||||
// Inside the never-used (or exact-tie) group: pin the gateway
|
||||
// `main` seed to the top of the group on a fresh install, then
|
||||
// fall back to id-stable order so the grid doesn't reshuffle on
|
||||
// every poll.
|
||||
const aSeed = a.id === 'main' && a.lastUsedAt == null
|
||||
const bSeed = b.id === 'main' && b.lastUsedAt == null
|
||||
if (aSeed !== bSeed) return aSeed ? -1 : 1
|
||||
|
||||
return a.id.localeCompare(b.id)
|
||||
})
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
import {
|
||||
type AgentEntry,
|
||||
getModelDisplayName,
|
||||
type OpenClawStatus,
|
||||
} from '@/entrypoints/app/agents/useOpenClaw'
|
||||
import type { AgentCardData } from '@/lib/agent-conversations/types'
|
||||
import type { AgentOverview } from './useAgentDashboard'
|
||||
|
||||
function resolveAgentStatus(
|
||||
gatewayStatus: OpenClawStatus['status'] | undefined,
|
||||
liveStatus: AgentOverview['status'] | undefined,
|
||||
): AgentCardData['status'] {
|
||||
// Gateway-level errors take precedence
|
||||
if (gatewayStatus === 'error') return 'error'
|
||||
if (gatewayStatus === 'starting') return 'working'
|
||||
|
||||
// Per-agent live status from the WS observer
|
||||
if (liveStatus === 'working') return 'working'
|
||||
if (liveStatus === 'error') return 'error'
|
||||
|
||||
return 'idle'
|
||||
}
|
||||
|
||||
/**
|
||||
* Build agent card display data by merging the raw agent entries from
|
||||
* the gateway with enriched overview data from the dashboard API.
|
||||
*
|
||||
* Pure function — no hooks, no IndexedDB, no async.
|
||||
*/
|
||||
export function buildAgentCardData(
|
||||
agents: AgentEntry[],
|
||||
status: OpenClawStatus['status'] | undefined,
|
||||
dashboard: AgentOverview[] | undefined,
|
||||
): AgentCardData[] {
|
||||
return agents.map((agent) => {
|
||||
const overview = dashboard?.find((d) => d.agentId === agent.agentId)
|
||||
|
||||
return {
|
||||
agentId: agent.agentId,
|
||||
name: agent.name,
|
||||
model: getModelDisplayName(agent.model),
|
||||
status: resolveAgentStatus(status, overview?.status),
|
||||
lastMessage: overview?.latestMessage?.slice(0, 200) ?? undefined,
|
||||
lastMessageTimestamp: overview?.latestMessageAt ?? undefined,
|
||||
activitySummary: overview?.activitySummary ?? undefined,
|
||||
currentTool: overview?.currentTool ?? undefined,
|
||||
costUsd: overview?.totalCostUsd ?? undefined,
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -1,17 +1,22 @@
|
||||
import { useEffect, useRef, useState } from 'react'
|
||||
import {
|
||||
chatWithAgent,
|
||||
type OpenClawChatHistoryMessage,
|
||||
type OpenClawStreamEvent,
|
||||
} from '@/entrypoints/app/agents/useOpenClaw'
|
||||
type AgentHarnessStreamEvent,
|
||||
attachToHarnessTurn,
|
||||
cancelHarnessTurn,
|
||||
chatWithHarnessAgent,
|
||||
fetchActiveHarnessTurn,
|
||||
} from '@/entrypoints/app/agents/useAgents'
|
||||
import type { OpenClawChatHistoryMessage } from '@/entrypoints/app/agents/useOpenClaw'
|
||||
import type {
|
||||
AgentConversationTurn,
|
||||
AssistantPart,
|
||||
ToolEntry,
|
||||
UserAttachmentPreview,
|
||||
} from '@/lib/agent-conversations/types'
|
||||
import type { ServerAttachmentPayload } from '@/lib/attachments'
|
||||
import { consumeSSEStream } from '@/lib/sse'
|
||||
import { buildToolLabel } from '@/lib/tool-labels'
|
||||
import { mapAgentHarnessToolStatus } from './agent-stream-events'
|
||||
|
||||
export interface SendInput {
|
||||
text: string
|
||||
@@ -23,9 +28,23 @@ export interface SendInput {
|
||||
}
|
||||
|
||||
interface UseAgentConversationOptions {
|
||||
// The hook always speaks to the harness chat path now; the OpenClaw
|
||||
// legacy /claw/agents/:id/chat surface was removed in Step 12. The
|
||||
// option remains for forward-compatibility.
|
||||
runtime?: 'agent-harness'
|
||||
sessionKey?: string | null
|
||||
history?: OpenClawChatHistoryMessage[]
|
||||
onComplete?: () => void
|
||||
onSessionKeyChange?: (sessionKey: string) => void
|
||||
/**
|
||||
* Server-side active turn id, surfaced via the listing query. When
|
||||
* this changes from null/<id> to a different non-null id while we
|
||||
* aren't already streaming (e.g. the server just popped a queued
|
||||
* message and started a new turn), the hook reattaches via
|
||||
* /chat/active so the chat panel picks up the live stream without
|
||||
* waiting for a remount.
|
||||
*/
|
||||
activeTurnId?: string | null
|
||||
}
|
||||
|
||||
export function useAgentConversation(
|
||||
@@ -39,7 +58,13 @@ export function useAgentConversation(
|
||||
const textAccRef = useRef('')
|
||||
const thinkAccRef = useRef('')
|
||||
const streamAbortRef = useRef<AbortController | null>(null)
|
||||
const onCompleteRef = useRef(options.onComplete)
|
||||
const onSessionKeyChangeRef = useRef(options.onSessionKeyChange)
|
||||
// Per-turn resume bookkeeping. `turnId` is captured from the response
|
||||
// header; `lastSeq` advances with every SSE event so a reconnect can
|
||||
// resume via Last-Event-ID.
|
||||
const turnIdRef = useRef<string | null>(null)
|
||||
const lastSeqRef = useRef<number | null>(null)
|
||||
|
||||
useEffect(() => {
|
||||
sessionKeyRef.current = options.sessionKey ?? ''
|
||||
@@ -49,6 +74,10 @@ export function useAgentConversation(
|
||||
historyRef.current = options.history ?? []
|
||||
}, [options.history])
|
||||
|
||||
useEffect(() => {
|
||||
onCompleteRef.current = options.onComplete
|
||||
}, [options.onComplete])
|
||||
|
||||
useEffect(() => {
|
||||
onSessionKeyChangeRef.current = options.onSessionKeyChange
|
||||
}, [options.onSessionKeyChange])
|
||||
@@ -59,6 +88,12 @@ export function useAgentConversation(
|
||||
}
|
||||
}, [])
|
||||
|
||||
// Indirection for the resume effect below: lets it call the latest
|
||||
// event handler without re-subscribing on every render.
|
||||
const processEventRef = useRef<(event: AgentHarnessStreamEvent) => void>(
|
||||
() => {},
|
||||
)
|
||||
|
||||
const updateCurrentTurnParts = (
|
||||
updater: (parts: AssistantPart[]) => AssistantPart[],
|
||||
) => {
|
||||
@@ -69,118 +104,219 @@ export function useAgentConversation(
|
||||
})
|
||||
}
|
||||
|
||||
const processStreamEvent = (event: OpenClawStreamEvent) => {
|
||||
switch (event.type) {
|
||||
case 'text-delta': {
|
||||
const delta = (event.data.text as string) ?? ''
|
||||
textAccRef.current += delta
|
||||
const text = textAccRef.current
|
||||
updateCurrentTurnParts((parts) => {
|
||||
const last = parts[parts.length - 1]
|
||||
if (last?.kind === 'text') {
|
||||
return [...parts.slice(0, -1), { ...last, text }]
|
||||
}
|
||||
return [...parts, { kind: 'text', text }]
|
||||
})
|
||||
break
|
||||
const appendTextDelta = (delta: string) => {
|
||||
textAccRef.current += delta
|
||||
const text = textAccRef.current
|
||||
updateCurrentTurnParts((parts) => {
|
||||
const last = parts[parts.length - 1]
|
||||
if (last?.kind === 'text') {
|
||||
return [...parts.slice(0, -1), { ...last, text }]
|
||||
}
|
||||
return [...parts, { kind: 'text', text }]
|
||||
})
|
||||
}
|
||||
|
||||
case 'thinking': {
|
||||
const delta = (event.data.text as string) ?? ''
|
||||
thinkAccRef.current += delta
|
||||
const text = thinkAccRef.current
|
||||
updateCurrentTurnParts((parts) => {
|
||||
const idx = parts.findIndex((p) => p.kind === 'thinking' && !p.done)
|
||||
if (idx >= 0) {
|
||||
return [
|
||||
...parts.slice(0, idx),
|
||||
{ ...parts[idx], text, done: false },
|
||||
...parts.slice(idx + 1),
|
||||
]
|
||||
}
|
||||
return [...parts, { kind: 'thinking', text, done: false }]
|
||||
})
|
||||
break
|
||||
const appendThinkingDelta = (delta: string) => {
|
||||
thinkAccRef.current += delta
|
||||
const text = thinkAccRef.current
|
||||
updateCurrentTurnParts((parts) => {
|
||||
const idx = parts.findIndex((p) => p.kind === 'thinking' && !p.done)
|
||||
if (idx >= 0) {
|
||||
return [
|
||||
...parts.slice(0, idx),
|
||||
{ ...parts[idx], text, done: false },
|
||||
...parts.slice(idx + 1),
|
||||
]
|
||||
}
|
||||
return [...parts, { kind: 'thinking', text, done: false }]
|
||||
})
|
||||
}
|
||||
|
||||
case 'tool-start': {
|
||||
const rawName = (event.data.toolName as string) ?? 'unknown'
|
||||
const args = event.data.args as Record<string, unknown> | undefined
|
||||
const { label, subject } = buildToolLabel(rawName, args)
|
||||
const tool = {
|
||||
id: (event.data.toolCallId as string) ?? crypto.randomUUID(),
|
||||
name: rawName,
|
||||
label,
|
||||
subject,
|
||||
status: 'running' as const,
|
||||
const appendErrorText = (message: string) => {
|
||||
updateCurrentTurnParts((parts) => [
|
||||
...parts,
|
||||
{ kind: 'text', text: `Error: ${message}` },
|
||||
])
|
||||
}
|
||||
|
||||
const markCurrentTurnDone = () => {
|
||||
updateCurrentTurnParts((parts) =>
|
||||
parts.map((part) =>
|
||||
part.kind === 'thinking' ? { ...part, done: true } : part,
|
||||
),
|
||||
)
|
||||
setTurns((prev) => {
|
||||
const last = prev[prev.length - 1]
|
||||
if (!last) return prev
|
||||
return [...prev.slice(0, -1), { ...last, done: true }]
|
||||
})
|
||||
}
|
||||
|
||||
const upsertAgentHarnessTool = (event: AgentHarnessStreamEvent) => {
|
||||
if (event.type !== 'tool_call') return
|
||||
const rawName = event.title || event.rawType || 'tool call'
|
||||
const { label, subject } = buildToolLabel(
|
||||
rawName,
|
||||
event.text ? { description: event.text } : undefined,
|
||||
)
|
||||
const tool: ToolEntry = {
|
||||
id: event.id ?? crypto.randomUUID(),
|
||||
name: rawName,
|
||||
label,
|
||||
subject,
|
||||
status: mapAgentHarnessToolStatus(event.status),
|
||||
}
|
||||
|
||||
updateCurrentTurnParts((parts) => {
|
||||
for (let i = parts.length - 1; i >= 0; i--) {
|
||||
const part = parts[i]
|
||||
if (
|
||||
part.kind === 'tool-batch' &&
|
||||
part.tools.some((existing) => existing.id === tool.id)
|
||||
) {
|
||||
const tools = part.tools.map((existing) =>
|
||||
existing.id === tool.id ? { ...existing, ...tool } : existing,
|
||||
)
|
||||
return [
|
||||
...parts.slice(0, i),
|
||||
{ ...part, tools },
|
||||
...parts.slice(i + 1),
|
||||
]
|
||||
}
|
||||
updateCurrentTurnParts((parts) => {
|
||||
const last = parts[parts.length - 1]
|
||||
if (last?.kind === 'tool-batch') {
|
||||
return [
|
||||
...parts.slice(0, -1),
|
||||
{ ...last, tools: [...last.tools, tool] },
|
||||
]
|
||||
}
|
||||
return [...parts, { kind: 'tool-batch', tools: [tool] }]
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
case 'tool-end': {
|
||||
const toolId = event.data.toolCallId as string
|
||||
const toolStatus: 'completed' | 'error' =
|
||||
(event.data.status as string) === 'error' ? 'error' : 'completed'
|
||||
const durationMs = event.data.durationMs as number | undefined
|
||||
updateCurrentTurnParts((parts) => {
|
||||
for (let i = parts.length - 1; i >= 0; i--) {
|
||||
const part = parts[i]
|
||||
if (
|
||||
part.kind === 'tool-batch' &&
|
||||
part.tools.some((t) => t.id === toolId)
|
||||
) {
|
||||
const updatedTools = part.tools.map((t) =>
|
||||
t.id === toolId ? { ...t, status: toolStatus, durationMs } : t,
|
||||
)
|
||||
return [
|
||||
...parts.slice(0, i),
|
||||
{ ...part, tools: updatedTools },
|
||||
...parts.slice(i + 1),
|
||||
]
|
||||
}
|
||||
}
|
||||
return parts
|
||||
})
|
||||
break
|
||||
const last = parts[parts.length - 1]
|
||||
if (last?.kind === 'tool-batch') {
|
||||
return [
|
||||
...parts.slice(0, -1),
|
||||
{ ...last, tools: [...last.tools, tool] },
|
||||
]
|
||||
}
|
||||
return [...parts, { kind: 'tool-batch', tools: [tool] }]
|
||||
})
|
||||
}
|
||||
|
||||
case 'done': {
|
||||
updateCurrentTurnParts((parts) =>
|
||||
parts.map((part) =>
|
||||
part.kind === 'thinking' ? { ...part, done: true } : part,
|
||||
),
|
||||
)
|
||||
setTurns((prev) => {
|
||||
const last = prev[prev.length - 1]
|
||||
if (!last) return prev
|
||||
return [...prev.slice(0, -1), { ...last, done: true }]
|
||||
})
|
||||
const processAgentHarnessStreamEvent = (event: AgentHarnessStreamEvent) => {
|
||||
switch (event.type) {
|
||||
case 'text_delta':
|
||||
if (event.stream === 'thought') {
|
||||
appendThinkingDelta(event.text)
|
||||
} else {
|
||||
appendTextDelta(event.text)
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
case 'error': {
|
||||
const msg =
|
||||
(event.data.message as string) ??
|
||||
(event.data.error as string) ??
|
||||
'Unknown error'
|
||||
updateCurrentTurnParts((parts) => [
|
||||
...parts,
|
||||
{ kind: 'text', text: `Error: ${msg}` },
|
||||
])
|
||||
case 'tool_call':
|
||||
upsertAgentHarnessTool(event)
|
||||
break
|
||||
case 'done':
|
||||
markCurrentTurnDone()
|
||||
break
|
||||
case 'error':
|
||||
appendErrorText(event.message)
|
||||
break
|
||||
case 'status':
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
processEventRef.current = processAgentHarnessStreamEvent
|
||||
|
||||
const activeTurnIdDep = options.activeTurnId ?? null
|
||||
|
||||
// On mount, on agent change, and whenever the listing reports a
|
||||
// *new* active turn id, check whether the server has an in-flight
|
||||
// turn for this agent and reattach to it. This catches three
|
||||
// cases at once: the chat resilience flow (tab close/reopen),
|
||||
// navigation between agents, AND queue drain (the server starts a
|
||||
// new turn from a queued message → activeTurnId flips → attach).
|
||||
useEffect(() => {
|
||||
let cancelled = false
|
||||
const abortController = new AbortController()
|
||||
// Reference the dep inside the body so biome's exhaustive-deps
|
||||
// rule sees it consumed; the value is just an "any non-null
|
||||
// active turn id" trigger — the actual id we attach to comes
|
||||
// from the fresh fetchActiveHarnessTurn call below.
|
||||
void activeTurnIdDep
|
||||
|
||||
const attemptResume = async () => {
|
||||
// Track whether *we* started a stream in this run. When the
|
||||
// early-return paths fire (no active turn, or a `send()` /
|
||||
// earlier resume already owns `streamAbortRef`), the finally
|
||||
// block must NOT touch streaming/turnIdRef/lastSeqRef —
|
||||
// otherwise we clobber the in-flight stream's state and the
|
||||
// Stop button drops out mid-turn while events keep arriving.
|
||||
let weStartedStream = false
|
||||
try {
|
||||
const active = await fetchActiveHarnessTurn(agentId)
|
||||
if (cancelled || !active || active.status !== 'running') return
|
||||
if (streamAbortRef.current) return // someone else already owns the stream
|
||||
|
||||
// Stage a placeholder turn so the streamed events have a row
|
||||
// to render into. The server now persists the kicking-off
|
||||
// prompt on the active turn, so we render it as the user
|
||||
// bubble immediately — no empty-bubble flicker when a queued
|
||||
// message starts running.
|
||||
setTurns((prev) => [
|
||||
...prev,
|
||||
{
|
||||
id: crypto.randomUUID(),
|
||||
userText: active.prompt ?? '',
|
||||
parts: [],
|
||||
done: false,
|
||||
timestamp: active.startedAt,
|
||||
},
|
||||
])
|
||||
textAccRef.current = ''
|
||||
thinkAccRef.current = ''
|
||||
turnIdRef.current = active.turnId
|
||||
lastSeqRef.current = null
|
||||
streamAbortRef.current = abortController
|
||||
setStreaming(true)
|
||||
weStartedStream = true
|
||||
|
||||
const response = await attachToHarnessTurn(agentId, {
|
||||
turnId: active.turnId,
|
||||
signal: abortController.signal,
|
||||
})
|
||||
if (!response.ok) return
|
||||
await consumeSSEStream<AgentHarnessStreamEvent>(
|
||||
response,
|
||||
(event, meta) => {
|
||||
if (typeof meta.seq === 'number') lastSeqRef.current = meta.seq
|
||||
processEventRef.current(event)
|
||||
},
|
||||
abortController.signal,
|
||||
)
|
||||
} catch {
|
||||
// Resume is best-effort; transient errors fall back to the
|
||||
// user starting a new turn manually.
|
||||
} finally {
|
||||
// Always release `streamAbortRef` if we owned it — even when
|
||||
// the effect was cancelled mid-stream (a listing poll
|
||||
// captured the next queue-drain turn id, for example). If we
|
||||
// don't, the next effect run hits `if (streamAbortRef.current)
|
||||
// return` against our now-aborted controller and never
|
||||
// reattaches, leaving `streaming === true` with no live stream.
|
||||
if (weStartedStream && streamAbortRef.current === abortController) {
|
||||
streamAbortRef.current = null
|
||||
}
|
||||
// The other state (streaming flag, turn id, lastSeq) is the
|
||||
// *current run's* lifecycle: only reset it on a clean exit.
|
||||
// When `cancelled` is true the next run will set these
|
||||
// itself, so resetting here would only cause a brief flicker.
|
||||
if (!cancelled && weStartedStream) {
|
||||
turnIdRef.current = null
|
||||
lastSeqRef.current = null
|
||||
setStreaming(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void attemptResume()
|
||||
return () => {
|
||||
cancelled = true
|
||||
abortController.abort()
|
||||
}
|
||||
}, [agentId, activeTurnIdDep])
|
||||
|
||||
const send = async (input: string | SendInput) => {
|
||||
const normalized: SendInput =
|
||||
@@ -210,19 +346,37 @@ export function useAgentConversation(
|
||||
streamAbortRef.current = abortController
|
||||
|
||||
try {
|
||||
const response = await chatWithAgent(
|
||||
let response = await chatWithHarnessAgent(
|
||||
agentId,
|
||||
trimmed,
|
||||
sessionKeyRef.current || undefined,
|
||||
historyRef.current,
|
||||
abortController.signal,
|
||||
attachments,
|
||||
)
|
||||
const responseSessionKey = response.headers.get('X-Session-Key')
|
||||
// 409 means the server already has an active turn for this
|
||||
// agent (e.g. a previous tab kicked one off and we're a fresh
|
||||
// mount that missed the resume window). Attach to it instead of
|
||||
// double-sending.
|
||||
if (response.status === 409) {
|
||||
const body = (await response.json()) as { turnId?: string }
|
||||
if (body.turnId) {
|
||||
response = await attachToHarnessTurn(agentId, {
|
||||
turnId: body.turnId,
|
||||
signal: abortController.signal,
|
||||
})
|
||||
}
|
||||
}
|
||||
const responseSessionKey =
|
||||
response.headers.get('X-Session-Key') ??
|
||||
response.headers.get('X-Session-Id')
|
||||
if (responseSessionKey) {
|
||||
sessionKeyRef.current = responseSessionKey
|
||||
onSessionKeyChangeRef.current?.(responseSessionKey)
|
||||
}
|
||||
const responseTurnId = response.headers.get('X-Turn-Id')
|
||||
if (responseTurnId) {
|
||||
turnIdRef.current = responseTurnId
|
||||
lastSeqRef.current = null
|
||||
}
|
||||
if (!response.ok) {
|
||||
const err = await response.text()
|
||||
updateCurrentTurnParts((parts) => [
|
||||
@@ -231,9 +385,12 @@ export function useAgentConversation(
|
||||
])
|
||||
return
|
||||
}
|
||||
await consumeSSEStream(
|
||||
await consumeSSEStream<AgentHarnessStreamEvent>(
|
||||
response,
|
||||
processStreamEvent,
|
||||
(event, meta) => {
|
||||
if (typeof meta.seq === 'number') lastSeqRef.current = meta.seq
|
||||
processAgentHarnessStreamEvent(event)
|
||||
},
|
||||
abortController.signal,
|
||||
)
|
||||
} catch (err) {
|
||||
@@ -247,13 +404,35 @@ export function useAgentConversation(
|
||||
if (streamAbortRef.current === abortController) {
|
||||
streamAbortRef.current = null
|
||||
}
|
||||
turnIdRef.current = null
|
||||
lastSeqRef.current = null
|
||||
onCompleteRef.current?.()
|
||||
setStreaming(false)
|
||||
}
|
||||
}
|
||||
|
||||
const resetConversation = () => {
|
||||
/**
|
||||
* Stop button. The fetch abort only detaches *this* SSE subscriber
|
||||
* now — the underlying turn would otherwise keep running on the
|
||||
* server. So we explicitly cancel via the new endpoint, then unwind
|
||||
* the local stream.
|
||||
*/
|
||||
const stop = async () => {
|
||||
const turnId = turnIdRef.current ?? undefined
|
||||
streamAbortRef.current?.abort()
|
||||
streamAbortRef.current = null
|
||||
try {
|
||||
await cancelHarnessTurn(agentId, {
|
||||
turnId,
|
||||
reason: 'user pressed stop',
|
||||
})
|
||||
} catch {
|
||||
// Best-effort — UI already aborted.
|
||||
}
|
||||
}
|
||||
|
||||
const resetConversation = () => {
|
||||
void stop()
|
||||
setTurns([])
|
||||
setStreaming(false)
|
||||
}
|
||||
@@ -263,6 +442,7 @@ export function useAgentConversation(
|
||||
streaming,
|
||||
sessionKey: sessionKeyRef.current,
|
||||
send,
|
||||
stop,
|
||||
resetConversation,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
import { useQuery, useQueryClient } from '@tanstack/react-query'
|
||||
import { useEffect } from 'react'
|
||||
import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
|
||||
|
||||
export interface AgentOverview {
|
||||
agentId: string
|
||||
status: 'working' | 'idle' | 'error' | 'unknown'
|
||||
latestMessage: string | null
|
||||
latestMessageAt: number | null
|
||||
activitySummary: string | null
|
||||
currentTool: string | null
|
||||
totalCostUsd: number
|
||||
sessionCount: number
|
||||
}
|
||||
|
||||
export interface DashboardResponse {
|
||||
agents: AgentOverview[]
|
||||
summary: {
|
||||
totalAgents: number
|
||||
totalCostUsd: number
|
||||
}
|
||||
}
|
||||
|
||||
interface StatusEvent {
|
||||
agentId: string
|
||||
status: AgentOverview['status']
|
||||
currentTool: string | null
|
||||
error: string | null
|
||||
timestamp: number
|
||||
}
|
||||
|
||||
const DASHBOARD_QUERY_KEY = ['claw', 'dashboard']
|
||||
|
||||
export function useAgentDashboard(enabled: boolean) {
|
||||
const { baseUrl, isLoading: urlLoading } = useAgentServerUrl()
|
||||
const queryClient = useQueryClient()
|
||||
const ready = enabled && Boolean(baseUrl) && !urlLoading
|
||||
|
||||
// Initial data load + periodic refresh as fallback
|
||||
const query = useQuery<DashboardResponse>({
|
||||
queryKey: [...DASHBOARD_QUERY_KEY, baseUrl],
|
||||
queryFn: async () => {
|
||||
const url = new URL('/claw/dashboard', baseUrl as string)
|
||||
const response = await fetch(url.toString())
|
||||
if (!response.ok) throw new Error('Failed to fetch dashboard')
|
||||
return response.json()
|
||||
},
|
||||
enabled: ready,
|
||||
})
|
||||
|
||||
// SSE subscription for real-time status patches
|
||||
useEffect(() => {
|
||||
if (!ready || !baseUrl) return
|
||||
|
||||
const streamUrl = new URL('/claw/dashboard/stream', baseUrl)
|
||||
const eventSource = new EventSource(streamUrl.toString())
|
||||
|
||||
eventSource.addEventListener('snapshot', (event) => {
|
||||
try {
|
||||
const dashboard = JSON.parse(event.data) as DashboardResponse
|
||||
queryClient.setQueryData([...DASHBOARD_QUERY_KEY, baseUrl], dashboard)
|
||||
} catch {}
|
||||
})
|
||||
|
||||
eventSource.addEventListener('status', (event) => {
|
||||
try {
|
||||
const status = JSON.parse(event.data) as StatusEvent
|
||||
queryClient.setQueryData<DashboardResponse>(
|
||||
[...DASHBOARD_QUERY_KEY, baseUrl],
|
||||
(prev) => {
|
||||
if (!prev) return prev
|
||||
return {
|
||||
...prev,
|
||||
agents: prev.agents.map((agent) =>
|
||||
agent.agentId === status.agentId
|
||||
? {
|
||||
...agent,
|
||||
status: status.status,
|
||||
currentTool: status.currentTool,
|
||||
}
|
||||
: agent,
|
||||
),
|
||||
}
|
||||
},
|
||||
)
|
||||
} catch {}
|
||||
})
|
||||
|
||||
return () => {
|
||||
eventSource.close()
|
||||
}
|
||||
}, [ready, baseUrl, queryClient])
|
||||
|
||||
return query
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
import { useInfiniteQuery } from '@tanstack/react-query'
|
||||
import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
|
||||
import type { AgentHistoryPageResponse } from './claw-chat-types'
|
||||
|
||||
const HISTORY_QUERY_KEY = 'claw-agent-history'
|
||||
|
||||
async function fetchClawJson<T>(url: string): Promise<T> {
|
||||
const response = await fetch(url)
|
||||
|
||||
if (!response.ok) {
|
||||
let message = `Request failed with status ${response.status}`
|
||||
try {
|
||||
const body = (await response.json()) as { error?: string }
|
||||
if (body.error) message = body.error
|
||||
} catch {}
|
||||
throw new Error(message)
|
||||
}
|
||||
|
||||
return response.json() as Promise<T>
|
||||
}
|
||||
|
||||
function buildClawUrl(baseUrl: string, path: string): URL {
|
||||
return new URL(`/claw${path}`, baseUrl)
|
||||
}
|
||||
|
||||
export function useClawChatHistory({
|
||||
agentId,
|
||||
sessionKey,
|
||||
enabled = true,
|
||||
limit = 50,
|
||||
}: {
|
||||
agentId: string
|
||||
// null lets the server resolve the most recent user-chat session for the
|
||||
// agent — avoids an extra /session round-trip and the race that came with it.
|
||||
sessionKey: string | null
|
||||
enabled?: boolean
|
||||
limit?: number
|
||||
}) {
|
||||
const {
|
||||
baseUrl,
|
||||
isLoading: urlLoading,
|
||||
error: urlError,
|
||||
} = useAgentServerUrl()
|
||||
|
||||
const query = useInfiniteQuery<AgentHistoryPageResponse, Error>({
|
||||
queryKey: [HISTORY_QUERY_KEY, baseUrl, agentId, sessionKey],
|
||||
initialPageParam: undefined as string | undefined,
|
||||
queryFn: async ({ pageParam }) => {
|
||||
const url = buildClawUrl(baseUrl as string, `/agents/${agentId}/history`)
|
||||
url.searchParams.set('limit', String(limit))
|
||||
|
||||
if (sessionKey) {
|
||||
url.searchParams.set('sessionKey', sessionKey)
|
||||
}
|
||||
if (typeof pageParam === 'string' && pageParam) {
|
||||
url.searchParams.set('cursor', pageParam)
|
||||
}
|
||||
|
||||
return fetchClawJson<AgentHistoryPageResponse>(url.toString())
|
||||
},
|
||||
getNextPageParam: (lastPage) =>
|
||||
lastPage.page.hasMore ? lastPage.page.cursor : undefined,
|
||||
enabled: enabled && Boolean(baseUrl) && !urlLoading && Boolean(agentId),
|
||||
})
|
||||
|
||||
return {
|
||||
...query,
|
||||
error: query.error ?? urlError,
|
||||
isLoading: query.isLoading || urlLoading,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { mapHarnessHistoryPage } from './harness-history-mapper'
|
||||
|
||||
describe('mapHarnessHistoryPage', () => {
|
||||
it('maps rich harness history into chat history items', () => {
|
||||
const page = mapHarnessHistoryPage({
|
||||
agentId: 'agent-1',
|
||||
sessionId: 'main',
|
||||
items: [
|
||||
{
|
||||
id: 'agent:agent-1:main:1',
|
||||
agentId: 'agent-1',
|
||||
sessionId: 'main',
|
||||
role: 'assistant',
|
||||
text: 'Done.',
|
||||
createdAt: 1000,
|
||||
reasoning: { text: 'checking state' },
|
||||
toolCalls: [
|
||||
{
|
||||
toolCallId: 'tool-1',
|
||||
toolName: 'read_file',
|
||||
status: 'completed',
|
||||
input: { path: 'src/index.ts' },
|
||||
output: 'file contents',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
expect(page.items).toEqual([
|
||||
{
|
||||
id: 'agent:agent-1:main:1',
|
||||
role: 'assistant',
|
||||
text: 'Done.',
|
||||
timestamp: 1000,
|
||||
messageSeq: 1,
|
||||
sessionKey: 'main',
|
||||
source: 'user-chat',
|
||||
reasoning: { text: 'checking state' },
|
||||
toolCalls: [
|
||||
{
|
||||
toolCallId: 'tool-1',
|
||||
toolName: 'read_file',
|
||||
label: 'Read file',
|
||||
subject: 'index.ts',
|
||||
status: 'completed',
|
||||
input: { path: 'src/index.ts' },
|
||||
output: 'file contents',
|
||||
},
|
||||
],
|
||||
},
|
||||
])
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,29 @@
|
||||
import { useQuery } from '@tanstack/react-query'
|
||||
import { fetchHarnessAgentHistory } from '@/entrypoints/app/agents/useAgents'
|
||||
import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
|
||||
import type { AgentHistoryPageResponse } from './claw-chat-types'
|
||||
import { mapHarnessHistoryPage } from './harness-history-mapper'
|
||||
|
||||
const HISTORY_QUERY_KEY = 'harness-agent-history'
|
||||
|
||||
export function useHarnessChatHistory(agentId: string, enabled = true) {
|
||||
const {
|
||||
baseUrl,
|
||||
isLoading: urlLoading,
|
||||
error: urlError,
|
||||
} = useAgentServerUrl()
|
||||
|
||||
const query = useQuery<AgentHistoryPageResponse, Error>({
|
||||
queryKey: [HISTORY_QUERY_KEY, baseUrl, agentId, 'main'],
|
||||
queryFn: async () => {
|
||||
return mapHarnessHistoryPage(await fetchHarnessAgentHistory(agentId))
|
||||
},
|
||||
enabled: Boolean(baseUrl) && !urlLoading && enabled && Boolean(agentId),
|
||||
})
|
||||
|
||||
return {
|
||||
...query,
|
||||
error: query.error ?? urlError,
|
||||
isLoading: query.isLoading || urlLoading,
|
||||
}
|
||||
}
|
||||
@@ -1,270 +0,0 @@
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import type { OpenClawChatHistoryMessage } from '@/entrypoints/app/agents/useOpenClaw'
|
||||
import type { UserAttachmentPreview } from '@/lib/agent-conversations/types'
|
||||
import type { ServerAttachmentPayload } from '@/lib/attachments'
|
||||
import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
|
||||
|
||||
export type OutboundMessageStatus = 'queued' | 'sending' | 'failed'
|
||||
|
||||
export interface OutboundMessage {
|
||||
id: string
|
||||
text: string
|
||||
attachments: ServerAttachmentPayload[]
|
||||
attachmentPreviews: UserAttachmentPreview[]
|
||||
status: OutboundMessageStatus
|
||||
error?: string
|
||||
createdAt: number
|
||||
}
|
||||
|
||||
export interface OutboundQueueEnqueueInput {
|
||||
text: string
|
||||
attachments?: ServerAttachmentPayload[]
|
||||
attachmentPreviews?: UserAttachmentPreview[]
|
||||
history?: OpenClawChatHistoryMessage[]
|
||||
}
|
||||
|
||||
export interface OutboundQueueApi {
|
||||
queue: OutboundMessage[]
|
||||
enqueue(input: OutboundQueueEnqueueInput): void
|
||||
cancel(id: string): void
|
||||
retry(id: string): void
|
||||
}
|
||||
|
||||
interface UseOutboundQueueOptions {
|
||||
agentId: string | null | undefined
|
||||
sessionKey?: string | null
|
||||
}
|
||||
|
||||
interface ServerQueuedItem {
|
||||
id: string
|
||||
status: 'queued' | 'dispatching' | 'failed'
|
||||
message: string
|
||||
attachmentsPreview: Array<{
|
||||
kind: 'image' | 'file'
|
||||
mediaType: string
|
||||
name?: string
|
||||
}>
|
||||
error?: string
|
||||
createdAt: number
|
||||
}
|
||||
|
||||
function makeId(): string {
|
||||
if (typeof crypto !== 'undefined' && crypto.randomUUID) {
|
||||
return crypto.randomUUID()
|
||||
}
|
||||
return `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Server-backed outbound message queue. The browser is purely a
|
||||
* projection of server state — closing the tab is safe because the queue
|
||||
* keeps draining server-side via the OutboundQueueService.
|
||||
*
|
||||
* Single id-keyed list: the client generates the queue id and hands it
|
||||
* to the server in the POST body, so the optimistic row and the SSE
|
||||
* snapshot reconcile on the same key from frame zero — there is no
|
||||
* window in which the message renders twice.
|
||||
*/
|
||||
export function useOutboundQueue(
|
||||
options: UseOutboundQueueOptions,
|
||||
): OutboundQueueApi {
|
||||
const { agentId, sessionKey } = options
|
||||
const { baseUrl } = useAgentServerUrl()
|
||||
const sessionKeyRef = useRef<string | null | undefined>(sessionKey)
|
||||
sessionKeyRef.current = sessionKey
|
||||
|
||||
const [items, setItems] = useState<OutboundMessage[]>([])
|
||||
// Track which ids the server has confirmed seeing in any SSE snapshot.
|
||||
// We use this to know whether a missing-from-snapshot id is "drained
|
||||
// by the server" (drop it) or "still in flight client-side" (keep
|
||||
// showing the optimistic row).
|
||||
const everSeenByServerRef = useRef<Set<string>>(new Set())
|
||||
// Local-only attachment previews, keyed by queue id. Data URLs never
|
||||
// leave the browser — the SSE feed only carries metadata, so we hold
|
||||
// them here so the chip strip keeps rendering after server takeover.
|
||||
const previewMapRef = useRef<Map<string, UserAttachmentPreview[]>>(new Map())
|
||||
|
||||
useEffect(() => {
|
||||
if (!baseUrl || !agentId) {
|
||||
setItems([])
|
||||
everSeenByServerRef.current = new Set()
|
||||
previewMapRef.current = new Map()
|
||||
return
|
||||
}
|
||||
let cancelled = false
|
||||
const url = `${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue/stream`
|
||||
const source = new EventSource(url)
|
||||
source.onmessage = (event) => {
|
||||
if (cancelled) return
|
||||
try {
|
||||
const parsed = JSON.parse(event.data) as { items: ServerQueuedItem[] }
|
||||
const snapshotIds = new Set(parsed.items.map((item) => item.id))
|
||||
for (const id of snapshotIds) everSeenByServerRef.current.add(id)
|
||||
|
||||
setItems((prev) => {
|
||||
const next: OutboundMessage[] = parsed.items.map((item) => ({
|
||||
id: item.id,
|
||||
text: item.message,
|
||||
attachments: [],
|
||||
attachmentPreviews: previewMapRef.current.get(item.id) ?? [],
|
||||
status: serverStatusToClient(item.status),
|
||||
error: item.error,
|
||||
createdAt: item.createdAt,
|
||||
}))
|
||||
// Carry forward any optimistic / failed entries the server
|
||||
// doesn't know about yet (POST in flight) or has finished
|
||||
// dispatching but the client wants to keep visible (failed).
|
||||
const carried = prev.filter((local) => {
|
||||
if (snapshotIds.has(local.id)) return false
|
||||
if (everSeenByServerRef.current.has(local.id)) {
|
||||
// Server saw it before and it's gone now — drained.
|
||||
previewMapRef.current.delete(local.id)
|
||||
return false
|
||||
}
|
||||
return local.status !== 'failed' || Boolean(local.error)
|
||||
})
|
||||
return [...carried, ...next]
|
||||
})
|
||||
} catch {
|
||||
// Malformed event — ignore; next snapshot will recover.
|
||||
}
|
||||
}
|
||||
source.onerror = () => {
|
||||
// Auto-reconnects; nothing to do here.
|
||||
}
|
||||
return () => {
|
||||
cancelled = true
|
||||
source.close()
|
||||
}
|
||||
}, [baseUrl, agentId])
|
||||
|
||||
const enqueue = useCallback(
|
||||
(input: OutboundQueueEnqueueInput) => {
|
||||
if (!baseUrl || !agentId) return
|
||||
const trimmed = input.text.trim()
|
||||
const attachments = input.attachments ?? []
|
||||
if (!trimmed && attachments.length === 0) return
|
||||
|
||||
const id = makeId()
|
||||
const previews = input.attachmentPreviews ?? []
|
||||
previewMapRef.current.set(id, previews)
|
||||
setItems((prev) => [
|
||||
...prev,
|
||||
{
|
||||
id,
|
||||
text: trimmed,
|
||||
attachments,
|
||||
attachmentPreviews: previews,
|
||||
status: 'queued',
|
||||
createdAt: Date.now(),
|
||||
},
|
||||
])
|
||||
|
||||
void (async () => {
|
||||
try {
|
||||
const response = await fetch(
|
||||
`${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
id,
|
||||
message: trimmed,
|
||||
attachments: attachments.length > 0 ? attachments : undefined,
|
||||
sessionKey: sessionKeyRef.current ?? undefined,
|
||||
history: input.history,
|
||||
}),
|
||||
},
|
||||
)
|
||||
if (!response.ok) {
|
||||
const text = await response.text().catch(() => '')
|
||||
previewMapRef.current.delete(id)
|
||||
setItems((prev) =>
|
||||
prev.map((item) =>
|
||||
item.id === id
|
||||
? {
|
||||
...item,
|
||||
status: 'failed',
|
||||
error:
|
||||
text || `Failed to enqueue (status ${response.status})`,
|
||||
}
|
||||
: item,
|
||||
),
|
||||
)
|
||||
}
|
||||
} catch (err) {
|
||||
// Only mark as failed if the SSE snapshot hasn't already
|
||||
// taken ownership of the entry (i.e. the request actually
|
||||
// reached the server).
|
||||
if (everSeenByServerRef.current.has(id)) return
|
||||
previewMapRef.current.delete(id)
|
||||
setItems((prev) =>
|
||||
prev.map((item) =>
|
||||
item.id === id
|
||||
? {
|
||||
...item,
|
||||
status: 'failed',
|
||||
error:
|
||||
err instanceof Error
|
||||
? err.message
|
||||
: 'Failed to enqueue message',
|
||||
}
|
||||
: item,
|
||||
),
|
||||
)
|
||||
}
|
||||
})()
|
||||
},
|
||||
[baseUrl, agentId],
|
||||
)
|
||||
|
||||
const cancel = useCallback(
|
||||
(id: string) => {
|
||||
// If the server has never seen this id, just drop it locally.
|
||||
if (!everSeenByServerRef.current.has(id)) {
|
||||
previewMapRef.current.delete(id)
|
||||
setItems((prev) => prev.filter((item) => item.id !== id))
|
||||
return
|
||||
}
|
||||
if (!baseUrl || !agentId) return
|
||||
void fetch(
|
||||
`${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue/${encodeURIComponent(id)}`,
|
||||
{ method: 'DELETE' },
|
||||
).catch(() => {})
|
||||
},
|
||||
[baseUrl, agentId],
|
||||
)
|
||||
|
||||
const retry = useCallback(
|
||||
(id: string) => {
|
||||
if (!everSeenByServerRef.current.has(id)) {
|
||||
// Optimistic-only entry, never made it to the server. Reset
|
||||
// status so the user can press Send again.
|
||||
setItems((prev) =>
|
||||
prev.map((item) =>
|
||||
item.id === id
|
||||
? { ...item, status: 'queued', error: undefined }
|
||||
: item,
|
||||
),
|
||||
)
|
||||
return
|
||||
}
|
||||
if (!baseUrl || !agentId) return
|
||||
void fetch(
|
||||
`${baseUrl}/claw/agents/${encodeURIComponent(agentId)}/queue/${encodeURIComponent(id)}/retry`,
|
||||
{ method: 'POST' },
|
||||
).catch(() => {})
|
||||
},
|
||||
[baseUrl, agentId],
|
||||
)
|
||||
|
||||
return { queue: items, enqueue, cancel, retry }
|
||||
}
|
||||
|
||||
function serverStatusToClient(
|
||||
status: ServerQueuedItem['status'],
|
||||
): OutboundMessageStatus {
|
||||
if (status === 'dispatching') return 'sending'
|
||||
if (status === 'failed') return 'failed'
|
||||
return 'queued'
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
import { Bot, Cpu, Sparkles } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import type { HarnessAgentAdapter } from './agent-harness-types'
|
||||
|
||||
/**
|
||||
* Single icon component for any adapter the agent rail can render.
|
||||
* Falls back to a generic bot when the adapter is unknown so future
|
||||
* adapters land without a code change at the call site.
|
||||
*/
|
||||
interface AdapterIconProps {
|
||||
adapter: HarnessAgentAdapter | 'unknown'
|
||||
className?: string
|
||||
}
|
||||
|
||||
export const AdapterIcon: FC<AdapterIconProps> = ({ adapter, className }) => {
|
||||
switch (adapter) {
|
||||
case 'claude':
|
||||
// Claude Code — text-based agent, sparkles to evoke the "AI assistant" feel.
|
||||
return <Sparkles className={className} aria-label="Claude Code" />
|
||||
case 'codex':
|
||||
// Codex — code-leaning, CPU mark.
|
||||
return <Cpu className={className} aria-label="Codex" />
|
||||
case 'openclaw':
|
||||
// OpenClaw — bot/automation framing.
|
||||
return <Bot className={className} aria-label="OpenClaw" />
|
||||
default:
|
||||
return <Bot className={className} aria-label="Agent" />
|
||||
}
|
||||
}
|
||||
|
||||
export function adapterLabel(adapter: HarnessAgentAdapter | 'unknown'): string {
|
||||
switch (adapter) {
|
||||
case 'claude':
|
||||
return 'Claude Code'
|
||||
case 'codex':
|
||||
return 'Codex'
|
||||
case 'openclaw':
|
||||
return 'OpenClaw'
|
||||
default:
|
||||
return 'Agent'
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,176 @@
|
||||
import { Loader2 } from 'lucide-react'
|
||||
import { type FC, useMemo } from 'react'
|
||||
import { AgentRowCard } from './AgentRowCard'
|
||||
import { AgentsEmptyState } from './AgentsEmptyState'
|
||||
import type {
|
||||
HarnessAdapterDescriptor,
|
||||
HarnessAgent,
|
||||
HarnessAgentAdapter,
|
||||
} from './agent-harness-types'
|
||||
import type {
|
||||
AgentAdapterHealth,
|
||||
AgentRowData,
|
||||
} from './agent-row/agent-row.types'
|
||||
import type { AgentListItem } from './agents-page-types'
|
||||
import type { AgentLiveness } from './LivenessDot'
|
||||
|
||||
interface AgentListProps {
|
||||
agents: AgentListItem[]
|
||||
/** Optional per-agent activity metadata, keyed by `agentId`. */
|
||||
activity?: Record<
|
||||
string,
|
||||
{ status: AgentLiveness; lastUsedAt: number | null }
|
||||
>
|
||||
/** Lookup table from harness id → enriched agent record. */
|
||||
harnessAgentLookup?: Map<string, HarnessAgent>
|
||||
/** Adapter catalog (carries per-adapter health). */
|
||||
adapters: HarnessAdapterDescriptor[]
|
||||
loading: boolean
|
||||
deletingAgentKey: string | null
|
||||
onCreateAgent: () => void
|
||||
onDeleteAgent: (agent: AgentListItem) => void
|
||||
onPinToggle: (agent: AgentListItem, next: boolean) => void
|
||||
}
|
||||
|
||||
export const AgentList: FC<AgentListProps> = ({
|
||||
agents,
|
||||
activity,
|
||||
harnessAgentLookup,
|
||||
adapters,
|
||||
loading,
|
||||
deletingAgentKey,
|
||||
onCreateAgent,
|
||||
onDeleteAgent,
|
||||
onPinToggle,
|
||||
}) => {
|
||||
const adapterHealth = useMemo(() => {
|
||||
const map = new Map<HarnessAgentAdapter, AgentAdapterHealth>()
|
||||
for (const adapter of adapters) {
|
||||
if (adapter.health) {
|
||||
map.set(adapter.id, {
|
||||
healthy: adapter.health.healthy,
|
||||
reason: adapter.health.reason,
|
||||
})
|
||||
}
|
||||
}
|
||||
return map
|
||||
}, [adapters])
|
||||
|
||||
// Sort: pinned rows first, then most recently used, then never-used
|
||||
// agents in id-stable order. The gateway's `main` agent stays
|
||||
// pinned-to-top when never touched so a fresh install has an
|
||||
// obvious starting point.
|
||||
const ordered = useMemo(() => {
|
||||
const withMeta = agents.map((agent) => {
|
||||
const harness = harnessAgentLookup?.get(agent.agentId)
|
||||
return {
|
||||
agent,
|
||||
pinned: harness?.pinned ?? false,
|
||||
lastUsedAt: activity?.[agent.agentId]?.lastUsedAt ?? null,
|
||||
}
|
||||
})
|
||||
return withMeta
|
||||
.sort((a, b) => {
|
||||
if (a.pinned !== b.pinned) return a.pinned ? -1 : 1
|
||||
const aSeed = a.agent.agentId === 'main' && a.lastUsedAt === null
|
||||
const bSeed = b.agent.agentId === 'main' && b.lastUsedAt === null
|
||||
if (aSeed && !bSeed) return -1
|
||||
if (!aSeed && bSeed) return 1
|
||||
const aValue = a.lastUsedAt ?? -Infinity
|
||||
const bValue = b.lastUsedAt ?? -Infinity
|
||||
if (aValue !== bValue) return bValue - aValue
|
||||
return a.agent.agentId.localeCompare(b.agent.agentId)
|
||||
})
|
||||
.map((entry) => entry.agent)
|
||||
}, [activity, agents, harnessAgentLookup])
|
||||
|
||||
if (loading && agents.length === 0) {
|
||||
return (
|
||||
<div className="flex h-36 items-center justify-center rounded-xl border border-border border-dashed bg-card/50">
|
||||
<Loader2 className="size-5 animate-spin text-muted-foreground" />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
if (agents.length === 0) {
|
||||
return <AgentsEmptyState onCreateAgent={onCreateAgent} />
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="grid gap-3">
|
||||
{ordered.map((agent) => {
|
||||
const harness = harnessAgentLookup?.get(agent.agentId)
|
||||
const adapter: HarnessAgentAdapter | 'unknown' =
|
||||
harness?.adapter ?? inferAdapterFromLabel(agent.runtimeLabel)
|
||||
const data = buildRowData({
|
||||
agent,
|
||||
adapter,
|
||||
harness,
|
||||
activity: activity?.[agent.agentId],
|
||||
adapterHealth:
|
||||
adapterHealth.get(adapter as HarnessAgentAdapter) ?? null,
|
||||
})
|
||||
return (
|
||||
<AgentRowCard
|
||||
key={agent.key}
|
||||
data={data}
|
||||
deleting={deletingAgentKey === agent.key}
|
||||
onDelete={onDeleteAgent}
|
||||
onPinToggle={onPinToggle}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
function inferAdapterFromLabel(label: string): HarnessAgentAdapter | 'unknown' {
|
||||
const lower = label?.toLowerCase()
|
||||
if (lower === 'claude code') return 'claude'
|
||||
if (lower === 'codex') return 'codex'
|
||||
if (lower === 'openclaw') return 'openclaw'
|
||||
return 'unknown'
|
||||
}
|
||||
|
||||
const ZERO_BUCKETS = (): number[] => Array.from({ length: 14 }, () => 0)
|
||||
|
||||
function buildRowData(input: {
|
||||
agent: AgentListItem
|
||||
adapter: HarnessAgentAdapter | 'unknown'
|
||||
harness: HarnessAgent | undefined
|
||||
activity: { status: AgentLiveness; lastUsedAt: number | null } | undefined
|
||||
adapterHealth: AgentAdapterHealth | null
|
||||
}): AgentRowData {
|
||||
const { agent, adapter, harness, activity, adapterHealth } = input
|
||||
return {
|
||||
agent,
|
||||
adapter,
|
||||
modelLabel: deriveModelLabel(agent, harness),
|
||||
reasoningEffort: harness?.reasoningEffort ?? null,
|
||||
status: activity?.status ?? 'unknown',
|
||||
lastUsedAt: activity?.lastUsedAt ?? harness?.lastUsedAt ?? null,
|
||||
pinned: harness?.pinned ?? false,
|
||||
cwd: harness?.cwd ?? null,
|
||||
lastUserMessage: harness?.lastUserMessage ?? null,
|
||||
tokens: harness?.tokens ?? null,
|
||||
turnsByDay: harness?.turnsByDay ?? ZERO_BUCKETS(),
|
||||
failedByDay: harness?.failedByDay ?? ZERO_BUCKETS(),
|
||||
lastError: harness?.lastError ?? null,
|
||||
lastErrorAt: harness?.lastErrorAt ?? null,
|
||||
activeTurnId: harness?.activeTurnId ?? null,
|
||||
adapterHealth,
|
||||
}
|
||||
}
|
||||
|
||||
function deriveModelLabel(
|
||||
agent: AgentListItem,
|
||||
harness: HarnessAgent | undefined,
|
||||
): string | null {
|
||||
// Prefer the agent rail's modelLabel when meaningful; harness's
|
||||
// modelId is a stable identifier but the rail's `modelLabel`
|
||||
// already maps to a friendly display string.
|
||||
if (agent.modelLabel && agent.modelLabel !== 'default') {
|
||||
return agent.modelLabel
|
||||
}
|
||||
return harness?.modelId ?? null
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
import type { FC } from 'react'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { AgentActions } from './agent-row/AgentActions'
|
||||
import { AgentErrorPanel } from './agent-row/AgentErrorPanel'
|
||||
import { AgentLastMessage } from './agent-row/AgentLastMessage'
|
||||
import { AgentMetaRow } from './agent-row/AgentMetaRow'
|
||||
import { AgentSummaryChips } from './agent-row/AgentSummaryChips'
|
||||
import { AgentTile } from './agent-row/AgentTile'
|
||||
import { AgentTitleRow } from './agent-row/AgentTitleRow'
|
||||
import type {
|
||||
AgentRowCallbacks,
|
||||
AgentRowData,
|
||||
} from './agent-row/agent-row.types'
|
||||
|
||||
interface AgentRowCardProps extends AgentRowCallbacks {
|
||||
data: AgentRowData
|
||||
/** Whether THIS agent is mid-delete; renders a spinner in the menu. */
|
||||
deleting?: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* Composition shell for the agent rail. Owns no state; sub-components
|
||||
* each handle their own micro-state (error-panel collapse, etc.) and
|
||||
* emit callbacks (delete, pin/unpin) for the page to act on.
|
||||
*
|
||||
* The whole card carries state — not just the tile — so the row's
|
||||
* border subtly tells the user what's going on at a glance:
|
||||
* working → accent-orange border with a soft glow
|
||||
* error → destructive border
|
||||
* idle → muted border, lifts on hover
|
||||
*/
|
||||
export const AgentRowCard: FC<AgentRowCardProps> = ({
|
||||
data,
|
||||
deleting,
|
||||
onDelete,
|
||||
onPinToggle,
|
||||
}) => {
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
// Layout-stable hover. No translate, no shadow change — both
|
||||
// visibly perturb neighbouring rows. Only the border tint
|
||||
// shifts on hover, and the rail's vertical rhythm stays
|
||||
// exactly the same in every state.
|
||||
'group rounded-xl border bg-card p-4 shadow-sm transition-colors',
|
||||
data.status === 'working'
|
||||
? 'border-[var(--accent-orange)]/40'
|
||||
: data.status === 'error'
|
||||
? 'border-destructive/40'
|
||||
: 'border-border hover:border-[var(--accent-orange)]/30',
|
||||
)}
|
||||
>
|
||||
<div className="flex items-start gap-4">
|
||||
<AgentTile
|
||||
adapter={data.adapter}
|
||||
status={data.status}
|
||||
lastUsedAt={data.lastUsedAt}
|
||||
/>
|
||||
|
||||
<div className="min-w-0 flex-1">
|
||||
<AgentTitleRow
|
||||
agent={data.agent}
|
||||
status={data.status}
|
||||
pinned={data.pinned}
|
||||
turnsByDay={data.turnsByDay}
|
||||
failedByDay={data.failedByDay}
|
||||
onPinToggle={(next) => onPinToggle(data.agent, next)}
|
||||
/>
|
||||
|
||||
<AgentSummaryChips
|
||||
adapter={data.adapter}
|
||||
modelLabel={data.modelLabel}
|
||||
reasoningEffort={data.reasoningEffort}
|
||||
adapterHealth={data.adapterHealth}
|
||||
/>
|
||||
|
||||
<AgentLastMessage message={data.lastUserMessage} />
|
||||
|
||||
<AgentMetaRow lastUsedAt={data.lastUsedAt} tokens={data.tokens} />
|
||||
|
||||
{data.status === 'error' && data.lastError && (
|
||||
<AgentErrorPanel
|
||||
agentId={data.agent.agentId}
|
||||
message={data.lastError}
|
||||
errorAt={data.lastErrorAt}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<AgentActions
|
||||
agent={data.agent}
|
||||
activeTurnId={data.activeTurnId}
|
||||
deleting={deleting}
|
||||
onDelete={onDelete}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
import { Bot, Plus } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
|
||||
interface AgentsEmptyStateProps {
|
||||
onCreateAgent: () => void
|
||||
}
|
||||
|
||||
export const AgentsEmptyState: FC<AgentsEmptyStateProps> = ({
|
||||
onCreateAgent,
|
||||
}) => {
|
||||
return (
|
||||
<div className="rounded-xl border border-border border-dashed bg-card/50 p-12 text-center">
|
||||
<div className="mx-auto mb-4 flex h-12 w-12 items-center justify-center rounded-xl bg-[var(--accent-orange)]/10">
|
||||
<Bot className="h-6 w-6 text-[var(--accent-orange)]" />
|
||||
</div>
|
||||
<h3 className="mb-1 font-semibold">No agents yet</h3>
|
||||
<p className="mx-auto mb-4 max-w-sm text-muted-foreground text-sm">
|
||||
Spin up an OpenClaw, Claude Code, or Codex agent to chat with, schedule,
|
||||
or run in the background.
|
||||
</p>
|
||||
<Button
|
||||
onClick={onCreateAgent}
|
||||
variant="outline"
|
||||
className="border-[var(--accent-orange)] bg-[var(--accent-orange)]/10 text-[var(--accent-orange)] hover:bg-[var(--accent-orange)]/20 hover:text-[var(--accent-orange)]"
|
||||
>
|
||||
<Plus className="mr-1.5 h-4 w-4" />
|
||||
Create your first agent
|
||||
</Button>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
import { Bot, Plus } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
|
||||
interface AgentsHeaderProps {
|
||||
onCreateAgent: () => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Mirrors the visual shape of `SoulHeader` and `ScheduledTasksHeader`
|
||||
* so the page reads as part of the same family. Loose lifecycle
|
||||
* controls that used to sit next to the title moved into
|
||||
* `GatewayStatusBar` — they're OpenClaw-specific and don't apply to
|
||||
* Claude/Codex agents.
|
||||
*/
|
||||
export const AgentsHeader: FC<AgentsHeaderProps> = ({ onCreateAgent }) => {
|
||||
return (
|
||||
<div className="rounded-xl border border-border bg-card p-6 shadow-sm transition-all hover:shadow-md">
|
||||
<div className="flex items-start gap-4">
|
||||
<div className="flex h-12 w-12 shrink-0 items-center justify-center rounded-xl bg-[var(--accent-orange)]/10">
|
||||
<Bot className="h-6 w-6 text-[var(--accent-orange)]" />
|
||||
</div>
|
||||
<div className="flex-1">
|
||||
<h2 className="mb-1 font-semibold text-xl">Agents</h2>
|
||||
<p className="text-muted-foreground text-sm">
|
||||
OpenClaw, Claude Code, and Codex agents — chat, schedule, and run
|
||||
them in the background.
|
||||
</p>
|
||||
</div>
|
||||
<Button
|
||||
onClick={onCreateAgent}
|
||||
className="border-[var(--accent-orange)] bg-[var(--accent-orange)]/10 text-[var(--accent-orange)] hover:bg-[var(--accent-orange)]/20 hover:text-[var(--accent-orange)]"
|
||||
variant="outline"
|
||||
>
|
||||
<Plus className="mr-1.5 h-4 w-4" />
|
||||
New Agent
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,206 @@
|
||||
import { Loader2, RotateCcw, Terminal } from 'lucide-react'
|
||||
import type { FC, ReactNode } from 'react'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Separator } from '@/components/ui/separator'
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipProvider,
|
||||
TooltipTrigger,
|
||||
} from '@/components/ui/tooltip'
|
||||
import { cn } from '@/lib/utils'
|
||||
import type { OpenClawStatus } from './useOpenClaw'
|
||||
|
||||
interface GatewayStatusBarProps {
|
||||
status: OpenClawStatus | null
|
||||
/** Disabled while a gateway lifecycle mutation is mid-flight. */
|
||||
actionInProgress: boolean
|
||||
onOpenTerminal: () => void
|
||||
onRestart: () => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Compact one-line status bar for the OpenClaw gateway. Renders the
|
||||
* lifecycle pills (Running / Control plane connected) plus a Terminal
|
||||
* escape hatch and a Restart Gateway action. Lives between the page
|
||||
* header and the agent list when at least one OpenClaw agent is in
|
||||
* the merged list; collapses to nothing for Claude/Codex-only setups.
|
||||
*
|
||||
* Status is sourced from `GET /agents`'s `gateway` field — the agents
|
||||
* page no longer polls `/claw/status` directly. One endpoint, one
|
||||
* 5s interval, no duplicate state.
|
||||
*/
|
||||
export const GatewayStatusBar: FC<GatewayStatusBarProps> = ({
|
||||
status,
|
||||
actionInProgress,
|
||||
onOpenTerminal,
|
||||
onRestart,
|
||||
}) => {
|
||||
if (!status) return null
|
||||
|
||||
const runningPill = pillForRuntimeStatus(status.status)
|
||||
const controlPlanePill = pillForControlPlane(status.controlPlaneStatus)
|
||||
|
||||
return (
|
||||
<div className="rounded-xl border border-border bg-card px-4 py-3 shadow-sm">
|
||||
<div className="flex items-center gap-3 text-sm">
|
||||
<span className="font-medium text-muted-foreground">
|
||||
OpenClaw gateway
|
||||
</span>
|
||||
<Badge
|
||||
variant={runningPill.variant}
|
||||
className={cn('gap-1.5', runningPill.className)}
|
||||
>
|
||||
<span
|
||||
className={cn(
|
||||
'inline-block h-1.5 w-1.5 rounded-full',
|
||||
runningPill.dot,
|
||||
)}
|
||||
/>
|
||||
{runningPill.label}
|
||||
</Badge>
|
||||
<Badge
|
||||
variant={controlPlanePill.variant}
|
||||
className={cn('gap-1.5', controlPlanePill.className)}
|
||||
>
|
||||
<span
|
||||
className={cn(
|
||||
'inline-block h-1.5 w-1.5 rounded-full',
|
||||
controlPlanePill.dot,
|
||||
)}
|
||||
/>
|
||||
{controlPlanePill.label}
|
||||
</Badge>
|
||||
<Separator orientation="vertical" className="h-4" />
|
||||
<WithTooltip label="Open a shell into the OpenClaw gateway container for raw CLI access (config edits, session inspection).">
|
||||
<Button variant="ghost" size="sm" onClick={onOpenTerminal}>
|
||||
<Terminal className="mr-1.5 h-3.5 w-3.5" />
|
||||
Terminal
|
||||
</Button>
|
||||
</WithTooltip>
|
||||
<WithTooltip label="Restart the OpenClaw gateway. Useful when the gateway is stuck or after editing provider config.">
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={onRestart}
|
||||
disabled={actionInProgress}
|
||||
className="ml-auto"
|
||||
>
|
||||
{actionInProgress ? (
|
||||
<Loader2 className="mr-1.5 h-3.5 w-3.5 animate-spin" />
|
||||
) : (
|
||||
<RotateCcw className="mr-1.5 h-3.5 w-3.5" />
|
||||
)}
|
||||
Restart Gateway
|
||||
</Button>
|
||||
</WithTooltip>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const WithTooltip: FC<{ label: string; children: ReactNode }> = ({
|
||||
label,
|
||||
children,
|
||||
}) => (
|
||||
<TooltipProvider delayDuration={250}>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>{children}</TooltipTrigger>
|
||||
<TooltipContent side="bottom" className="max-w-xs text-xs">
|
||||
{label}
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)
|
||||
|
||||
type PillKind = {
|
||||
variant: 'default' | 'secondary' | 'outline' | 'destructive'
|
||||
label: string
|
||||
dot: string
|
||||
className?: string
|
||||
}
|
||||
|
||||
function pillForRuntimeStatus(status: OpenClawStatus['status']): PillKind {
|
||||
switch (status) {
|
||||
case 'running':
|
||||
return {
|
||||
variant: 'secondary',
|
||||
label: 'Running',
|
||||
dot: 'bg-emerald-500',
|
||||
className: 'bg-emerald-50 text-emerald-900 hover:bg-emerald-50',
|
||||
}
|
||||
case 'starting':
|
||||
return {
|
||||
variant: 'secondary',
|
||||
label: 'Starting',
|
||||
dot: 'bg-amber-500 animate-pulse',
|
||||
className: 'bg-amber-50 text-amber-900 hover:bg-amber-50',
|
||||
}
|
||||
case 'stopped':
|
||||
return {
|
||||
variant: 'outline',
|
||||
label: 'Stopped',
|
||||
dot: 'bg-muted-foreground/40',
|
||||
}
|
||||
case 'error':
|
||||
return {
|
||||
variant: 'destructive',
|
||||
label: 'Error',
|
||||
dot: 'bg-destructive-foreground',
|
||||
}
|
||||
default:
|
||||
return {
|
||||
variant: 'outline',
|
||||
label: 'Unknown',
|
||||
dot: 'bg-muted-foreground/40',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function pillForControlPlane(
|
||||
status: OpenClawStatus['controlPlaneStatus'],
|
||||
): PillKind {
|
||||
switch (status) {
|
||||
case 'connected':
|
||||
return {
|
||||
variant: 'secondary',
|
||||
label: 'Control plane connected',
|
||||
dot: 'bg-emerald-500',
|
||||
className: 'bg-emerald-50 text-emerald-900 hover:bg-emerald-50',
|
||||
}
|
||||
case 'connecting':
|
||||
return {
|
||||
variant: 'secondary',
|
||||
label: 'Connecting',
|
||||
dot: 'bg-amber-500 animate-pulse',
|
||||
className: 'bg-amber-50 text-amber-900 hover:bg-amber-50',
|
||||
}
|
||||
case 'reconnecting':
|
||||
return {
|
||||
variant: 'secondary',
|
||||
label: 'Reconnecting',
|
||||
dot: 'bg-amber-500 animate-pulse',
|
||||
className: 'bg-amber-50 text-amber-900 hover:bg-amber-50',
|
||||
}
|
||||
case 'recovering':
|
||||
return {
|
||||
variant: 'secondary',
|
||||
label: 'Recovering',
|
||||
dot: 'bg-amber-500 animate-pulse',
|
||||
className: 'bg-amber-50 text-amber-900 hover:bg-amber-50',
|
||||
}
|
||||
case 'failed':
|
||||
return {
|
||||
variant: 'destructive',
|
||||
label: 'Needs attention',
|
||||
dot: 'bg-destructive-foreground',
|
||||
}
|
||||
default:
|
||||
return {
|
||||
variant: 'outline',
|
||||
label: 'Disconnected',
|
||||
dot: 'bg-muted-foreground/40',
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
import type { FC } from 'react'
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipProvider,
|
||||
TooltipTrigger,
|
||||
} from '@/components/ui/tooltip'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
export type AgentLiveness = 'working' | 'idle' | 'asleep' | 'error' | 'unknown'
|
||||
|
||||
interface LivenessDotProps {
|
||||
status: AgentLiveness
|
||||
/**
|
||||
* Optional human-friendly secondary line, e.g. "Idle for 4 min" or
|
||||
* "Asleep — no activity for 22 min". When absent the tooltip just
|
||||
* reads the status label.
|
||||
*/
|
||||
detail?: string
|
||||
className?: string
|
||||
}
|
||||
|
||||
const VARIANT: Record<
|
||||
AgentLiveness,
|
||||
{ dot: string; ring: string; label: string }
|
||||
> = {
|
||||
working: {
|
||||
// Animated amber pulse + soft halo so the eye catches an active
|
||||
// agent in a long list without the dot screaming for attention.
|
||||
dot: 'bg-amber-500 animate-pulse',
|
||||
ring: 'ring-2 ring-amber-200',
|
||||
label: 'Working on a turn',
|
||||
},
|
||||
idle: {
|
||||
dot: 'bg-emerald-500',
|
||||
ring: 'ring-2 ring-emerald-100',
|
||||
label: 'Idle',
|
||||
},
|
||||
asleep: {
|
||||
dot: 'bg-muted-foreground/40',
|
||||
ring: 'ring-2 ring-muted',
|
||||
label: 'Asleep',
|
||||
},
|
||||
error: {
|
||||
dot: 'bg-destructive',
|
||||
ring: 'ring-2 ring-destructive/30',
|
||||
label: 'Attention',
|
||||
},
|
||||
unknown: {
|
||||
dot: 'bg-muted-foreground/30',
|
||||
ring: 'ring-2 ring-muted',
|
||||
label: 'Status unknown',
|
||||
},
|
||||
}
|
||||
|
||||
export const LivenessDot: FC<LivenessDotProps> = ({
|
||||
status,
|
||||
detail,
|
||||
className,
|
||||
}) => {
|
||||
const variant = VARIANT[status]
|
||||
return (
|
||||
<TooltipProvider delayDuration={150}>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span
|
||||
role="img"
|
||||
aria-label={detail ?? variant.label}
|
||||
className={cn(
|
||||
'inline-block h-3 w-3 rounded-full',
|
||||
variant.dot,
|
||||
variant.ring,
|
||||
className,
|
||||
)}
|
||||
/>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="right" className="text-xs">
|
||||
{detail ?? variant.label}
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,260 @@
|
||||
import { AlertCircle, Loader2 } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from '@/components/ui/dialog'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Label } from '@/components/ui/label'
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from '@/components/ui/select'
|
||||
import type {
|
||||
HarnessAdapterDescriptor,
|
||||
HarnessAgentAdapter,
|
||||
} from './agent-harness-types'
|
||||
import type { CreateAgentRuntime, ProviderOption } from './agents-page-types'
|
||||
import { ProviderSelector } from './OpenClawControls'
|
||||
import {
|
||||
type OpenClawCliProvider,
|
||||
type OpenClawCliProviderAuthStatus,
|
||||
OpenClawCliProviderStatusPanel,
|
||||
} from './openclaw-cli-providers'
|
||||
|
||||
interface NewAgentDialogProps {
|
||||
adapters: HarnessAdapterDescriptor[]
|
||||
canManageOpenClaw: boolean
|
||||
createError: string | null
|
||||
createRuntime: CreateAgentRuntime
|
||||
creating: boolean
|
||||
defaultProviderId: string
|
||||
harnessAdapterId: HarnessAgentAdapter
|
||||
harnessModelId: string
|
||||
harnessReasoningEffort: string
|
||||
name: string
|
||||
open: boolean
|
||||
providers: ProviderOption[]
|
||||
selectedCliProvider: OpenClawCliProvider | undefined
|
||||
selectedProviderId: string
|
||||
cliAuthError: Error | null
|
||||
cliAuthLoading: boolean
|
||||
cliAuthStatus: OpenClawCliProviderAuthStatus | undefined
|
||||
onConnectCliProvider: () => void
|
||||
onCreate: () => void
|
||||
onOpenChange: (open: boolean) => void
|
||||
onRuntimeChange: (runtime: CreateAgentRuntime) => void
|
||||
onHarnessAdapterChange: (adapter: HarnessAgentAdapter) => void
|
||||
onHarnessModelChange: (modelId: string) => void
|
||||
onHarnessReasoningChange: (reasoningEffort: string) => void
|
||||
onNameChange: (name: string) => void
|
||||
onProviderChange: (providerId: string) => void
|
||||
}
|
||||
|
||||
export const NewAgentDialog: FC<NewAgentDialogProps> = ({
|
||||
adapters,
|
||||
canManageOpenClaw,
|
||||
createError,
|
||||
createRuntime,
|
||||
creating,
|
||||
defaultProviderId,
|
||||
harnessAdapterId,
|
||||
harnessModelId,
|
||||
harnessReasoningEffort,
|
||||
name,
|
||||
open,
|
||||
providers,
|
||||
selectedCliProvider,
|
||||
selectedProviderId,
|
||||
cliAuthError,
|
||||
cliAuthLoading,
|
||||
cliAuthStatus,
|
||||
onConnectCliProvider,
|
||||
onCreate,
|
||||
onOpenChange,
|
||||
onRuntimeChange,
|
||||
onHarnessAdapterChange,
|
||||
onHarnessModelChange,
|
||||
onHarnessReasoningChange,
|
||||
onNameChange,
|
||||
onProviderChange,
|
||||
}) => {
|
||||
const selectedHarnessAdapter =
|
||||
adapters.find((adapter) => adapter.id === harnessAdapterId) ?? adapters[0]
|
||||
const isHarnessRuntime = createRuntime !== 'openclaw'
|
||||
const openClawBlocked = createRuntime === 'openclaw' && !canManageOpenClaw
|
||||
const cliBlocked =
|
||||
createRuntime === 'openclaw' &&
|
||||
!!selectedCliProvider &&
|
||||
!cliAuthStatus?.loggedIn
|
||||
const canCreate =
|
||||
Boolean(name.trim()) &&
|
||||
!creating &&
|
||||
!openClawBlocked &&
|
||||
!cliBlocked &&
|
||||
(createRuntime === 'openclaw'
|
||||
? providers.length > 0
|
||||
: Boolean(selectedHarnessAdapter))
|
||||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={onOpenChange}>
|
||||
<DialogContent>
|
||||
<DialogHeader>
|
||||
<DialogTitle>New Agent</DialogTitle>
|
||||
</DialogHeader>
|
||||
|
||||
<div className="grid gap-4 py-2">
|
||||
{createError ? (
|
||||
<Alert variant="destructive">
|
||||
<AlertCircle className="size-4" />
|
||||
<AlertTitle>Create failed</AlertTitle>
|
||||
<AlertDescription>{createError}</AlertDescription>
|
||||
</Alert>
|
||||
) : null}
|
||||
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="agent-name">Name</Label>
|
||||
<Input
|
||||
id="agent-name"
|
||||
value={name}
|
||||
onChange={(event) => onNameChange(event.target.value)}
|
||||
placeholder={
|
||||
createRuntime === 'openclaw' ? 'research-agent' : 'Review bot'
|
||||
}
|
||||
onKeyDown={(event) => {
|
||||
if (event.key === 'Enter' && canCreate) onCreate()
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="agent-runtime">Adapter</Label>
|
||||
<Select
|
||||
value={createRuntime}
|
||||
onValueChange={(value) => {
|
||||
if (
|
||||
value === 'openclaw' ||
|
||||
value === 'claude' ||
|
||||
value === 'codex'
|
||||
) {
|
||||
onRuntimeChange(value)
|
||||
if (value !== 'openclaw') onHarnessAdapterChange(value)
|
||||
}
|
||||
}}
|
||||
>
|
||||
<SelectTrigger id="agent-runtime">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{adapters.map((adapter) => (
|
||||
<SelectItem key={adapter.id} value={adapter.id}>
|
||||
{adapter.name}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
{createRuntime === 'openclaw' ? (
|
||||
<>
|
||||
{openClawBlocked ? (
|
||||
<Alert>
|
||||
<AlertCircle className="size-4" />
|
||||
<AlertTitle>OpenClaw is not ready</AlertTitle>
|
||||
<AlertDescription>
|
||||
Start or set up the OpenClaw gateway before creating an
|
||||
OpenClaw agent.
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
) : null}
|
||||
|
||||
<ProviderSelector
|
||||
providers={providers}
|
||||
defaultProviderId={defaultProviderId}
|
||||
selectedId={selectedProviderId}
|
||||
onSelect={onProviderChange}
|
||||
hideApiKeyHint={!!selectedCliProvider}
|
||||
/>
|
||||
|
||||
{selectedCliProvider ? (
|
||||
<OpenClawCliProviderStatusPanel
|
||||
provider={selectedCliProvider}
|
||||
status={cliAuthStatus}
|
||||
loading={cliAuthLoading}
|
||||
fetchError={cliAuthError}
|
||||
onConnect={onConnectCliProvider}
|
||||
/>
|
||||
) : null}
|
||||
</>
|
||||
) : null}
|
||||
|
||||
{isHarnessRuntime ? (
|
||||
<>
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="harness-model">Model</Label>
|
||||
<Select
|
||||
value={harnessModelId}
|
||||
onValueChange={onHarnessModelChange}
|
||||
>
|
||||
<SelectTrigger id="harness-model">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{(selectedHarnessAdapter?.models ?? []).map((model) => (
|
||||
<SelectItem key={model.id} value={model.id}>
|
||||
{model.label}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="harness-effort">Reasoning</Label>
|
||||
<Select
|
||||
value={harnessReasoningEffort}
|
||||
onValueChange={onHarnessReasoningChange}
|
||||
>
|
||||
<SelectTrigger id="harness-effort">
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{(selectedHarnessAdapter?.reasoningEfforts ?? []).map(
|
||||
(effort) => (
|
||||
<SelectItem key={effort.id} value={effort.id}>
|
||||
{effort.label}
|
||||
</SelectItem>
|
||||
),
|
||||
)}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
</>
|
||||
) : null}
|
||||
</div>
|
||||
|
||||
<DialogFooter>
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={() => onOpenChange(false)}
|
||||
disabled={creating}
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button disabled={!canCreate} onClick={onCreate}>
|
||||
{creating ? <Loader2 className="mr-2 size-4 animate-spin" /> : null}
|
||||
Create
|
||||
</Button>
|
||||
</DialogFooter>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,387 @@
|
||||
import {
|
||||
AlertCircle,
|
||||
Cpu,
|
||||
Loader2,
|
||||
Plus,
|
||||
RefreshCw,
|
||||
ShieldAlert,
|
||||
Square,
|
||||
TerminalSquare,
|
||||
WifiOff,
|
||||
Wrench,
|
||||
} from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Card, CardContent } from '@/components/ui/card'
|
||||
import { Label } from '@/components/ui/label'
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from '@/components/ui/select'
|
||||
import type { ProviderOption } from './agents-page-types'
|
||||
import {
|
||||
CONTROL_PLANE_COPY,
|
||||
FALLBACK_CONTROL_PLANE_COPY,
|
||||
} from './agents-page-types'
|
||||
import type { getControlPlaneCopy } from './agents-page-utils'
|
||||
import type { OpenClawStatus } from './useOpenClaw'
|
||||
|
||||
const StatusBadge: FC<{ status: OpenClawStatus['status'] }> = ({ status }) => {
|
||||
const variants: Record<
|
||||
OpenClawStatus['status'],
|
||||
{
|
||||
variant: 'default' | 'secondary' | 'outline' | 'destructive'
|
||||
label: string
|
||||
}
|
||||
> = {
|
||||
running: { variant: 'default', label: 'Running' },
|
||||
starting: { variant: 'secondary', label: 'Starting...' },
|
||||
stopped: { variant: 'outline', label: 'Stopped' },
|
||||
error: { variant: 'destructive', label: 'Error' },
|
||||
uninitialized: { variant: 'outline', label: 'Not Set Up' },
|
||||
}
|
||||
const current = variants[status] ?? {
|
||||
variant: 'outline' as const,
|
||||
label: 'Unknown',
|
||||
}
|
||||
return <Badge variant={current.variant}>{current.label}</Badge>
|
||||
}
|
||||
|
||||
const ControlPlaneBadge: FC<{
|
||||
status: OpenClawStatus['controlPlaneStatus']
|
||||
}> = ({ status }) => {
|
||||
const current = CONTROL_PLANE_COPY[status] ?? FALLBACK_CONTROL_PLANE_COPY
|
||||
return <Badge variant={current.badgeVariant}>{current.badgeLabel}</Badge>
|
||||
}
|
||||
|
||||
interface ProviderSelectorProps {
|
||||
providers: ProviderOption[]
|
||||
defaultProviderId: string
|
||||
selectedId: string
|
||||
onSelect: (id: string) => void
|
||||
hideApiKeyHint?: boolean
|
||||
}
|
||||
|
||||
export const ProviderSelector: FC<ProviderSelectorProps> = ({
|
||||
providers,
|
||||
defaultProviderId,
|
||||
selectedId,
|
||||
onSelect,
|
||||
hideApiKeyHint,
|
||||
}) => {
|
||||
if (providers.length === 0) {
|
||||
return (
|
||||
<div className="space-y-2">
|
||||
<p className="font-medium text-sm">LLM Provider</p>
|
||||
<p className="text-muted-foreground text-sm">
|
||||
No compatible LLM providers configured.{' '}
|
||||
<a href="#/settings/ai" className="underline">
|
||||
Add one in AI settings
|
||||
</a>{' '}
|
||||
first.
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="provider-select">LLM Provider</Label>
|
||||
<Select value={selectedId} onValueChange={onSelect}>
|
||||
<SelectTrigger id="provider-select">
|
||||
<SelectValue placeholder="Select a provider" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{providers.map((provider) => (
|
||||
<SelectItem key={provider.id} value={provider.id}>
|
||||
{provider.name} - {provider.modelId}
|
||||
{provider.id === defaultProviderId ? ' (default)' : ''}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
{!hideApiKeyHint && (
|
||||
<p className="text-muted-foreground text-xs">
|
||||
Uses your existing API key from BrowserOS settings. The key is passed
|
||||
to the container and never leaves your machine.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
interface AgentsPageHeaderProps {
|
||||
actionInProgress: boolean
|
||||
controlPlaneBusy: boolean
|
||||
reconnecting: boolean
|
||||
status: OpenClawStatus | null
|
||||
onCreateAgent: () => void
|
||||
onOpenTerminal: () => void
|
||||
onReconnect: () => void
|
||||
onRefresh: () => void
|
||||
onRestart: () => void
|
||||
onStop: () => void
|
||||
}
|
||||
|
||||
export const AgentsPageHeader: FC<AgentsPageHeaderProps> = ({
|
||||
actionInProgress,
|
||||
controlPlaneBusy,
|
||||
reconnecting,
|
||||
status,
|
||||
onCreateAgent,
|
||||
onOpenTerminal,
|
||||
onReconnect,
|
||||
onRefresh,
|
||||
onRestart,
|
||||
onStop,
|
||||
}) => (
|
||||
<div className="flex flex-wrap items-center justify-between gap-3">
|
||||
<div>
|
||||
<h1 className="font-semibold text-2xl tracking-normal">Agents</h1>
|
||||
<p className="text-muted-foreground text-sm">
|
||||
OpenClaw, Claude Code, and Codex agents
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="flex flex-wrap items-center gap-2">
|
||||
{status ? (
|
||||
<>
|
||||
<StatusBadge status={status.status} />
|
||||
{status.status !== 'uninitialized' && (
|
||||
<ControlPlaneBadge status={status.controlPlaneStatus} />
|
||||
)}
|
||||
</>
|
||||
) : null}
|
||||
|
||||
{status?.status === 'running' &&
|
||||
status.controlPlaneStatus !== 'connected' ? (
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={onReconnect}
|
||||
disabled={actionInProgress || controlPlaneBusy}
|
||||
>
|
||||
{reconnecting ? (
|
||||
<Loader2 className="mr-2 size-4 animate-spin" />
|
||||
) : (
|
||||
<RefreshCw className="mr-2 size-4" />
|
||||
)}
|
||||
Retry Connection
|
||||
</Button>
|
||||
) : null}
|
||||
|
||||
{status?.status === 'running' ? (
|
||||
<>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
onClick={onRestart}
|
||||
disabled={actionInProgress}
|
||||
title="Restart gateway"
|
||||
>
|
||||
<RefreshCw className="size-4" />
|
||||
</Button>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
onClick={onStop}
|
||||
disabled={actionInProgress}
|
||||
title="Stop gateway"
|
||||
>
|
||||
<Square className="size-4" />
|
||||
</Button>
|
||||
<Button variant="outline" onClick={onOpenTerminal}>
|
||||
<TerminalSquare className="mr-2 size-4" />
|
||||
Terminal
|
||||
</Button>
|
||||
</>
|
||||
) : null}
|
||||
|
||||
<Button variant="ghost" size="icon" onClick={onRefresh} title="Refresh">
|
||||
<RefreshCw className="size-4" />
|
||||
</Button>
|
||||
<Button onClick={onCreateAgent}>
|
||||
<Plus className="mr-2 size-4" />
|
||||
New Agent
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
||||
export function LifecycleAlert({ message }: { message: string }) {
|
||||
return (
|
||||
<Alert>
|
||||
<Loader2 className="size-4 animate-spin" />
|
||||
<AlertTitle>{message}</AlertTitle>
|
||||
</Alert>
|
||||
)
|
||||
}
|
||||
|
||||
export function InlineErrorAlert({
|
||||
message,
|
||||
onDismiss,
|
||||
}: {
|
||||
message: string
|
||||
onDismiss: () => void
|
||||
}) {
|
||||
return (
|
||||
<Alert variant="destructive">
|
||||
<AlertCircle className="size-4" />
|
||||
<AlertTitle>Agent action failed</AlertTitle>
|
||||
<AlertDescription>
|
||||
<p>{message}</p>
|
||||
<div className="mt-2">
|
||||
<Button variant="outline" size="sm" onClick={onDismiss}>
|
||||
Dismiss
|
||||
</Button>
|
||||
</div>
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
)
|
||||
}
|
||||
|
||||
interface ControlPlaneAlertProps {
|
||||
actionInProgress: boolean
|
||||
controlPlaneBusy: boolean
|
||||
controlPlaneCopy: ReturnType<typeof getControlPlaneCopy>
|
||||
reconnecting: boolean
|
||||
recoveryDetail: string | null
|
||||
status: OpenClawStatus
|
||||
onReconnect: () => void
|
||||
onRestart: () => void
|
||||
}
|
||||
|
||||
export const ControlPlaneAlert: FC<ControlPlaneAlertProps> = ({
|
||||
actionInProgress,
|
||||
controlPlaneBusy,
|
||||
controlPlaneCopy,
|
||||
reconnecting,
|
||||
recoveryDetail,
|
||||
status,
|
||||
onReconnect,
|
||||
onRestart,
|
||||
}) => (
|
||||
<Alert
|
||||
variant={status.controlPlaneStatus === 'failed' ? 'destructive' : 'default'}
|
||||
>
|
||||
{status.controlPlaneStatus === 'failed' ? (
|
||||
<ShieldAlert className="size-4" />
|
||||
) : status.controlPlaneStatus === 'recovering' ? (
|
||||
<Wrench className="size-4" />
|
||||
) : (
|
||||
<WifiOff className="size-4" />
|
||||
)}
|
||||
<AlertTitle>{controlPlaneCopy.title}</AlertTitle>
|
||||
<AlertDescription>
|
||||
<p>{controlPlaneCopy.description}</p>
|
||||
{recoveryDetail ? <p>{recoveryDetail}</p> : null}
|
||||
<div className="mt-2 flex flex-wrap gap-2">
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={onReconnect}
|
||||
disabled={actionInProgress || controlPlaneBusy}
|
||||
>
|
||||
{reconnecting ? (
|
||||
<Loader2 className="mr-2 size-4 animate-spin" />
|
||||
) : (
|
||||
<RefreshCw className="mr-2 size-4" />
|
||||
)}
|
||||
Retry Connection
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={onRestart}
|
||||
disabled={actionInProgress}
|
||||
>
|
||||
Restart Gateway
|
||||
</Button>
|
||||
</div>
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
)
|
||||
|
||||
interface GatewayStateCardsProps {
|
||||
actionInProgress: boolean
|
||||
status: OpenClawStatus | null
|
||||
onOpenSetup: () => void
|
||||
onRestart: () => void
|
||||
onStart: () => void
|
||||
}
|
||||
|
||||
export const GatewayStateCards: FC<GatewayStateCardsProps> = ({
|
||||
actionInProgress,
|
||||
status,
|
||||
onOpenSetup,
|
||||
onRestart,
|
||||
onStart,
|
||||
}) => (
|
||||
<>
|
||||
{status?.status === 'uninitialized' ? (
|
||||
<Card>
|
||||
<CardContent className="flex flex-col items-center gap-4 py-12">
|
||||
<Cpu className="size-12 text-muted-foreground" />
|
||||
<div className="text-center">
|
||||
<h3 className="font-semibold text-lg">Set Up OpenClaw</h3>
|
||||
<p className="text-muted-foreground text-sm">
|
||||
{status.podmanAvailable
|
||||
? 'Create a local BrowserOS VM to run autonomous agents with full tool access.'
|
||||
: 'BrowserOS VM runtime is unavailable on this system.'}
|
||||
</p>
|
||||
</div>
|
||||
{status.podmanAvailable ? (
|
||||
<Button onClick={onOpenSetup}>Set Up Now</Button>
|
||||
) : null}
|
||||
</CardContent>
|
||||
</Card>
|
||||
) : null}
|
||||
|
||||
{status?.status === 'stopped' ? (
|
||||
<Card>
|
||||
<CardContent className="flex flex-col items-center gap-4 py-12">
|
||||
<Cpu className="size-12 text-muted-foreground" />
|
||||
<div className="text-center">
|
||||
<h3 className="font-semibold text-lg">Gateway Stopped</h3>
|
||||
<p className="text-muted-foreground text-sm">
|
||||
The OpenClaw gateway is not running.
|
||||
</p>
|
||||
</div>
|
||||
<Button onClick={onStart} disabled={actionInProgress}>
|
||||
Start Gateway
|
||||
</Button>
|
||||
</CardContent>
|
||||
</Card>
|
||||
) : null}
|
||||
|
||||
{status?.status === 'error' ? (
|
||||
<Card className="border-destructive">
|
||||
<CardContent className="flex flex-col items-center gap-4 py-12">
|
||||
<AlertCircle className="size-12 text-destructive" />
|
||||
<div className="text-center">
|
||||
<h3 className="font-semibold text-lg">Gateway Error</h3>
|
||||
<p className="text-muted-foreground text-sm">
|
||||
{status.error ?? status.lastGatewayError}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<Button onClick={onStart} disabled={actionInProgress}>
|
||||
Start Gateway
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={onRestart}
|
||||
disabled={actionInProgress}
|
||||
>
|
||||
Restart Gateway
|
||||
</Button>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
) : null}
|
||||
</>
|
||||
)
|
||||
@@ -0,0 +1,76 @@
|
||||
import { Loader2 } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from '@/components/ui/dialog'
|
||||
import type { ProviderOption } from './agents-page-types'
|
||||
import { ProviderSelector } from './OpenClawControls'
|
||||
import type { OpenClawCliProvider } from './openclaw-cli-providers'
|
||||
|
||||
interface SetupOpenClawDialogProps {
|
||||
defaultProviderId: string
|
||||
open: boolean
|
||||
providers: ProviderOption[]
|
||||
selectedProviderId: string
|
||||
selectedCliProvider: OpenClawCliProvider | undefined
|
||||
settingUp: boolean
|
||||
onOpenChange: (open: boolean) => void
|
||||
onProviderChange: (providerId: string) => void
|
||||
onSetup: () => void
|
||||
}
|
||||
|
||||
export const SetupOpenClawDialog: FC<SetupOpenClawDialogProps> = ({
|
||||
defaultProviderId,
|
||||
open,
|
||||
providers,
|
||||
selectedProviderId,
|
||||
selectedCliProvider,
|
||||
settingUp,
|
||||
onOpenChange,
|
||||
onProviderChange,
|
||||
onSetup,
|
||||
}) => (
|
||||
<Dialog open={open} onOpenChange={onOpenChange}>
|
||||
<DialogContent>
|
||||
<DialogHeader>
|
||||
<DialogTitle>Set Up OpenClaw</DialogTitle>
|
||||
</DialogHeader>
|
||||
<div className="space-y-4 py-2">
|
||||
<ProviderSelector
|
||||
providers={providers}
|
||||
defaultProviderId={defaultProviderId}
|
||||
selectedId={selectedProviderId}
|
||||
onSelect={onProviderChange}
|
||||
hideApiKeyHint={!!selectedCliProvider}
|
||||
/>
|
||||
|
||||
{selectedCliProvider ? (
|
||||
<p className="rounded-md border border-border bg-muted/30 px-3 py-2 text-muted-foreground text-xs">
|
||||
{selectedCliProvider.description}. Clicking{' '}
|
||||
<span className="font-medium">Set Up & Start</span> starts the
|
||||
gateway and opens a terminal to sign in.
|
||||
</p>
|
||||
) : null}
|
||||
|
||||
<Button
|
||||
onClick={onSetup}
|
||||
disabled={settingUp || providers.length === 0}
|
||||
className="w-full"
|
||||
>
|
||||
{settingUp ? (
|
||||
<>
|
||||
<Loader2 className="mr-2 size-4 animate-spin" />
|
||||
Setting up...
|
||||
</>
|
||||
) : (
|
||||
'Set Up & Start'
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
)
|
||||
@@ -0,0 +1,4 @@
|
||||
export function buildAgentApiUrl(baseUrl: string, path: string): string {
|
||||
const normalizedPath = path === '/' ? '' : path
|
||||
return `${baseUrl}/agents${normalizedPath}`
|
||||
}
|
||||
@@ -0,0 +1,107 @@
|
||||
import type { AgentListItem } from './agents-page-types'
|
||||
import type { AgentLiveness } from './LivenessDot'
|
||||
|
||||
/**
|
||||
* Display rules for the redesigned agent rows. Pure helpers — no React,
|
||||
* no API calls — so they're trivial to unit-test and the row card stays
|
||||
* focused on layout.
|
||||
*/
|
||||
|
||||
const UUID_PATTERN =
|
||||
/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
|
||||
|
||||
const OC_UUID_PATTERN =
|
||||
/^oc-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
|
||||
|
||||
/**
|
||||
* The agent rail used to render whatever the gateway returned for `name`.
|
||||
* Post-migration that's frequently the agent's UUID — readable to nobody.
|
||||
* Prefer the explicit `name` when it differs meaningfully from the id;
|
||||
* otherwise fall back to a short prefix users can recognize on second
|
||||
* glance.
|
||||
*/
|
||||
export function displayName(agent: AgentListItem): string {
|
||||
const name = agent.name?.trim()
|
||||
const id = agent.agentId
|
||||
if (!name || name === id) {
|
||||
if (OC_UUID_PATTERN.test(id)) return id.slice(0, 11) // "oc-XXXXXXXX"
|
||||
if (UUID_PATTERN.test(id)) return id.slice(0, 8)
|
||||
return id
|
||||
}
|
||||
return name
|
||||
}
|
||||
|
||||
export function canDelete(agent: AgentListItem): boolean {
|
||||
// The gateway's protected `main` agent must not be deletable. The
|
||||
// server enforces this too, but disabling the menu item avoids users
|
||||
// hitting an opaque 400.
|
||||
if (agent.agentId === 'main') return false
|
||||
return agent.canDelete
|
||||
}
|
||||
|
||||
/**
|
||||
* Rename will be wired to a future `PATCH /agents/:id` endpoint. The
|
||||
* legacy `/claw/agents` create flow named the agent on the gateway via
|
||||
* the `name` field but the field isn't editable post-create today.
|
||||
*/
|
||||
export function canRename(_agent: AgentListItem): boolean {
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* The detail line carries the agent's workspace path. The `detail`
|
||||
* field on AgentListItem already holds it for OpenClaw entries
|
||||
* (`/home/node/.openclaw/workspace-...`); for harness agents it's the
|
||||
* synthetic `<adapter>:main` marker that's not informative — hide it.
|
||||
*/
|
||||
export function workspaceLabel(agent: AgentListItem): string | null {
|
||||
if (!agent.detail) return null
|
||||
if (/^(claude|codex|openclaw):main$/.test(agent.detail)) return null
|
||||
return agent.detail
|
||||
}
|
||||
|
||||
const ONE_MINUTE = 60_000
|
||||
const ONE_HOUR = 60 * ONE_MINUTE
|
||||
const ONE_DAY = 24 * ONE_HOUR
|
||||
|
||||
/**
|
||||
* Lightweight relative-time formatter. We don't want to drag in
|
||||
* `dayjs/relativeTime` just for a few labels.
|
||||
*/
|
||||
export function formatRelativeTime(epochMs: number | null): string {
|
||||
if (epochMs === null || !Number.isFinite(epochMs)) return 'never'
|
||||
const diff = Math.max(0, Date.now() - epochMs)
|
||||
if (diff < ONE_MINUTE) return 'just now'
|
||||
if (diff < ONE_HOUR) {
|
||||
const m = Math.floor(diff / ONE_MINUTE)
|
||||
return `${m} min ago`
|
||||
}
|
||||
if (diff < ONE_DAY) {
|
||||
const h = Math.floor(diff / ONE_HOUR)
|
||||
return h === 1 ? '1 hr ago' : `${h} hr ago`
|
||||
}
|
||||
const d = Math.floor(diff / ONE_DAY)
|
||||
return d === 1 ? '1 day ago' : `${d} days ago`
|
||||
}
|
||||
|
||||
/**
|
||||
* Tooltip-friendly description of a row's current liveness state.
|
||||
* Returns `undefined` when the state has nothing extra to add (e.g.
|
||||
* `unknown` with no timestamp).
|
||||
*/
|
||||
export function livenessDetail(
|
||||
status: AgentLiveness,
|
||||
lastUsedAt: number | null | undefined,
|
||||
): string | undefined {
|
||||
if (lastUsedAt == null) return undefined
|
||||
const diffMin = Math.floor((Date.now() - lastUsedAt) / 60_000)
|
||||
if (status === 'idle') return `Idle for ${Math.max(0, diffMin)} min`
|
||||
if (status === 'asleep') {
|
||||
if (diffMin < 60) return `Asleep — quiet for ${diffMin} min`
|
||||
const hr = Math.floor(diffMin / 60)
|
||||
return `Asleep — quiet for ${hr} hr`
|
||||
}
|
||||
if (status === 'working') return 'Working on a turn'
|
||||
if (status === 'error') return 'Attention — last turn failed'
|
||||
return undefined
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
import type { AgentEntry } from './useOpenClaw'
|
||||
|
||||
export type HarnessAgentAdapter = 'claude' | 'codex' | 'openclaw'
|
||||
|
||||
export type AgentHarnessStreamEvent =
|
||||
| {
|
||||
type: 'text_delta'
|
||||
text: string
|
||||
stream: 'output' | 'thought'
|
||||
rawType?: string
|
||||
}
|
||||
| {
|
||||
type: 'tool_call'
|
||||
text: string
|
||||
title: string
|
||||
id?: string
|
||||
status?: string
|
||||
rawType?: string
|
||||
}
|
||||
| {
|
||||
type: 'status'
|
||||
text: string
|
||||
rawType?: string
|
||||
}
|
||||
| {
|
||||
type: 'done'
|
||||
text?: string
|
||||
stopReason?: string
|
||||
}
|
||||
| {
|
||||
type: 'error'
|
||||
message: string
|
||||
code?: string
|
||||
}
|
||||
|
||||
export type HarnessAgentLiveness = 'working' | 'idle' | 'asleep' | 'error'
|
||||
|
||||
export interface HarnessAgent {
|
||||
id: string
|
||||
name: string
|
||||
adapter: HarnessAgentAdapter
|
||||
modelId?: string
|
||||
reasoningEffort?: string
|
||||
permissionMode: 'approve-all'
|
||||
sessionKey: string
|
||||
createdAt: number
|
||||
updatedAt: number
|
||||
/**
|
||||
* Server-derived liveness state. When the listing endpoint hasn't
|
||||
* been enriched yet (older deployments) this is undefined and the UI
|
||||
* falls back to `unknown`.
|
||||
*/
|
||||
status?: HarnessAgentLiveness
|
||||
/**
|
||||
* Wall-clock ms of the last persisted turn. `null` for never-used
|
||||
* agents. Drives the recency sort and the "Last used X min ago" copy.
|
||||
*/
|
||||
lastUsedAt?: number | null
|
||||
/** Pinned agents float to the top of the list. Defaults to `false`. */
|
||||
pinned?: boolean
|
||||
/** First non-blank line of the most recent user message; null if none. */
|
||||
lastUserMessage?: string | null
|
||||
/** Working directory the agent runs in; null when no session record yet. */
|
||||
cwd?: string | null
|
||||
/** Cumulative + 7-day rolling token usage; null when no record. */
|
||||
tokens?: {
|
||||
last7d: { input: number; output: number; requestCount: number }
|
||||
cumulative: { input: number; output: number }
|
||||
} | null
|
||||
turnsByDay?: number[]
|
||||
failedByDay?: number[]
|
||||
lastError?: string | null
|
||||
lastErrorAt?: number | null
|
||||
/** When non-null, an in-flight turn this row can be resumed from. */
|
||||
activeTurnId?: string | null
|
||||
/** Persistent FIFO queue of messages waiting for this agent. */
|
||||
queue?: HarnessQueuedMessage[]
|
||||
}
|
||||
|
||||
export interface HarnessQueuedMessageAttachment {
|
||||
mediaType: string
|
||||
data: string
|
||||
}
|
||||
|
||||
export interface HarnessQueuedMessage {
|
||||
id: string
|
||||
createdAt: number
|
||||
message: string
|
||||
attachments?: ReadonlyArray<HarnessQueuedMessageAttachment>
|
||||
}
|
||||
|
||||
export interface HarnessAdapterHealth {
|
||||
healthy: boolean
|
||||
reason?: string
|
||||
checkedAt: number
|
||||
}
|
||||
|
||||
export interface HarnessAdapterDescriptor {
|
||||
id: HarnessAgentAdapter
|
||||
name: string
|
||||
defaultModelId: string
|
||||
defaultReasoningEffort: string
|
||||
modelControl: 'runtime-supported' | 'best-effort'
|
||||
models: Array<{ id: string; label: string; recommended?: boolean }>
|
||||
reasoningEfforts: Array<{ id: string; label: string; recommended?: boolean }>
|
||||
health?: HarnessAdapterHealth
|
||||
}
|
||||
|
||||
export interface CreateHarnessAgentInput {
|
||||
name: string
|
||||
adapter: HarnessAgentAdapter
|
||||
modelId?: string
|
||||
reasoningEffort?: string
|
||||
}
|
||||
|
||||
export interface HarnessHistoryReasoning {
|
||||
text: string
|
||||
durationMs?: number
|
||||
}
|
||||
|
||||
export interface HarnessHistoryToolCall {
|
||||
toolCallId?: string
|
||||
toolName: string
|
||||
status: 'pending' | 'running' | 'completed' | 'failed'
|
||||
input?: unknown
|
||||
output?: unknown
|
||||
error?: string
|
||||
durationMs?: number
|
||||
}
|
||||
|
||||
export interface HarnessHistoryEntry {
|
||||
id: string
|
||||
agentId: string
|
||||
sessionId: 'main'
|
||||
role: 'user' | 'assistant'
|
||||
text: string
|
||||
createdAt: number
|
||||
reasoning?: HarnessHistoryReasoning
|
||||
toolCalls?: HarnessHistoryToolCall[]
|
||||
}
|
||||
|
||||
export interface HarnessAgentHistoryPage {
|
||||
agentId: string
|
||||
sessionId: 'main'
|
||||
items: HarnessHistoryEntry[]
|
||||
}
|
||||
|
||||
export function mapHarnessAgentToEntry(agent: HarnessAgent): AgentEntry {
|
||||
return {
|
||||
agentId: agent.id,
|
||||
name: agent.name,
|
||||
workspace: `${agent.adapter}:main`,
|
||||
model: agent.modelId,
|
||||
source: 'agent-harness',
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
import {
|
||||
Copy,
|
||||
Loader2,
|
||||
MessageSquare,
|
||||
MoreHorizontal,
|
||||
Pencil,
|
||||
RotateCcw,
|
||||
Trash2,
|
||||
} from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { useNavigate } from 'react-router'
|
||||
import { toast } from 'sonner'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import {
|
||||
DropdownMenu,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuSeparator,
|
||||
DropdownMenuTrigger,
|
||||
} from '@/components/ui/dropdown-menu'
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipProvider,
|
||||
TooltipTrigger,
|
||||
} from '@/components/ui/tooltip'
|
||||
import {
|
||||
canDelete as canDeleteAgent,
|
||||
canRename as canRenameAgent,
|
||||
displayName,
|
||||
} from '../agent-display.helpers'
|
||||
import type { AgentListItem } from '../agents-page-types'
|
||||
|
||||
interface AgentActionsProps {
|
||||
agent: AgentListItem
|
||||
activeTurnId: string | null
|
||||
deleting?: boolean
|
||||
onDelete: (agent: AgentListItem) => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Single primary CTA per row: `Resume` (filled, accent-orange, with a
|
||||
* pulsing dot) when an active turn exists; otherwise `Chat` (outline).
|
||||
* Both navigate to the same place — the chat hook auto-attaches via
|
||||
* `/chat/active` when there's a live turn — but the row signals which
|
||||
* action the user is actually taking.
|
||||
*/
|
||||
export const AgentActions: FC<AgentActionsProps> = ({
|
||||
agent,
|
||||
activeTurnId,
|
||||
deleting,
|
||||
onDelete,
|
||||
}) => {
|
||||
const navigate = useNavigate()
|
||||
const allowDelete = canDeleteAgent(agent)
|
||||
const allowRename = canRenameAgent(agent)
|
||||
|
||||
const handleChat = () => navigate(`/agents/${agent.agentId}`)
|
||||
const handleCopyId = async () => {
|
||||
try {
|
||||
await navigator.clipboard.writeText(agent.agentId)
|
||||
toast.success('Agent id copied')
|
||||
} catch {
|
||||
toast.error('Could not copy agent id')
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex shrink-0 items-center gap-1.5">
|
||||
{activeTurnId ? (
|
||||
<Button
|
||||
variant="default"
|
||||
size="sm"
|
||||
onClick={handleChat}
|
||||
className="gap-2 bg-[var(--accent-orange)] text-white shadow-sm hover:bg-[var(--accent-orange)]/90"
|
||||
>
|
||||
<span className="relative flex size-2">
|
||||
<span className="absolute inline-flex h-full w-full animate-ping rounded-full bg-white/70 opacity-75" />
|
||||
<span className="relative inline-flex size-2 rounded-full bg-white" />
|
||||
</span>
|
||||
Resume
|
||||
</Button>
|
||||
) : (
|
||||
<Button variant="outline" size="sm" onClick={handleChat}>
|
||||
<MessageSquare className="mr-1.5 size-3" />
|
||||
Chat
|
||||
</Button>
|
||||
)}
|
||||
<DropdownMenu>
|
||||
<DropdownMenuTrigger asChild>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
aria-label={`More actions for ${displayName(agent)}`}
|
||||
className="size-8 text-muted-foreground hover:text-foreground"
|
||||
>
|
||||
<MoreHorizontal className="size-4" />
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<DropdownMenuContent align="end" className="w-44">
|
||||
<DropdownMenuItem onSelect={() => void handleCopyId()}>
|
||||
<Copy className="mr-2 size-3.5" />
|
||||
Copy id
|
||||
</DropdownMenuItem>
|
||||
<ComingSoonItem
|
||||
icon={Pencil}
|
||||
label="Rename"
|
||||
disabled={!allowRename}
|
||||
/>
|
||||
<ComingSoonItem icon={RotateCcw} label="Reset history" disabled />
|
||||
<DropdownMenuSeparator />
|
||||
<DropdownMenuItem
|
||||
onSelect={() => onDelete(agent)}
|
||||
disabled={!allowDelete || deleting}
|
||||
className="text-destructive focus:text-destructive"
|
||||
>
|
||||
{deleting ? (
|
||||
<Loader2 className="mr-2 size-3.5 animate-spin" />
|
||||
) : (
|
||||
<Trash2 className="mr-2 size-3.5" />
|
||||
)}
|
||||
Delete
|
||||
</DropdownMenuItem>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
interface ComingSoonItemProps {
|
||||
icon: typeof Pencil
|
||||
label: string
|
||||
disabled: boolean
|
||||
}
|
||||
|
||||
const ComingSoonItem: FC<ComingSoonItemProps> = ({
|
||||
icon: Icon,
|
||||
label,
|
||||
disabled,
|
||||
}) => {
|
||||
const item = (
|
||||
<DropdownMenuItem disabled className="text-muted-foreground">
|
||||
<Icon className="mr-2 size-3.5" />
|
||||
{label}
|
||||
</DropdownMenuItem>
|
||||
)
|
||||
if (!disabled) return item
|
||||
return (
|
||||
<TooltipProvider delayDuration={300}>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<span className="block w-full">{item}</span>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="left" className="text-xs">
|
||||
{label} coming soon
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
import { AlertTriangle, ChevronDown } from 'lucide-react'
|
||||
import { type FC, useEffect, useState } from 'react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import {
|
||||
Collapsible,
|
||||
CollapsibleContent,
|
||||
CollapsibleTrigger,
|
||||
} from '@/components/ui/collapsible'
|
||||
import {
|
||||
HoverCard,
|
||||
HoverCardContent,
|
||||
HoverCardTrigger,
|
||||
} from '@/components/ui/hover-card'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { truncate } from './agent-row.helpers'
|
||||
|
||||
interface AgentErrorPanelProps {
|
||||
agentId: string
|
||||
message: string
|
||||
errorAt: number | null
|
||||
}
|
||||
|
||||
const STORAGE_PREFIX = 'agent-row:lastErrorSeenAt:'
|
||||
const PREVIEW_CHARS = 200
|
||||
|
||||
export const AgentErrorPanel: FC<AgentErrorPanelProps> = ({
|
||||
agentId,
|
||||
message,
|
||||
errorAt,
|
||||
}) => {
|
||||
const storageKey = `${STORAGE_PREFIX}${agentId}`
|
||||
// Open if we've never seen this `errorAt` for this agent. Once the
|
||||
// user collapses the panel (or refreshes after seeing it), we mark
|
||||
// it seen so it doesn't re-pop on every poll.
|
||||
const [open, setOpen] = useState<boolean>(() => {
|
||||
if (typeof window === 'undefined' || !errorAt) return true
|
||||
const seen = Number(window.localStorage.getItem(storageKey) ?? 0)
|
||||
return !Number.isFinite(seen) || errorAt > seen
|
||||
})
|
||||
|
||||
useEffect(() => {
|
||||
if (!open && errorAt && typeof window !== 'undefined') {
|
||||
window.localStorage.setItem(storageKey, String(errorAt))
|
||||
}
|
||||
}, [open, errorAt, storageKey])
|
||||
|
||||
const preview = truncate(message, PREVIEW_CHARS)
|
||||
const truncated = preview.length < message.length
|
||||
|
||||
return (
|
||||
<Collapsible open={open} onOpenChange={setOpen} className="mt-3">
|
||||
<div className="flex items-center justify-between rounded-md border border-destructive/30 bg-destructive/5 px-3 py-2">
|
||||
<div className="flex items-center gap-2 font-medium text-destructive text-xs">
|
||||
<AlertTriangle className="size-3.5" />
|
||||
Last error
|
||||
</div>
|
||||
<CollapsibleTrigger asChild>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
className="h-6 px-2 text-muted-foreground"
|
||||
>
|
||||
<span className="text-xs">{open ? 'hide' : 'show'}</span>
|
||||
<ChevronDown
|
||||
className={cn(
|
||||
'ml-1 size-3 transition-transform',
|
||||
open && 'rotate-180',
|
||||
)}
|
||||
/>
|
||||
</Button>
|
||||
</CollapsibleTrigger>
|
||||
</div>
|
||||
<CollapsibleContent>
|
||||
<div className="mt-1 rounded-md border-destructive/30 border-x border-b bg-destructive/5 px-3 pb-2 text-xs">
|
||||
{truncated ? (
|
||||
<HoverCard openDelay={300}>
|
||||
<HoverCardTrigger asChild>
|
||||
<span className="cursor-default font-mono text-foreground/80">
|
||||
{preview}…
|
||||
</span>
|
||||
</HoverCardTrigger>
|
||||
<HoverCardContent
|
||||
side="bottom"
|
||||
className="max-w-md whitespace-pre-wrap font-mono text-xs"
|
||||
>
|
||||
{message}
|
||||
</HoverCardContent>
|
||||
</HoverCard>
|
||||
) : (
|
||||
<span className="font-mono text-foreground/80">{message}</span>
|
||||
)}
|
||||
</div>
|
||||
</CollapsibleContent>
|
||||
</Collapsible>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
import { Quote } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { firstNonBlankLine, truncate } from './agent-row.helpers'
|
||||
|
||||
interface AgentLastMessageProps {
|
||||
message: string | null
|
||||
}
|
||||
|
||||
const PREVIEW_CHARS = 110
|
||||
|
||||
/**
|
||||
* Inline preview of the most recent user message. Renders as a quoted,
|
||||
* italic line so the row reads like a conversation snippet rather than
|
||||
* a label-and-value pair. No hover-card — opening the agent's chat is
|
||||
* the canonical way to read the full message.
|
||||
*/
|
||||
export const AgentLastMessage: FC<AgentLastMessageProps> = ({ message }) => {
|
||||
if (!message) {
|
||||
return (
|
||||
<p className="mt-1 text-muted-foreground/70 text-xs italic">
|
||||
No messages yet — start a chat
|
||||
</p>
|
||||
)
|
||||
}
|
||||
const preview = truncate(firstNonBlankLine(message), PREVIEW_CHARS)
|
||||
return (
|
||||
<p className="mt-1.5 flex items-start gap-1.5 text-foreground/85 text-sm italic leading-snug">
|
||||
<Quote
|
||||
className="mt-1 size-3 shrink-0 text-muted-foreground/60"
|
||||
aria-hidden
|
||||
/>
|
||||
<span className="truncate">{preview}</span>
|
||||
</p>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
import type { FC } from 'react'
|
||||
import { formatRelativeTime } from '../agent-display.helpers'
|
||||
import { AgentTokenSummary } from './AgentTokenSummary'
|
||||
import type { AgentTokenUsage } from './agent-row.types'
|
||||
|
||||
interface AgentMetaRowProps {
|
||||
lastUsedAt: number | null
|
||||
tokens: AgentTokenUsage | null
|
||||
}
|
||||
|
||||
/**
|
||||
* Bottom-of-row meta line. Intentionally sparse — last activity time
|
||||
* and lifetime tokens. CWD is no longer surfaced here because the path
|
||||
* the server happens to be running from isn't actionable; if a future
|
||||
* surface needs the cwd (chat panel, debug view) it reads from the
|
||||
* listing payload directly.
|
||||
*/
|
||||
export const AgentMetaRow: FC<AgentMetaRowProps> = ({ lastUsedAt, tokens }) => {
|
||||
const lastUsedLabel = formatRelativeTime(lastUsedAt)
|
||||
const tokensTotal =
|
||||
(tokens?.cumulative.input ?? 0) + (tokens?.cumulative.output ?? 0)
|
||||
const showTokens = tokensTotal > 0
|
||||
|
||||
return (
|
||||
<div className="mt-2 flex flex-wrap items-center gap-x-2 text-muted-foreground text-xs">
|
||||
<span>{lastUsedLabel}</span>
|
||||
{showTokens && (
|
||||
<>
|
||||
<span aria-hidden className="text-muted-foreground/50">
|
||||
·
|
||||
</span>
|
||||
<AgentTokenSummary tokens={tokens} />
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
import type { FC } from 'react'
|
||||
import {
|
||||
HoverCard,
|
||||
HoverCardContent,
|
||||
HoverCardTrigger,
|
||||
} from '@/components/ui/hover-card'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { formatLocalDate, ROW_BAR_COUNT } from './agent-row.helpers'
|
||||
|
||||
interface AgentSparklineProps {
|
||||
/** 14 entries, oldest → newest. Today's bucket is the last index. */
|
||||
turnsByDay: number[]
|
||||
/** Same length, same order. Failed turns counted separately. */
|
||||
failedByDay: number[]
|
||||
className?: string
|
||||
}
|
||||
|
||||
const MIN_BAR_HEIGHT_PX = 2
|
||||
const MAX_BAR_HEIGHT_PX = 18
|
||||
|
||||
export const AgentSparkline: FC<AgentSparklineProps> = ({
|
||||
turnsByDay,
|
||||
failedByDay,
|
||||
className,
|
||||
}) => {
|
||||
if (turnsByDay.length === 0 || turnsByDay.every((n) => n === 0)) return null
|
||||
const max = Math.max(1, ...turnsByDay)
|
||||
|
||||
return (
|
||||
<HoverCard openDelay={250}>
|
||||
<HoverCardTrigger asChild>
|
||||
<div
|
||||
role="img"
|
||||
aria-label={`Last ${ROW_BAR_COUNT} days of activity`}
|
||||
className={cn('flex h-5 items-end gap-px', className)}
|
||||
>
|
||||
{turnsByDay.map((count, idx) => {
|
||||
const ratio = count / max
|
||||
const height = Math.max(
|
||||
MIN_BAR_HEIGHT_PX,
|
||||
Math.round(ratio * MAX_BAR_HEIGHT_PX),
|
||||
)
|
||||
const isToday = idx === ROW_BAR_COUNT - 1
|
||||
const failed = failedByDay[idx] ?? 0
|
||||
return (
|
||||
<div
|
||||
// biome-ignore lint/suspicious/noArrayIndexKey: fixed-length sparkline buckets keyed by day position
|
||||
key={`bar-${idx}`}
|
||||
className={cn(
|
||||
'w-1.5 rounded-sm',
|
||||
count === 0
|
||||
? 'bg-muted-foreground/15'
|
||||
: failed > 0
|
||||
? 'bg-destructive/50'
|
||||
: 'bg-[var(--accent-orange)]/50',
|
||||
isToday && 'ring-1 ring-foreground/30',
|
||||
)}
|
||||
style={{ height }}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</HoverCardTrigger>
|
||||
<HoverCardContent side="left" className="w-56 text-xs">
|
||||
<div className="mb-2 font-medium text-sm">Last 14 days</div>
|
||||
<ul className="space-y-0.5">
|
||||
{turnsByDay.map((count, idx) => {
|
||||
const failed = failedByDay[idx] ?? 0
|
||||
const dayLabel = formatLocalDate(idx)
|
||||
return (
|
||||
<li
|
||||
// biome-ignore lint/suspicious/noArrayIndexKey: fixed-length list keyed by day position
|
||||
key={`day-${idx}`}
|
||||
className="flex items-center justify-between text-muted-foreground"
|
||||
>
|
||||
<span>{dayLabel}</span>
|
||||
<span>
|
||||
{count}
|
||||
{failed > 0 && (
|
||||
<span className="ml-1 text-destructive">
|
||||
({failed} failed)
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
</li>
|
||||
)
|
||||
})}
|
||||
</ul>
|
||||
</HoverCardContent>
|
||||
</HoverCard>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
import { TriangleAlert } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import {
|
||||
HoverCard,
|
||||
HoverCardContent,
|
||||
HoverCardTrigger,
|
||||
} from '@/components/ui/hover-card'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { adapterLabel } from '../AdapterIcon'
|
||||
import type { HarnessAgentAdapter } from '../agent-harness-types'
|
||||
import type { AgentAdapterHealth } from './agent-row.types'
|
||||
|
||||
interface AgentSummaryChipsProps {
|
||||
adapter: HarnessAgentAdapter | 'unknown'
|
||||
modelLabel: string | null
|
||||
reasoningEffort: string | null
|
||||
/** When unhealthy, the adapter label dims and a warning chip appears. */
|
||||
adapterHealth: AgentAdapterHealth | null
|
||||
}
|
||||
|
||||
/**
|
||||
* Adapter / model / reasoning summary line. Always rendered (so OpenClaw
|
||||
* rows that fall back to defaults still expose what they're set up to do)
|
||||
* and surfaces adapter-health *only when unhealthy* — keeping the calm
|
||||
* default state silent and reserving visual noise for things the user
|
||||
* needs to act on.
|
||||
*/
|
||||
export const AgentSummaryChips: FC<AgentSummaryChipsProps> = ({
|
||||
adapter,
|
||||
modelLabel,
|
||||
reasoningEffort,
|
||||
adapterHealth,
|
||||
}) => {
|
||||
const parts = [adapterLabel(adapter)]
|
||||
if (modelLabel) parts.push(modelLabel)
|
||||
if (reasoningEffort) parts.push(reasoningEffort)
|
||||
const unhealthy = adapterHealth?.healthy === false
|
||||
return (
|
||||
<div
|
||||
className={cn(
|
||||
'flex items-center gap-1.5 text-muted-foreground text-xs',
|
||||
unhealthy && 'text-muted-foreground/70',
|
||||
)}
|
||||
>
|
||||
<span className="truncate">{parts.join(' · ')}</span>
|
||||
{unhealthy && adapterHealth && (
|
||||
<HoverCard openDelay={200}>
|
||||
<HoverCardTrigger asChild>
|
||||
<Badge
|
||||
variant="outline"
|
||||
className="h-5 cursor-default gap-1 border-amber-500/40 bg-amber-50 px-1.5 text-amber-900 hover:bg-amber-50"
|
||||
>
|
||||
<TriangleAlert className="size-2.5" />
|
||||
<span className="font-normal">Unavailable</span>
|
||||
</Badge>
|
||||
</HoverCardTrigger>
|
||||
<HoverCardContent side="right" className="w-72 text-sm">
|
||||
<div className="font-medium">
|
||||
{adapterLabel(adapter)} CLI not available
|
||||
</div>
|
||||
<div className="mt-1 text-muted-foreground text-xs">
|
||||
{adapterHealth.reason ??
|
||||
'Adapter binary missing on $PATH. Install it from the adapter docs to use this agent.'}
|
||||
</div>
|
||||
</HoverCardContent>
|
||||
</HoverCard>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
import type { FC } from 'react'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { AdapterIcon } from '../AdapterIcon'
|
||||
import { livenessDetail } from '../agent-display.helpers'
|
||||
import type { HarnessAgentAdapter } from '../agent-harness-types'
|
||||
import { type AgentLiveness, LivenessDot } from '../LivenessDot'
|
||||
|
||||
export interface AgentTileProps {
|
||||
adapter: HarnessAgentAdapter | 'unknown'
|
||||
status: AgentLiveness
|
||||
lastUsedAt: number | null
|
||||
}
|
||||
|
||||
/**
|
||||
* Adapter glyph + a single liveness dot. Adapter health is no longer
|
||||
* surfaced here — it lives as an inline pill inside `AgentSummaryChips`
|
||||
* so the user isn't asked to disambiguate two dots on the same tile.
|
||||
*/
|
||||
export const AgentTile: FC<AgentTileProps> = ({
|
||||
adapter,
|
||||
status,
|
||||
lastUsedAt,
|
||||
}) => (
|
||||
<div className="relative shrink-0">
|
||||
<div className="flex h-12 w-12 items-center justify-center rounded-xl bg-muted text-muted-foreground">
|
||||
<AdapterIcon adapter={adapter} className="h-6 w-6" />
|
||||
</div>
|
||||
<LivenessDot
|
||||
status={status}
|
||||
detail={livenessDetail(status, lastUsedAt)}
|
||||
className={cn(
|
||||
'absolute -right-0.5 -bottom-0.5',
|
||||
status === 'working' && 'animate-pulse',
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
@@ -0,0 +1,55 @@
|
||||
import type { FC } from 'react'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import { displayName } from '../agent-display.helpers'
|
||||
import type { AgentListItem } from '../agents-page-types'
|
||||
import type { AgentLiveness } from '../LivenessDot'
|
||||
import { AgentSparkline } from './AgentSparkline'
|
||||
import { PinToggle } from './PinToggle'
|
||||
|
||||
interface AgentTitleRowProps {
|
||||
agent: AgentListItem
|
||||
status: AgentLiveness
|
||||
pinned: boolean
|
||||
turnsByDay: number[]
|
||||
failedByDay: number[]
|
||||
onPinToggle: (next: boolean) => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Title strip: name + status badge + (right-aligned) sparkline. The
|
||||
* pin toggle sits trailing the title so the title always flushes left
|
||||
* regardless of pin state — moving the star left of the title indents
|
||||
* the row's first line off-axis from the model/preview/meta lines
|
||||
* below it. When unpinned and not hovered, the toggle is removed from
|
||||
* layout entirely so it reserves no space at all.
|
||||
*/
|
||||
export const AgentTitleRow: FC<AgentTitleRowProps> = ({
|
||||
agent,
|
||||
status,
|
||||
pinned,
|
||||
turnsByDay,
|
||||
failedByDay,
|
||||
onPinToggle,
|
||||
}) => (
|
||||
<div className="mb-1 flex items-center gap-2">
|
||||
<span className="truncate font-semibold">{displayName(agent)}</span>
|
||||
{status === 'working' && (
|
||||
<Badge
|
||||
variant="secondary"
|
||||
className="bg-amber-50 text-amber-900 hover:bg-amber-50"
|
||||
>
|
||||
Working
|
||||
</Badge>
|
||||
)}
|
||||
{status === 'asleep' && (
|
||||
<Badge variant="outline" className="text-muted-foreground">
|
||||
Asleep
|
||||
</Badge>
|
||||
)}
|
||||
{status === 'error' && <Badge variant="destructive">Attention</Badge>}
|
||||
<PinToggle pinned={pinned} onToggle={onPinToggle} />
|
||||
<div className="ml-auto">
|
||||
<AgentSparkline turnsByDay={turnsByDay} failedByDay={failedByDay} />
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
@@ -0,0 +1,63 @@
|
||||
import type { FC } from 'react'
|
||||
import {
|
||||
HoverCard,
|
||||
HoverCardContent,
|
||||
HoverCardTrigger,
|
||||
} from '@/components/ui/hover-card'
|
||||
import { Progress } from '@/components/ui/progress'
|
||||
import { formatTokens } from './agent-row.helpers'
|
||||
import type { AgentTokenUsage } from './agent-row.types'
|
||||
|
||||
interface AgentTokenSummaryProps {
|
||||
tokens: AgentTokenUsage | null
|
||||
}
|
||||
|
||||
/**
|
||||
* Inline token total + a HoverCard breakdown. Surfaces lifetime tokens
|
||||
* (the only window we can compute reliably from the session record).
|
||||
* Per-window stats land in a follow-up once the activity ledger ships.
|
||||
*/
|
||||
export const AgentTokenSummary: FC<AgentTokenSummaryProps> = ({ tokens }) => {
|
||||
if (!tokens) return null
|
||||
const { input, output } = tokens.cumulative
|
||||
const total = input + output
|
||||
if (total === 0) return null
|
||||
const inputPct = (input / total) * 100
|
||||
|
||||
return (
|
||||
<HoverCard openDelay={200}>
|
||||
<HoverCardTrigger asChild>
|
||||
<span className="cursor-default text-muted-foreground tabular-nums transition-colors hover:text-foreground">
|
||||
{formatTokens(total)} tokens
|
||||
</span>
|
||||
</HoverCardTrigger>
|
||||
<HoverCardContent side="top" align="end" className="w-72 text-sm">
|
||||
<div className="mb-3 flex items-center justify-between">
|
||||
<span className="font-medium">Lifetime tokens</span>
|
||||
<span className="text-muted-foreground text-xs tabular-nums">
|
||||
{formatTokens(total)} total
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<div className="flex items-center justify-between text-xs">
|
||||
<span className="text-muted-foreground">Input</span>
|
||||
<span className="tabular-nums">{formatTokens(input)}</span>
|
||||
</div>
|
||||
<Progress value={inputPct} className="h-1.5" />
|
||||
|
||||
<div className="mt-2 flex items-center justify-between text-xs">
|
||||
<span className="text-muted-foreground">Output</span>
|
||||
<span className="tabular-nums">{formatTokens(output)}</span>
|
||||
</div>
|
||||
<Progress value={100 - inputPct} className="h-1.5" />
|
||||
</div>
|
||||
|
||||
<p className="mt-3 border-t pt-2 text-muted-foreground text-xs leading-snug">
|
||||
Cumulative across every turn this agent has run. Per-window stats
|
||||
arrive in a future release.
|
||||
</p>
|
||||
</HoverCardContent>
|
||||
</HoverCard>
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
import { Star } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipProvider,
|
||||
TooltipTrigger,
|
||||
} from '@/components/ui/tooltip'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
interface PinToggleProps {
|
||||
pinned: boolean
|
||||
onToggle: (next: boolean) => void
|
||||
}
|
||||
|
||||
/**
|
||||
* Trailing star toggle. The button is *always rendered* — only its
|
||||
* opacity changes between pinned/unpinned/hover states — so the title
|
||||
* row's height is constant. Hiding the slot via `display: none` would
|
||||
* collapse the row's vertical metrics on hover and shift every card
|
||||
* below in the rail.
|
||||
*
|
||||
* Placement is trailing the title (after the status badge) so the
|
||||
* title itself flushes left regardless of pin state — leading the
|
||||
* row with the star would indent the title relative to the model /
|
||||
* preview / meta lines beneath it.
|
||||
*/
|
||||
export const PinToggle: FC<PinToggleProps> = ({ pinned, onToggle }) => (
|
||||
<TooltipProvider delayDuration={300}>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="icon"
|
||||
className={cn(
|
||||
'size-6 text-muted-foreground transition-opacity hover:text-foreground',
|
||||
pinned ? 'opacity-100' : 'opacity-0 group-hover:opacity-100',
|
||||
)}
|
||||
aria-pressed={pinned}
|
||||
aria-label={pinned ? 'Unpin agent' : 'Pin agent'}
|
||||
onClick={(event) => {
|
||||
event.stopPropagation()
|
||||
onToggle(!pinned)
|
||||
}}
|
||||
>
|
||||
<Star
|
||||
className={cn(
|
||||
'size-3.5',
|
||||
pinned && 'fill-amber-400 text-amber-500',
|
||||
)}
|
||||
/>
|
||||
</Button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="top" className="text-xs">
|
||||
{pinned ? 'Unpin' : 'Pin to top'}
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)
|
||||
@@ -0,0 +1,73 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import {
|
||||
firstNonBlankLine,
|
||||
formatLocalDate,
|
||||
formatTokens,
|
||||
ROW_BAR_COUNT,
|
||||
truncate,
|
||||
} from './agent-row.helpers'
|
||||
|
||||
describe('formatTokens', () => {
|
||||
it('renders zero / NaN as "0"', () => {
|
||||
expect(formatTokens(0)).toBe('0')
|
||||
expect(formatTokens(Number.NaN)).toBe('0')
|
||||
})
|
||||
|
||||
it('renders sub-1K as integer', () => {
|
||||
expect(formatTokens(142)).toBe('142')
|
||||
})
|
||||
|
||||
it('renders K with one decimal under 10', () => {
|
||||
expect(formatTokens(8_400)).toBe('8.4K')
|
||||
})
|
||||
|
||||
it('drops the decimal at >=10K', () => {
|
||||
expect(formatTokens(120_000)).toBe('120K')
|
||||
})
|
||||
|
||||
it('renders M with one decimal under 10', () => {
|
||||
expect(formatTokens(1_200_000)).toBe('1.2M')
|
||||
})
|
||||
})
|
||||
|
||||
describe('firstNonBlankLine', () => {
|
||||
it('returns the first non-blank line', () => {
|
||||
expect(firstNonBlankLine('\n\nhello\nworld')).toBe('hello')
|
||||
})
|
||||
|
||||
it('skips USER_QUERY envelope tags', () => {
|
||||
expect(firstNonBlankLine('<USER_QUERY>\nfix tests\n</USER_QUERY>')).toBe(
|
||||
'fix tests',
|
||||
)
|
||||
})
|
||||
|
||||
it('falls back to the trimmed input when nothing matches', () => {
|
||||
expect(firstNonBlankLine(' single ')).toBe('single')
|
||||
})
|
||||
})
|
||||
|
||||
describe('truncate', () => {
|
||||
it('returns input unchanged when within limit', () => {
|
||||
expect(truncate('hello', 10)).toBe('hello')
|
||||
})
|
||||
|
||||
it('appends an ellipsis when over limit', () => {
|
||||
expect(truncate('hello world', 6)).toBe('hello…')
|
||||
})
|
||||
})
|
||||
|
||||
describe('formatLocalDate', () => {
|
||||
const today = new Date('2026-04-30T12:00:00Z')
|
||||
|
||||
it('labels today and yesterday explicitly', () => {
|
||||
expect(formatLocalDate(ROW_BAR_COUNT - 1, today)).toBe('today')
|
||||
expect(formatLocalDate(ROW_BAR_COUNT - 2, today)).toBe('yesterday')
|
||||
})
|
||||
|
||||
it('returns a "Mon D" format for older days', () => {
|
||||
const label = formatLocalDate(0, today)
|
||||
// "Apr 17" or "Apr 17," depending on locale; just assert it
|
||||
// contains a month abbreviation and a day number.
|
||||
expect(label).toMatch(/[A-Za-z]+ \d+/)
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,64 @@
|
||||
/**
|
||||
* Pure formatters consumed by row sub-components. Kept distinct from
|
||||
* `agent-display.helpers.ts` (page-level helpers) so the row internals
|
||||
* have an obvious single home.
|
||||
*/
|
||||
|
||||
const TOKEN_THRESHOLDS: Array<[number, string]> = [
|
||||
[1_000_000, 'M'],
|
||||
[1_000, 'K'],
|
||||
]
|
||||
|
||||
/** `1.2M`, `820K`, `8.4K`, `142`, `0`. */
|
||||
export function formatTokens(n: number): string {
|
||||
if (!Number.isFinite(n) || n <= 0) return '0'
|
||||
for (const [threshold, suffix] of TOKEN_THRESHOLDS) {
|
||||
if (n >= threshold) {
|
||||
const value = n / threshold
|
||||
const decimal = value < 10 ? value.toFixed(1) : value.toFixed(0)
|
||||
return `${decimal}${suffix}`
|
||||
}
|
||||
}
|
||||
return String(Math.round(n))
|
||||
}
|
||||
|
||||
const USER_QUERY_OPEN = /^<USER_QUERY>$/i
|
||||
const USER_QUERY_CLOSE = /^<\/USER_QUERY>$/i
|
||||
|
||||
/**
|
||||
* First non-blank line, with the BrowserOS user-system-prompt
|
||||
* `<USER_QUERY>` envelope tags stripped so previews don't show
|
||||
* structural noise.
|
||||
*/
|
||||
export function firstNonBlankLine(text: string): string {
|
||||
const lines = text.split('\n').map((line) => line.trim())
|
||||
for (const line of lines) {
|
||||
if (!line) continue
|
||||
if (USER_QUERY_OPEN.test(line) || USER_QUERY_CLOSE.test(line)) continue
|
||||
return line
|
||||
}
|
||||
return text.trim()
|
||||
}
|
||||
|
||||
export function truncate(text: string, max: number): string {
|
||||
if (text.length <= max) return text
|
||||
return `${text.slice(0, max - 1).trimEnd()}…`
|
||||
}
|
||||
|
||||
const SPARKLINE_DAYS = 14
|
||||
|
||||
/**
|
||||
* "today" / "yesterday" / "Apr 17" — given an index 0..13 from
|
||||
* oldest → newest. `today` defaults to `new Date()` so callers don't
|
||||
* have to thread a clock through.
|
||||
*/
|
||||
export function formatLocalDate(idx: number, today: Date = new Date()): string {
|
||||
if (idx === SPARKLINE_DAYS - 1) return 'today'
|
||||
if (idx === SPARKLINE_DAYS - 2) return 'yesterday'
|
||||
const offset = SPARKLINE_DAYS - 1 - idx
|
||||
const date = new Date(today)
|
||||
date.setDate(date.getDate() - offset)
|
||||
return date.toLocaleDateString(undefined, { month: 'short', day: 'numeric' })
|
||||
}
|
||||
|
||||
export const ROW_BAR_COUNT = SPARKLINE_DAYS
|
||||
@@ -0,0 +1,51 @@
|
||||
import type { HarnessAgentAdapter } from '../agent-harness-types'
|
||||
import type { AgentListItem } from '../agents-page-types'
|
||||
import type { AgentLiveness } from '../LivenessDot'
|
||||
|
||||
/**
|
||||
* Window-bounded token usage. Server returns `null` when no session
|
||||
* record exists yet for the agent.
|
||||
*/
|
||||
export interface AgentTokenUsage {
|
||||
last7d: { input: number; output: number; requestCount: number }
|
||||
cumulative: { input: number; output: number }
|
||||
}
|
||||
|
||||
export interface AgentAdapterHealth {
|
||||
healthy: boolean
|
||||
reason?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Everything an `AgentRowCard` needs to render. Mirrors the shape
|
||||
* `useHarnessAgents` exposes; the page assembles one entry per row in
|
||||
* `AgentList` and passes it down. Sub-components only see slices of
|
||||
* this object — no prop drilling beyond two levels.
|
||||
*/
|
||||
export interface AgentRowData {
|
||||
agent: AgentListItem
|
||||
adapter: HarnessAgentAdapter | 'unknown'
|
||||
modelLabel: string | null
|
||||
reasoningEffort: string | null
|
||||
status: AgentLiveness
|
||||
lastUsedAt: number | null
|
||||
pinned: boolean
|
||||
cwd: string | null
|
||||
lastUserMessage: string | null
|
||||
tokens: AgentTokenUsage | null
|
||||
/** 14 entries, oldest → newest. Today is the last index. */
|
||||
turnsByDay: number[]
|
||||
/** Same length and ordering as `turnsByDay`. */
|
||||
failedByDay: number[]
|
||||
lastError: string | null
|
||||
lastErrorAt: number | null
|
||||
/** When non-null, an in-flight turn this row can be resumed from. */
|
||||
activeTurnId: string | null
|
||||
/** Adapter-level health, shared across rows for the same adapter. */
|
||||
adapterHealth: AgentAdapterHealth | null
|
||||
}
|
||||
|
||||
export interface AgentRowCallbacks {
|
||||
onDelete: (agent: AgentListItem) => void
|
||||
onPinToggle: (agent: AgentListItem, next: boolean) => void
|
||||
}
|
||||
@@ -0,0 +1,172 @@
|
||||
import type { NavigateFunction } from 'react-router'
|
||||
import {
|
||||
AGENT_CREATED_EVENT,
|
||||
AGENT_DELETED_EVENT,
|
||||
} from '@/lib/constants/analyticsEvents'
|
||||
import { track } from '@/lib/metrics/track'
|
||||
import type { HarnessAgent, HarnessAgentAdapter } from './agent-harness-types'
|
||||
import type {
|
||||
AgentListItem,
|
||||
CreateAgentRuntime,
|
||||
ProviderOption,
|
||||
} from './agents-page-types'
|
||||
import { findOpenClawCliProviderById } from './openclaw-cli-providers'
|
||||
import type {
|
||||
AgentEntry,
|
||||
OpenClawAgentMutationInput,
|
||||
OpenClawSetupInput,
|
||||
} from './useOpenClaw'
|
||||
|
||||
export interface AgentPageActionInput {
|
||||
createProviderId: string
|
||||
createRuntime: CreateAgentRuntime
|
||||
harnessModelId: string
|
||||
harnessReasoningEffort: string
|
||||
navigate: NavigateFunction
|
||||
newName: string
|
||||
selectableOpenClawProviders: ProviderOption[]
|
||||
setupProviderId: string
|
||||
createHarnessAgent: (input: {
|
||||
name: string
|
||||
adapter: HarnessAgentAdapter
|
||||
modelId?: string
|
||||
reasoningEffort?: string
|
||||
}) => Promise<HarnessAgent>
|
||||
createOpenClawAgent: (
|
||||
input: OpenClawAgentMutationInput,
|
||||
) => Promise<{ agent: AgentEntry }>
|
||||
deleteHarnessAgent: (agentId: string) => Promise<unknown>
|
||||
deleteOpenClawAgent: (agentId: string) => Promise<unknown>
|
||||
setCliAuthModalOpen: (open: boolean) => void
|
||||
setCreateError: (error: string | null) => void
|
||||
setCreateOpen: (open: boolean) => void
|
||||
setDeletingAgentKey: (key: string | null) => void
|
||||
setNewName: (name: string) => void
|
||||
setPageError: (error: string | null) => void
|
||||
setSetupOpen: (open: boolean) => void
|
||||
setupOpenClaw: (input: OpenClawSetupInput) => Promise<unknown>
|
||||
}
|
||||
|
||||
export function createAgentPageActions(input: AgentPageActionInput) {
|
||||
const runWithPageErrorHandling = async (fn: () => Promise<unknown>) => {
|
||||
input.setPageError(null)
|
||||
try {
|
||||
await fn()
|
||||
} catch (err) {
|
||||
input.setPageError(err instanceof Error ? err.message : String(err))
|
||||
}
|
||||
}
|
||||
|
||||
const handleSetup = async () => {
|
||||
const option = input.selectableOpenClawProviders.find(
|
||||
(item) => item.id === input.setupProviderId,
|
||||
)
|
||||
const isCli = !!option && !!findOpenClawCliProviderById(option.type)
|
||||
const llmOption = !isCli && option ? option : undefined
|
||||
|
||||
await runWithPageErrorHandling(async () => {
|
||||
await input.setupOpenClaw({
|
||||
providerType: option?.type,
|
||||
providerName: isCli ? undefined : option?.name,
|
||||
baseUrl: llmOption?.baseUrl,
|
||||
apiKey: llmOption?.apiKey,
|
||||
modelId: option?.modelId,
|
||||
})
|
||||
input.setSetupOpen(false)
|
||||
if (isCli) input.setCliAuthModalOpen(true)
|
||||
})
|
||||
}
|
||||
|
||||
const handleOpenClawCreate = async () => {
|
||||
if (!input.newName.trim()) return
|
||||
const option = input.selectableOpenClawProviders.find(
|
||||
(item) => item.id === input.createProviderId,
|
||||
)
|
||||
const normalizedName = input.newName
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
.replace(/\s+/g, '-')
|
||||
const isCli = !!option && !!findOpenClawCliProviderById(option.type)
|
||||
const llmOption = !isCli && option ? option : undefined
|
||||
|
||||
input.setCreateError(null)
|
||||
try {
|
||||
const result = await input.createOpenClawAgent({
|
||||
name: normalizedName,
|
||||
providerType: option?.type,
|
||||
providerName: isCli ? undefined : option?.name,
|
||||
baseUrl: llmOption?.baseUrl,
|
||||
apiKey: llmOption?.apiKey,
|
||||
modelId: option?.modelId,
|
||||
})
|
||||
input.setCreateOpen(false)
|
||||
input.setNewName('')
|
||||
track(AGENT_CREATED_EVENT, {
|
||||
runtime: 'openclaw',
|
||||
provider_type: option?.type,
|
||||
})
|
||||
input.navigate(`/agents/${result.agent.agentId}`)
|
||||
} catch (err) {
|
||||
input.setCreateError(err instanceof Error ? err.message : String(err))
|
||||
}
|
||||
}
|
||||
|
||||
const handleHarnessCreate = async () => {
|
||||
if (!input.newName.trim()) return
|
||||
|
||||
input.setCreateError(null)
|
||||
try {
|
||||
const agent = await input.createHarnessAgent({
|
||||
name: input.newName.trim(),
|
||||
adapter: input.createRuntime as HarnessAgentAdapter,
|
||||
modelId: input.harnessModelId || undefined,
|
||||
reasoningEffort: input.harnessReasoningEffort || undefined,
|
||||
})
|
||||
input.setCreateOpen(false)
|
||||
input.setNewName('')
|
||||
track(AGENT_CREATED_EVENT, {
|
||||
runtime: input.createRuntime,
|
||||
model_id: input.harnessModelId || undefined,
|
||||
reasoning_effort: input.harnessReasoningEffort || undefined,
|
||||
})
|
||||
input.navigate(`/agents/${agent.id}`)
|
||||
} catch (err) {
|
||||
input.setCreateError(err instanceof Error ? err.message : String(err))
|
||||
}
|
||||
}
|
||||
|
||||
const handleCreate = () => {
|
||||
const createByRuntime: Record<CreateAgentRuntime, () => Promise<void>> = {
|
||||
openclaw: handleOpenClawCreate,
|
||||
claude: handleHarnessCreate,
|
||||
codex: handleHarnessCreate,
|
||||
}
|
||||
void createByRuntime[input.createRuntime]()
|
||||
}
|
||||
|
||||
const handleDelete = async (agent: AgentListItem) => {
|
||||
input.setDeletingAgentKey(agent.key)
|
||||
await runWithPageErrorHandling(async () => {
|
||||
const deleteBySource: Record<
|
||||
AgentListItem['source'],
|
||||
(agentId: string) => Promise<unknown>
|
||||
> = {
|
||||
openclaw: (agentId) => input.deleteOpenClawAgent(agentId),
|
||||
'agent-harness': (agentId) => input.deleteHarnessAgent(agentId),
|
||||
}
|
||||
await deleteBySource[agent.source](agent.agentId)
|
||||
track(AGENT_DELETED_EVENT, {
|
||||
runtime: agent.source,
|
||||
agent_id: agent.agentId,
|
||||
})
|
||||
})
|
||||
input.setDeletingAgentKey(null)
|
||||
}
|
||||
|
||||
return {
|
||||
handleCreate,
|
||||
handleDelete,
|
||||
handleSetup,
|
||||
runWithPageErrorHandling,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,173 @@
|
||||
import { type Dispatch, type SetStateAction, useEffect, useMemo } from 'react'
|
||||
import type { LlmProviderConfig } from '@/lib/llm-providers/types'
|
||||
import type {
|
||||
HarnessAdapterDescriptor,
|
||||
HarnessAgentAdapter,
|
||||
} from './agent-harness-types'
|
||||
import type { CreateAgentRuntime } from './agents-page-types'
|
||||
import { toProviderOptions } from './agents-page-utils'
|
||||
import {
|
||||
buildOpenClawCliProviderOptions,
|
||||
findOpenClawCliProviderById,
|
||||
useOpenClawCliProviderAuthStatus,
|
||||
} from './openclaw-cli-providers'
|
||||
|
||||
export function useDefaultAgentName(
|
||||
createOpen: boolean,
|
||||
setNewName: Dispatch<SetStateAction<string>>,
|
||||
): void {
|
||||
useEffect(() => {
|
||||
if (!createOpen) return
|
||||
setNewName((current) => current || 'agent')
|
||||
}, [createOpen, setNewName])
|
||||
}
|
||||
|
||||
export function useHarnessAgentDefaults(input: {
|
||||
adapters: HarnessAdapterDescriptor[]
|
||||
createOpen: boolean
|
||||
harnessAdapterId: HarnessAgentAdapter
|
||||
setHarnessAdapterId: Dispatch<SetStateAction<HarnessAgentAdapter>>
|
||||
setHarnessModelId: Dispatch<SetStateAction<string>>
|
||||
setHarnessReasoningEffort: Dispatch<SetStateAction<string>>
|
||||
}): void {
|
||||
const {
|
||||
adapters,
|
||||
createOpen,
|
||||
harnessAdapterId,
|
||||
setHarnessAdapterId,
|
||||
setHarnessModelId,
|
||||
setHarnessReasoningEffort,
|
||||
} = input
|
||||
|
||||
useEffect(() => {
|
||||
if (!createOpen) return
|
||||
const adapter =
|
||||
adapters.find((entry) => entry.id === harnessAdapterId) ?? adapters[0]
|
||||
if (!adapter) return
|
||||
setHarnessAdapterId(adapter.id)
|
||||
setHarnessModelId((current) => current || adapter.defaultModelId)
|
||||
setHarnessReasoningEffort(
|
||||
(current) => current || adapter.defaultReasoningEffort,
|
||||
)
|
||||
}, [
|
||||
adapters,
|
||||
createOpen,
|
||||
harnessAdapterId,
|
||||
setHarnessAdapterId,
|
||||
setHarnessModelId,
|
||||
setHarnessReasoningEffort,
|
||||
])
|
||||
}
|
||||
|
||||
export function useOpenClawProviderSelection(input: {
|
||||
providers: LlmProviderConfig[]
|
||||
defaultProviderId: string
|
||||
createOpen: boolean
|
||||
createRuntime: CreateAgentRuntime
|
||||
createProviderId: string
|
||||
setCreateProviderId: Dispatch<SetStateAction<string>>
|
||||
setupOpen: boolean
|
||||
setupProviderId: string
|
||||
setSetupProviderId: Dispatch<SetStateAction<string>>
|
||||
cliAuthModalOpen: boolean
|
||||
setCliAuthModalOpen: Dispatch<SetStateAction<boolean>>
|
||||
}) {
|
||||
const {
|
||||
providers,
|
||||
defaultProviderId,
|
||||
createOpen,
|
||||
createRuntime,
|
||||
createProviderId,
|
||||
setCreateProviderId,
|
||||
setupOpen,
|
||||
setupProviderId,
|
||||
setSetupProviderId,
|
||||
cliAuthModalOpen,
|
||||
setCliAuthModalOpen,
|
||||
} = input
|
||||
const cliProviderOptions = useMemo(
|
||||
() => buildOpenClawCliProviderOptions(),
|
||||
[],
|
||||
)
|
||||
const selectableOpenClawProviders = useMemo(
|
||||
() => toProviderOptions(providers, cliProviderOptions),
|
||||
[providers, cliProviderOptions],
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
if (selectableOpenClawProviders.length === 0) return
|
||||
const fallbackId =
|
||||
selectableOpenClawProviders.find(
|
||||
(provider) => provider.id === defaultProviderId,
|
||||
)?.id ?? selectableOpenClawProviders[0].id
|
||||
|
||||
if (createOpen && !createProviderId) {
|
||||
setCreateProviderId(fallbackId)
|
||||
}
|
||||
}, [
|
||||
createOpen,
|
||||
createProviderId,
|
||||
defaultProviderId,
|
||||
selectableOpenClawProviders,
|
||||
setCreateProviderId,
|
||||
])
|
||||
|
||||
useEffect(() => {
|
||||
if (selectableOpenClawProviders.length === 0) return
|
||||
const fallbackId =
|
||||
selectableOpenClawProviders.find(
|
||||
(provider) => provider.id === defaultProviderId,
|
||||
)?.id ?? selectableOpenClawProviders[0].id
|
||||
|
||||
if (setupOpen && !setupProviderId) {
|
||||
setSetupProviderId(fallbackId)
|
||||
}
|
||||
}, [
|
||||
defaultProviderId,
|
||||
selectableOpenClawProviders,
|
||||
setSetupProviderId,
|
||||
setupOpen,
|
||||
setupProviderId,
|
||||
])
|
||||
|
||||
const selectedCreateOption = selectableOpenClawProviders.find(
|
||||
(provider) => provider.id === createProviderId,
|
||||
)
|
||||
const selectedCliProvider = selectedCreateOption
|
||||
? findOpenClawCliProviderById(selectedCreateOption.type)
|
||||
: undefined
|
||||
const selectedSetupOption = selectableOpenClawProviders.find(
|
||||
(provider) => provider.id === setupProviderId,
|
||||
)
|
||||
const selectedSetupCliProvider = selectedSetupOption
|
||||
? findOpenClawCliProviderById(selectedSetupOption.type)
|
||||
: undefined
|
||||
const activeCliProvider =
|
||||
(setupOpen && selectedSetupCliProvider) ||
|
||||
(createOpen && createRuntime === 'openclaw' && selectedCliProvider) ||
|
||||
undefined
|
||||
const {
|
||||
data: cliAuthStatus,
|
||||
isLoading: cliAuthLoading,
|
||||
error: cliAuthError,
|
||||
} = useOpenClawCliProviderAuthStatus(
|
||||
activeCliProvider?.id ?? '',
|
||||
!!activeCliProvider,
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
if (cliAuthModalOpen && cliAuthStatus?.loggedIn) {
|
||||
setCliAuthModalOpen(false)
|
||||
}
|
||||
}, [cliAuthModalOpen, cliAuthStatus?.loggedIn, setCliAuthModalOpen])
|
||||
|
||||
return {
|
||||
selectableOpenClawProviders,
|
||||
selectedCliProvider,
|
||||
selectedSetupCliProvider,
|
||||
authTerminalProvider: selectedSetupCliProvider ?? selectedCliProvider,
|
||||
cliAuthStatus,
|
||||
cliAuthLoading,
|
||||
cliAuthError,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,119 @@
|
||||
import type { HarnessAgentAdapter } from './agent-harness-types'
|
||||
import type { GatewayLifecycleAction, OpenClawStatus } from './useOpenClaw'
|
||||
|
||||
export type CreateAgentRuntime = 'openclaw' | HarnessAgentAdapter
|
||||
|
||||
export interface ProviderOption {
|
||||
id: string
|
||||
type: string
|
||||
name: string
|
||||
modelId: string
|
||||
baseUrl?: string
|
||||
apiKey?: string
|
||||
}
|
||||
|
||||
export interface AgentListItem {
|
||||
key: string
|
||||
agentId: string
|
||||
name: string
|
||||
source: 'openclaw' | 'agent-harness'
|
||||
runtimeLabel: string
|
||||
modelLabel: string
|
||||
detail: string
|
||||
canChat: boolean
|
||||
canDelete: boolean
|
||||
}
|
||||
|
||||
export interface GatewayUiState {
|
||||
canManageAgents: boolean
|
||||
controlPlaneDegraded: boolean
|
||||
controlPlaneBusy: boolean
|
||||
}
|
||||
|
||||
export const DEFAULT_HARNESS_ADAPTER: HarnessAgentAdapter = 'claude'
|
||||
export const DEFAULT_CREATE_RUNTIME: CreateAgentRuntime = 'openclaw'
|
||||
|
||||
export const LIFECYCLE_BANNER_COPY: Record<GatewayLifecycleAction, string> = {
|
||||
setup: 'Setting up OpenClaw...',
|
||||
start: 'Starting gateway...',
|
||||
stop: 'Stopping gateway...',
|
||||
restart: 'Restarting gateway...',
|
||||
reconnect: 'Restoring gateway connection...',
|
||||
}
|
||||
|
||||
export const CONTROL_PLANE_COPY: Record<
|
||||
OpenClawStatus['controlPlaneStatus'],
|
||||
{
|
||||
badgeVariant: 'default' | 'secondary' | 'outline' | 'destructive'
|
||||
badgeLabel: string
|
||||
title: string
|
||||
description: string
|
||||
}
|
||||
> = {
|
||||
connected: {
|
||||
badgeVariant: 'default',
|
||||
badgeLabel: 'Control Plane Ready',
|
||||
title: 'Gateway Connected',
|
||||
description: 'OpenClaw can create, manage, and chat with agents normally.',
|
||||
},
|
||||
connecting: {
|
||||
badgeVariant: 'secondary',
|
||||
badgeLabel: 'Connecting',
|
||||
title: 'Connecting to Gateway',
|
||||
description:
|
||||
'BrowserOS is establishing the OpenClaw control channel for agent operations.',
|
||||
},
|
||||
reconnecting: {
|
||||
badgeVariant: 'secondary',
|
||||
badgeLabel: 'Reconnecting',
|
||||
title: 'Reconnecting Control Plane',
|
||||
description:
|
||||
'The gateway process is up, but BrowserOS is restoring the control channel.',
|
||||
},
|
||||
recovering: {
|
||||
badgeVariant: 'secondary',
|
||||
badgeLabel: 'Recovering',
|
||||
title: 'Recovering Gateway Connection',
|
||||
description:
|
||||
'BrowserOS detected a control-plane fault and is trying a safe recovery path.',
|
||||
},
|
||||
disconnected: {
|
||||
badgeVariant: 'outline',
|
||||
badgeLabel: 'Disconnected',
|
||||
title: 'Gateway Disconnected',
|
||||
description: 'The gateway process is not available to BrowserOS right now.',
|
||||
},
|
||||
failed: {
|
||||
badgeVariant: 'destructive',
|
||||
badgeLabel: 'Needs Attention',
|
||||
title: 'Gateway Recovery Failed',
|
||||
description:
|
||||
'BrowserOS could not restore the OpenClaw control channel automatically.',
|
||||
},
|
||||
}
|
||||
|
||||
export const FALLBACK_CONTROL_PLANE_COPY = {
|
||||
badgeVariant: 'outline' as const,
|
||||
badgeLabel: 'Unknown',
|
||||
title: 'Gateway State Unknown',
|
||||
description:
|
||||
'BrowserOS received a gateway status it does not recognize yet. Refreshing or reconnecting should restore a known state.',
|
||||
}
|
||||
|
||||
export const RECOVERY_REASON_COPY: Record<
|
||||
NonNullable<OpenClawStatus['lastRecoveryReason']>,
|
||||
string
|
||||
> = {
|
||||
transient_disconnect:
|
||||
'The control channel dropped briefly and BrowserOS is retrying it.',
|
||||
signature_expired:
|
||||
'The gateway rejected the signed device handshake because its clock drifted.',
|
||||
pairing_required:
|
||||
'The gateway asked BrowserOS to approve its local device identity again.',
|
||||
token_mismatch:
|
||||
'BrowserOS had to reload the gateway token before reconnecting.',
|
||||
container_not_ready:
|
||||
'The OpenClaw gateway process is not ready yet, so control-plane recovery cannot start.',
|
||||
unknown:
|
||||
'BrowserOS hit an unexpected gateway error and could not classify it cleanly.',
|
||||
}
|
||||
@@ -0,0 +1,167 @@
|
||||
import type { LlmProviderConfig } from '@/lib/llm-providers/types'
|
||||
import type { HarnessAgent, HarnessAgentAdapter } from './agent-harness-types'
|
||||
import {
|
||||
type AgentListItem,
|
||||
CONTROL_PLANE_COPY,
|
||||
FALLBACK_CONTROL_PLANE_COPY,
|
||||
type GatewayUiState,
|
||||
LIFECYCLE_BANNER_COPY,
|
||||
type ProviderOption,
|
||||
RECOVERY_REASON_COPY,
|
||||
} from './agents-page-types'
|
||||
import { getOpenClawSupportedProviders } from './openclaw-supported-providers'
|
||||
import {
|
||||
type AgentEntry,
|
||||
type GatewayLifecycleAction,
|
||||
getModelDisplayName,
|
||||
type OpenClawStatus,
|
||||
} from './useOpenClaw'
|
||||
|
||||
export function getControlPlaneCopy(
|
||||
status: OpenClawStatus['controlPlaneStatus'],
|
||||
) {
|
||||
return CONTROL_PLANE_COPY[status] ?? FALLBACK_CONTROL_PLANE_COPY
|
||||
}
|
||||
|
||||
export function getRecoveryDetail(status: OpenClawStatus): string | null {
|
||||
if (!status.lastRecoveryReason && !status.lastGatewayError) return null
|
||||
|
||||
const detail = status.lastRecoveryReason
|
||||
? RECOVERY_REASON_COPY[status.lastRecoveryReason]
|
||||
: null
|
||||
|
||||
if (status.lastGatewayError && detail) {
|
||||
return `${detail} Latest gateway error: ${status.lastGatewayError}`
|
||||
}
|
||||
|
||||
return status.lastGatewayError ?? detail
|
||||
}
|
||||
|
||||
export function formatHarnessAdapter(adapter: HarnessAgentAdapter): string {
|
||||
return adapter === 'claude' ? 'Claude Code' : 'Codex'
|
||||
}
|
||||
|
||||
export function toProviderOptions(
|
||||
providers: LlmProviderConfig[],
|
||||
cliProviders: ProviderOption[],
|
||||
): ProviderOption[] {
|
||||
return [...getOpenClawSupportedProviders(providers), ...cliProviders]
|
||||
}
|
||||
|
||||
export function toOpenClawListItem(
|
||||
agent: AgentEntry,
|
||||
canManageAgents: boolean,
|
||||
): AgentListItem {
|
||||
return {
|
||||
key: `openclaw:${agent.agentId}`,
|
||||
agentId: agent.agentId,
|
||||
name: agent.name,
|
||||
source: 'openclaw',
|
||||
runtimeLabel: 'OpenClaw',
|
||||
modelLabel: getModelDisplayName(agent.model) ?? 'default',
|
||||
detail: agent.workspace,
|
||||
canChat: canManageAgents,
|
||||
canDelete: canManageAgents && agent.agentId !== 'main',
|
||||
}
|
||||
}
|
||||
|
||||
export function toHarnessListItem(agent: HarnessAgent): AgentListItem {
|
||||
return {
|
||||
key: `agent-harness:${agent.id}`,
|
||||
agentId: agent.id,
|
||||
name: agent.name,
|
||||
source: 'agent-harness',
|
||||
runtimeLabel: formatHarnessAdapter(agent.adapter),
|
||||
modelLabel: agent.modelId ?? 'default',
|
||||
detail: `${agent.adapter}:main`,
|
||||
canChat: true,
|
||||
canDelete: true,
|
||||
}
|
||||
}
|
||||
|
||||
export function getGatewayUiState(
|
||||
status: OpenClawStatus | null,
|
||||
): GatewayUiState {
|
||||
if (!status) {
|
||||
return {
|
||||
canManageAgents: false,
|
||||
controlPlaneDegraded: false,
|
||||
controlPlaneBusy: false,
|
||||
}
|
||||
}
|
||||
|
||||
const controlPlaneBusy =
|
||||
status.controlPlaneStatus === 'connecting' ||
|
||||
status.controlPlaneStatus === 'reconnecting' ||
|
||||
status.controlPlaneStatus === 'recovering'
|
||||
|
||||
return {
|
||||
canManageAgents:
|
||||
status.status === 'running' && status.controlPlaneStatus === 'connected',
|
||||
controlPlaneBusy,
|
||||
controlPlaneDegraded:
|
||||
status.status === 'running' && status.controlPlaneStatus !== 'connected',
|
||||
}
|
||||
}
|
||||
|
||||
export function getLifecycleBanner(
|
||||
action: GatewayLifecycleAction | null,
|
||||
): string | null {
|
||||
return action ? LIFECYCLE_BANNER_COPY[action] : null
|
||||
}
|
||||
|
||||
export function canManageOpenClawAgents(
|
||||
state: GatewayUiState,
|
||||
lifecyclePending: boolean,
|
||||
): boolean {
|
||||
return state.canManageAgents && !lifecyclePending
|
||||
}
|
||||
|
||||
export function shouldShowControlPlaneDegraded(
|
||||
state: GatewayUiState,
|
||||
lifecyclePending: boolean,
|
||||
): boolean {
|
||||
return state.controlPlaneDegraded && !lifecyclePending
|
||||
}
|
||||
|
||||
export function getControlPlaneCopyForStatus(status: OpenClawStatus | null) {
|
||||
return status
|
||||
? getControlPlaneCopy(status.controlPlaneStatus)
|
||||
: FALLBACK_CONTROL_PLANE_COPY
|
||||
}
|
||||
|
||||
export function getVisibleOpenClawAgents(
|
||||
enabled: boolean,
|
||||
agents: AgentEntry[],
|
||||
): AgentEntry[] {
|
||||
return enabled ? agents : []
|
||||
}
|
||||
|
||||
export function getAgentsLoading(input: {
|
||||
adaptersLoading: boolean
|
||||
harnessAgentsLoading: boolean
|
||||
openClawAgentsLoading: boolean
|
||||
}): boolean {
|
||||
return (
|
||||
input.adaptersLoading ||
|
||||
input.harnessAgentsLoading ||
|
||||
input.openClawAgentsLoading
|
||||
)
|
||||
}
|
||||
|
||||
export function getInlineError(input: {
|
||||
lifecyclePending: boolean
|
||||
pageError: string | null
|
||||
openClawAgentsError: Error | null
|
||||
adaptersError: Error | null
|
||||
harnessAgentsError: Error | null
|
||||
}): string | null {
|
||||
if (input.lifecyclePending) return null
|
||||
return (
|
||||
input.pageError ??
|
||||
input.openClawAgentsError?.message ??
|
||||
input.adaptersError?.message ??
|
||||
input.harnessAgentsError?.message ??
|
||||
null
|
||||
)
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import { buildAgentApiUrl } from './agent-api-url'
|
||||
import { mapHarnessAgentToEntry } from './agent-harness-types'
|
||||
|
||||
describe('mapHarnessAgentToEntry', () => {
|
||||
it('maps created harness agents into chat-compatible entries', () => {
|
||||
expect(
|
||||
mapHarnessAgentToEntry({
|
||||
id: 'agent-1',
|
||||
name: 'Review bot',
|
||||
adapter: 'codex',
|
||||
modelId: 'gpt-5.5',
|
||||
reasoningEffort: 'medium',
|
||||
permissionMode: 'approve-all',
|
||||
sessionKey: 'agent:agent-1:main',
|
||||
createdAt: 1000,
|
||||
updatedAt: 1000,
|
||||
}),
|
||||
).toEqual({
|
||||
agentId: 'agent-1',
|
||||
name: 'Review bot',
|
||||
workspace: 'codex:main',
|
||||
model: 'gpt-5.5',
|
||||
source: 'agent-harness',
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('buildAgentApiUrl', () => {
|
||||
it('does not add a trailing slash for the harness root route', () => {
|
||||
expect(buildAgentApiUrl('http://127.0.0.1:9105', '/')).toBe(
|
||||
'http://127.0.0.1:9105/agents',
|
||||
)
|
||||
expect(buildAgentApiUrl('http://127.0.0.1:9105', '/adapters')).toBe(
|
||||
'http://127.0.0.1:9105/agents/adapters',
|
||||
)
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,464 @@
|
||||
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
|
||||
import { getAgentServerUrl } from '@/lib/browseros/helpers'
|
||||
import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
|
||||
import { buildAgentApiUrl } from './agent-api-url'
|
||||
import {
|
||||
type AgentHarnessStreamEvent,
|
||||
type CreateHarnessAgentInput,
|
||||
type HarnessAdapterDescriptor,
|
||||
type HarnessAgent,
|
||||
type HarnessAgentHistoryPage,
|
||||
type HarnessQueuedMessage,
|
||||
mapHarnessAgentToEntry,
|
||||
} from './agent-harness-types'
|
||||
import type { OpenClawStatus } from './useOpenClaw'
|
||||
|
||||
/**
|
||||
* Combined response shape of `GET /agents`. The page polls this once
|
||||
* and consumes both fields, replacing the dedicated `/claw/status`
|
||||
* poll the previous design carried.
|
||||
*/
|
||||
interface HarnessAgentsResponse {
|
||||
agents: HarnessAgent[]
|
||||
gateway: OpenClawStatus | null
|
||||
}
|
||||
|
||||
export type { AgentHarnessStreamEvent }
|
||||
|
||||
const AGENT_QUERY_KEYS = {
|
||||
adapters: 'agent-harness-adapters',
|
||||
agents: 'agent-harness-agents',
|
||||
} as const
|
||||
|
||||
async function agentsFetch<T>(
|
||||
baseUrl: string,
|
||||
path: string,
|
||||
init?: RequestInit,
|
||||
): Promise<T> {
|
||||
const res = await fetch(buildAgentApiUrl(baseUrl, path), init)
|
||||
if (!res.ok) {
|
||||
let message = `Request failed with status ${res.status}`
|
||||
try {
|
||||
const body = (await res.json()) as { error?: string }
|
||||
if (body.error) message = body.error
|
||||
} catch {}
|
||||
throw new Error(message)
|
||||
}
|
||||
return res.json() as Promise<T>
|
||||
}
|
||||
|
||||
export function useAgentAdapters(enabled = true) {
|
||||
const {
|
||||
baseUrl,
|
||||
isLoading: urlLoading,
|
||||
error: urlError,
|
||||
} = useAgentServerUrl()
|
||||
|
||||
const query = useQuery<HarnessAdapterDescriptor[], Error>({
|
||||
queryKey: [AGENT_QUERY_KEYS.adapters, baseUrl],
|
||||
queryFn: async () => {
|
||||
const data = await agentsFetch<{ adapters: HarnessAdapterDescriptor[] }>(
|
||||
baseUrl as string,
|
||||
'/adapters',
|
||||
)
|
||||
return data.adapters ?? []
|
||||
},
|
||||
enabled: Boolean(baseUrl) && !urlLoading && enabled,
|
||||
})
|
||||
|
||||
return {
|
||||
adapters: query.data ?? [],
|
||||
loading: query.isLoading || urlLoading,
|
||||
error: query.error ?? urlError,
|
||||
refetch: query.refetch,
|
||||
}
|
||||
}
|
||||
|
||||
export function useHarnessAgents(enabled = true) {
|
||||
const {
|
||||
baseUrl,
|
||||
isLoading: urlLoading,
|
||||
error: urlError,
|
||||
} = useAgentServerUrl()
|
||||
|
||||
const query = useQuery<HarnessAgentsResponse, Error>({
|
||||
queryKey: [AGENT_QUERY_KEYS.agents, baseUrl],
|
||||
queryFn: async () => {
|
||||
const data = await agentsFetch<HarnessAgentsResponse>(
|
||||
baseUrl as string,
|
||||
'/',
|
||||
)
|
||||
return {
|
||||
agents: data.agents ?? [],
|
||||
gateway: data.gateway ?? null,
|
||||
}
|
||||
},
|
||||
enabled: Boolean(baseUrl) && !urlLoading && enabled,
|
||||
// Poll every 5s so the per-agent liveness state (working / idle /
|
||||
// asleep / error) and last-used timestamps stay fresh without a
|
||||
// websocket. `refetchIntervalInBackground: false` lets a hidden
|
||||
// tab go quiet — react-query's default, made explicit.
|
||||
refetchInterval: 5_000,
|
||||
refetchIntervalInBackground: false,
|
||||
})
|
||||
|
||||
return {
|
||||
agents: (query.data?.agents ?? []).map(mapHarnessAgentToEntry),
|
||||
harnessAgents: query.data?.agents ?? [],
|
||||
gateway: query.data?.gateway ?? null,
|
||||
loading: query.isLoading || urlLoading,
|
||||
error: query.error ?? urlError,
|
||||
refetch: query.refetch,
|
||||
}
|
||||
}
|
||||
|
||||
export function useCreateHarnessAgent() {
|
||||
const { baseUrl, isLoading: urlLoading } = useAgentServerUrl()
|
||||
const queryClient = useQueryClient()
|
||||
|
||||
return useMutation({
|
||||
mutationFn: async (input: CreateHarnessAgentInput) => {
|
||||
if (!baseUrl || urlLoading) {
|
||||
throw new Error('BrowserOS agent server URL is not ready')
|
||||
}
|
||||
const data = await agentsFetch<{ agent: HarnessAgent }>(baseUrl, '/', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(input),
|
||||
})
|
||||
return data.agent
|
||||
},
|
||||
onSuccess: async () => {
|
||||
await queryClient.invalidateQueries({
|
||||
queryKey: [AGENT_QUERY_KEYS.agents],
|
||||
})
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a partial update to a harness agent. Used by the pin-toggle
|
||||
* star and (eventually) the inline rename UI. Optimistically writes
|
||||
* the patch into the listing query cache so the row updates instantly,
|
||||
* then rolls back if the server rejects the change.
|
||||
*/
|
||||
export function useUpdateHarnessAgent() {
|
||||
const { baseUrl, isLoading: urlLoading } = useAgentServerUrl()
|
||||
const queryClient = useQueryClient()
|
||||
|
||||
return useMutation({
|
||||
mutationFn: async (input: {
|
||||
agentId: string
|
||||
patch: { name?: string; pinned?: boolean }
|
||||
}) => {
|
||||
if (!baseUrl || urlLoading) {
|
||||
throw new Error('BrowserOS agent server URL is not ready')
|
||||
}
|
||||
const data = await agentsFetch<{ agent: HarnessAgent }>(
|
||||
baseUrl,
|
||||
`/${encodeURIComponent(input.agentId)}`,
|
||||
{
|
||||
method: 'PATCH',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(input.patch),
|
||||
},
|
||||
)
|
||||
return data.agent
|
||||
},
|
||||
onMutate: async ({ agentId, patch }) => {
|
||||
const queryKey = [AGENT_QUERY_KEYS.agents, baseUrl]
|
||||
await queryClient.cancelQueries({ queryKey })
|
||||
const previous = queryClient.getQueryData<HarnessAgentsResponse>(queryKey)
|
||||
if (!previous) return { previous: undefined }
|
||||
queryClient.setQueryData<HarnessAgentsResponse>(queryKey, {
|
||||
...previous,
|
||||
agents: previous.agents.map((agent) =>
|
||||
agent.id === agentId ? { ...agent, ...patch } : agent,
|
||||
),
|
||||
})
|
||||
return { previous }
|
||||
},
|
||||
onError: (_err, _vars, context) => {
|
||||
if (!context?.previous) return
|
||||
queryClient.setQueryData(
|
||||
[AGENT_QUERY_KEYS.agents, baseUrl],
|
||||
context.previous,
|
||||
)
|
||||
},
|
||||
onSettled: async () => {
|
||||
await queryClient.invalidateQueries({
|
||||
queryKey: [AGENT_QUERY_KEYS.agents],
|
||||
})
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
export function useDeleteHarnessAgent() {
|
||||
const { baseUrl, isLoading: urlLoading } = useAgentServerUrl()
|
||||
const queryClient = useQueryClient()
|
||||
|
||||
return useMutation({
|
||||
mutationFn: async (agentId: string) => {
|
||||
if (!baseUrl || urlLoading) {
|
||||
throw new Error('BrowserOS agent server URL is not ready')
|
||||
}
|
||||
return agentsFetch<{ success: boolean }>(
|
||||
baseUrl,
|
||||
`/${encodeURIComponent(agentId)}`,
|
||||
{ method: 'DELETE' },
|
||||
)
|
||||
},
|
||||
onSuccess: async () => {
|
||||
await queryClient.invalidateQueries({
|
||||
queryKey: [AGENT_QUERY_KEYS.agents],
|
||||
})
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
export async function chatWithHarnessAgent(
|
||||
agentId: string,
|
||||
message: string,
|
||||
signal?: AbortSignal,
|
||||
attachments?: ReadonlyArray<unknown>,
|
||||
): Promise<Response> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
return fetch(`${baseUrl}/agents/${encodeURIComponent(agentId)}/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
message,
|
||||
...(attachments && attachments.length > 0 ? { attachments } : {}),
|
||||
}),
|
||||
signal,
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Subscribe to an existing turn (the server's `ActiveTurnRegistry`
|
||||
* decoupled the turn lifecycle from POST /chat). `lastSeq` lets the
|
||||
* client resume after a disconnect — the server replays buffered
|
||||
* frames with seq > lastSeq, then tails new ones.
|
||||
*/
|
||||
export async function attachToHarnessTurn(
|
||||
agentId: string,
|
||||
options: { turnId?: string; lastSeq?: number; signal?: AbortSignal } = {},
|
||||
): Promise<Response> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
const url = new URL(
|
||||
`${baseUrl}/agents/${encodeURIComponent(agentId)}/chat/stream`,
|
||||
)
|
||||
if (options.turnId) url.searchParams.set('turnId', options.turnId)
|
||||
const headers: Record<string, string> = {}
|
||||
if (typeof options.lastSeq === 'number') {
|
||||
headers['Last-Event-ID'] = String(options.lastSeq)
|
||||
}
|
||||
return fetch(url.toString(), { signal: options.signal, headers })
|
||||
}
|
||||
|
||||
export interface HarnessActiveTurnInfo {
|
||||
turnId: string
|
||||
agentId: string
|
||||
sessionId: 'main'
|
||||
status: 'running' | 'done' | 'error' | 'cancelled'
|
||||
lastSeq: number
|
||||
startedAt: number
|
||||
endedAt?: number
|
||||
/** User message that kicked off the turn; null when not captured. */
|
||||
prompt: string | null
|
||||
}
|
||||
|
||||
/**
|
||||
* Discover an in-flight turn for an agent. Used on chat mount so the
|
||||
* UI reattaches instead of starting a new turn after a tab/refresh.
|
||||
*/
|
||||
export async function fetchActiveHarnessTurn(
|
||||
agentId: string,
|
||||
): Promise<HarnessActiveTurnInfo | null> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
const response = await fetch(
|
||||
`${baseUrl}/agents/${encodeURIComponent(agentId)}/chat/active`,
|
||||
)
|
||||
if (!response.ok) return null
|
||||
const body = (await response.json()) as {
|
||||
active: HarnessActiveTurnInfo | null
|
||||
}
|
||||
return body.active
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop button. Hits the explicit cancel endpoint instead of just
|
||||
* aborting the fetch (which now only detaches *this* subscriber from
|
||||
* the buffer; the underlying turn would otherwise keep running).
|
||||
*/
|
||||
export async function cancelHarnessTurn(
|
||||
agentId: string,
|
||||
options: { turnId?: string; reason?: string } = {},
|
||||
): Promise<{ cancelled: boolean }> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
const response = await fetch(
|
||||
`${baseUrl}/agents/${encodeURIComponent(agentId)}/chat/cancel`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
...(options.turnId ? { turnId: options.turnId } : {}),
|
||||
...(options.reason ? { reason: options.reason } : {}),
|
||||
}),
|
||||
},
|
||||
)
|
||||
if (!response.ok) return { cancelled: false }
|
||||
return (await response.json()) as { cancelled: boolean }
|
||||
}
|
||||
|
||||
export async function fetchHarnessAgentHistory(
|
||||
agentId: string,
|
||||
): Promise<HarnessAgentHistoryPage> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
return agentsFetch<HarnessAgentHistoryPage>(
|
||||
baseUrl,
|
||||
`/${encodeURIComponent(agentId)}/sessions/main/history`,
|
||||
)
|
||||
}
|
||||
|
||||
export interface EnqueueMessageInput {
|
||||
message: string
|
||||
attachments?: ReadonlyArray<unknown>
|
||||
}
|
||||
|
||||
export async function enqueueHarnessMessage(
|
||||
agentId: string,
|
||||
input: EnqueueMessageInput,
|
||||
): Promise<HarnessQueuedMessage> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
const response = await fetch(
|
||||
`${baseUrl}/agents/${encodeURIComponent(agentId)}/queue`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
message: input.message,
|
||||
...(input.attachments && input.attachments.length > 0
|
||||
? { attachments: input.attachments }
|
||||
: {}),
|
||||
}),
|
||||
},
|
||||
)
|
||||
if (!response.ok) {
|
||||
let message = `Request failed with status ${response.status}`
|
||||
try {
|
||||
const body = (await response.json()) as { error?: string }
|
||||
if (body.error) message = body.error
|
||||
} catch {}
|
||||
throw new Error(message)
|
||||
}
|
||||
const body = (await response.json()) as { queued: HarnessQueuedMessage }
|
||||
return body.queued
|
||||
}
|
||||
|
||||
export async function removeHarnessQueuedMessage(
|
||||
agentId: string,
|
||||
messageId: string,
|
||||
): Promise<{ removed: boolean }> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
const response = await fetch(
|
||||
`${baseUrl}/agents/${encodeURIComponent(agentId)}/queue/${encodeURIComponent(
|
||||
messageId,
|
||||
)}`,
|
||||
{ method: 'DELETE' },
|
||||
)
|
||||
if (!response.ok) return { removed: false }
|
||||
return (await response.json()) as { removed: boolean }
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimistic enqueue: writes the new queued message into the listing
|
||||
* cache immediately so the queue panel reflects the change without
|
||||
* waiting for the next poll. Rolls back if the server rejects.
|
||||
*/
|
||||
export function useEnqueueHarnessMessage() {
|
||||
const { baseUrl } = useAgentServerUrl()
|
||||
const queryClient = useQueryClient()
|
||||
|
||||
return useMutation({
|
||||
mutationFn: async (input: { agentId: string } & EnqueueMessageInput) =>
|
||||
enqueueHarnessMessage(input.agentId, input),
|
||||
onMutate: async (input) => {
|
||||
const queryKey = [AGENT_QUERY_KEYS.agents, baseUrl]
|
||||
await queryClient.cancelQueries({ queryKey })
|
||||
const previous = queryClient.getQueryData<HarnessAgentsResponse>(queryKey)
|
||||
if (!previous) return { previous: undefined }
|
||||
const optimistic: HarnessQueuedMessage = {
|
||||
id: `optimistic-${Math.random().toString(36).slice(2, 10)}`,
|
||||
createdAt: Date.now(),
|
||||
message: input.message,
|
||||
}
|
||||
queryClient.setQueryData<HarnessAgentsResponse>(queryKey, {
|
||||
...previous,
|
||||
agents: previous.agents.map((agent) =>
|
||||
agent.id === input.agentId
|
||||
? { ...agent, queue: [...(agent.queue ?? []), optimistic] }
|
||||
: agent,
|
||||
),
|
||||
})
|
||||
return { previous }
|
||||
},
|
||||
onError: (_err, _vars, context) => {
|
||||
if (!context?.previous) return
|
||||
queryClient.setQueryData(
|
||||
[AGENT_QUERY_KEYS.agents, baseUrl],
|
||||
context.previous,
|
||||
)
|
||||
},
|
||||
onSettled: async () => {
|
||||
await queryClient.invalidateQueries({
|
||||
queryKey: [AGENT_QUERY_KEYS.agents],
|
||||
})
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimistic queue removal mirror of `useEnqueueHarnessMessage`.
|
||||
*/
|
||||
export function useRemoveHarnessQueuedMessage() {
|
||||
const { baseUrl } = useAgentServerUrl()
|
||||
const queryClient = useQueryClient()
|
||||
|
||||
return useMutation({
|
||||
mutationFn: async (input: { agentId: string; messageId: string }) =>
|
||||
removeHarnessQueuedMessage(input.agentId, input.messageId),
|
||||
onMutate: async (input) => {
|
||||
const queryKey = [AGENT_QUERY_KEYS.agents, baseUrl]
|
||||
await queryClient.cancelQueries({ queryKey })
|
||||
const previous = queryClient.getQueryData<HarnessAgentsResponse>(queryKey)
|
||||
if (!previous) return { previous: undefined }
|
||||
queryClient.setQueryData<HarnessAgentsResponse>(queryKey, {
|
||||
...previous,
|
||||
agents: previous.agents.map((agent) =>
|
||||
agent.id === input.agentId
|
||||
? {
|
||||
...agent,
|
||||
queue: (agent.queue ?? []).filter(
|
||||
(entry) => entry.id !== input.messageId,
|
||||
),
|
||||
}
|
||||
: agent,
|
||||
),
|
||||
})
|
||||
return { previous }
|
||||
},
|
||||
onError: (_err, _vars, context) => {
|
||||
if (!context?.previous) return
|
||||
queryClient.setQueryData(
|
||||
[AGENT_QUERY_KEYS.agents, baseUrl],
|
||||
context.previous,
|
||||
)
|
||||
},
|
||||
onSettled: async () => {
|
||||
await queryClient.invalidateQueries({
|
||||
queryKey: [AGENT_QUERY_KEYS.agents],
|
||||
})
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
|
||||
import { getAgentServerUrl } from '@/lib/browseros/helpers'
|
||||
import { useAgentServerUrl } from '@/lib/browseros/useBrowserOSProviders'
|
||||
|
||||
export interface AgentEntry {
|
||||
@@ -7,6 +6,7 @@ export interface AgentEntry {
|
||||
name: string
|
||||
workspace: string
|
||||
model?: unknown
|
||||
source?: 'openclaw' | 'agent-harness'
|
||||
}
|
||||
|
||||
export interface OpenClawStatus {
|
||||
@@ -41,6 +41,7 @@ export interface OpenClawAgentMutationInput {
|
||||
baseUrl?: string
|
||||
apiKey?: string
|
||||
modelId?: string
|
||||
supportsImages?: boolean
|
||||
}
|
||||
|
||||
export interface OpenClawSetupInput {
|
||||
@@ -49,6 +50,10 @@ export interface OpenClawSetupInput {
|
||||
baseUrl?: string
|
||||
apiKey?: string
|
||||
modelId?: string
|
||||
// Mirrors LlmProviderConfig.supportsImages — pass-through so the gateway
|
||||
// can declare the model's input modalities correctly when persisting the
|
||||
// custom-provider config.
|
||||
supportsImages?: boolean
|
||||
}
|
||||
|
||||
export function getModelDisplayName(model: unknown): string | undefined {
|
||||
@@ -93,7 +98,10 @@ async function fetchOpenClawStatus(baseUrl: string): Promise<OpenClawStatus> {
|
||||
|
||||
async function fetchOpenClawAgents(baseUrl: string): Promise<AgentEntry[]> {
|
||||
const data = await clawFetch<{ agents: AgentEntry[] }>(baseUrl, '/agents')
|
||||
return data.agents ?? []
|
||||
return (data.agents ?? []).map((agent) => ({
|
||||
...agent,
|
||||
source: 'openclaw',
|
||||
}))
|
||||
}
|
||||
|
||||
async function invalidateOpenClawQueries(
|
||||
@@ -310,25 +318,3 @@ export function buildChatHistoryFromTurns(
|
||||
|
||||
return messages
|
||||
}
|
||||
|
||||
export async function chatWithAgent(
|
||||
agentId: string,
|
||||
message: string,
|
||||
sessionKey?: string,
|
||||
history: OpenClawChatHistoryMessage[] = [],
|
||||
signal?: AbortSignal,
|
||||
attachments?: ReadonlyArray<unknown>,
|
||||
): Promise<Response> {
|
||||
const baseUrl = await getAgentServerUrl()
|
||||
return fetch(`${baseUrl}/claw/agents/${agentId}/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
message,
|
||||
sessionKey,
|
||||
history,
|
||||
...(attachments && attachments.length > 0 ? { attachments } : {}),
|
||||
}),
|
||||
signal,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -164,9 +164,17 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
|
||||
const resolvedProvider: Provider | null = (() => {
|
||||
const id = selectedProviderId ?? defaultProviderId
|
||||
const found = providers.find((p) => p.id === id)
|
||||
if (found) return { id: found.id, name: found.name, type: found.type }
|
||||
if (found) {
|
||||
return {
|
||||
kind: 'llm' as const,
|
||||
id: found.id,
|
||||
name: found.name,
|
||||
type: found.type,
|
||||
}
|
||||
}
|
||||
if (providers[0])
|
||||
return {
|
||||
kind: 'llm' as const,
|
||||
id: providers[0].id,
|
||||
name: providers[0].name,
|
||||
type: providers[0].type,
|
||||
@@ -175,6 +183,7 @@ export const NewScheduledTaskDialog: FC<NewScheduledTaskDialogProps> = ({
|
||||
})()
|
||||
|
||||
const providerOptions: Provider[] = providers.map((p) => ({
|
||||
kind: 'llm',
|
||||
id: p.id,
|
||||
name: p.name,
|
||||
type: p.type,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Github, History, Plus, SettingsIcon } from 'lucide-react'
|
||||
import { Bot, Github, History, Plus, SettingsIcon } from 'lucide-react'
|
||||
import type { FC } from 'react'
|
||||
import { Link, useLocation, useNavigate } from 'react-router'
|
||||
import { ChatProviderSelector } from '@/components/chat/ChatProviderSelector'
|
||||
@@ -64,7 +64,9 @@ export const ChatHeader: FC<ChatHeaderProps> = ({
|
||||
className="group relative inline-flex cursor-pointer items-center gap-2 rounded-lg p-2 text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground data-[state=open]:bg-accent"
|
||||
title="Change AI Provider"
|
||||
>
|
||||
{selectedProvider.type === 'browseros' ? (
|
||||
{selectedProvider.kind === 'acp' ? (
|
||||
<Bot className="h-[18px] w-[18px]" />
|
||||
) : selectedProvider.type === 'browseros' ? (
|
||||
<BrowserOSIcon size={18} />
|
||||
) : (
|
||||
<ProviderIcon
|
||||
|
||||
@@ -0,0 +1,258 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import type {
|
||||
HarnessAdapterDescriptor,
|
||||
HarnessAgent,
|
||||
} from '@/entrypoints/app/agents/agent-harness-types'
|
||||
import type { LlmProviderConfig } from '@/lib/llm-providers/types'
|
||||
import {
|
||||
buildSidepanelChatTargets,
|
||||
persistSidepanelChatTargetSelection,
|
||||
resolveSidepanelChatTarget,
|
||||
type SidepanelChatTargetSelection,
|
||||
toLlmProviderConfig,
|
||||
} from './sidepanel-chat-targets'
|
||||
|
||||
const timestamp = 1000
|
||||
|
||||
const providers: LlmProviderConfig[] = [
|
||||
{
|
||||
id: 'browseros',
|
||||
type: 'browseros',
|
||||
name: 'BrowserOS',
|
||||
baseUrl: 'https://api.browseros.com/v1',
|
||||
modelId: 'browseros-auto',
|
||||
supportsImages: true,
|
||||
contextWindow: 200000,
|
||||
temperature: 0.2,
|
||||
createdAt: timestamp,
|
||||
updatedAt: timestamp,
|
||||
},
|
||||
{
|
||||
id: 'anthropic-sonnet',
|
||||
type: 'anthropic',
|
||||
name: 'Anthropic Sonnet',
|
||||
modelId: 'claude-sonnet-4-6',
|
||||
apiKey: 'sk-ant',
|
||||
supportsImages: true,
|
||||
contextWindow: 200000,
|
||||
temperature: 0.2,
|
||||
createdAt: timestamp,
|
||||
updatedAt: timestamp,
|
||||
},
|
||||
]
|
||||
|
||||
const adapters: HarnessAdapterDescriptor[] = [
|
||||
{
|
||||
id: 'claude',
|
||||
name: 'Claude Code',
|
||||
defaultModelId: 'haiku',
|
||||
defaultReasoningEffort: 'medium',
|
||||
modelControl: 'best-effort',
|
||||
models: [
|
||||
{ id: 'sonnet', label: 'Sonnet' },
|
||||
{ id: 'haiku', label: 'Haiku', recommended: true },
|
||||
],
|
||||
reasoningEfforts: [
|
||||
{ id: 'medium', label: 'Medium', recommended: true },
|
||||
{ id: 'high', label: 'High' },
|
||||
],
|
||||
},
|
||||
{
|
||||
id: 'codex',
|
||||
name: 'Codex',
|
||||
defaultModelId: 'gpt-5.5',
|
||||
defaultReasoningEffort: 'medium',
|
||||
modelControl: 'runtime-supported',
|
||||
models: [{ id: 'gpt-5.5', label: 'GPT-5.5', recommended: true }],
|
||||
reasoningEfforts: [{ id: 'medium', label: 'Medium', recommended: true }],
|
||||
},
|
||||
{
|
||||
id: 'openclaw',
|
||||
name: 'OpenClaw',
|
||||
defaultModelId: 'default',
|
||||
defaultReasoningEffort: 'medium',
|
||||
modelControl: 'best-effort',
|
||||
models: [],
|
||||
reasoningEfforts: [
|
||||
{ id: 'medium', label: 'Medium', recommended: true },
|
||||
{ id: 'high', label: 'High' },
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
const agents: HarnessAgent[] = [
|
||||
{
|
||||
id: 'agent-codex',
|
||||
name: 'Review Bot',
|
||||
adapter: 'codex',
|
||||
modelId: 'gpt-5.5',
|
||||
reasoningEffort: 'medium',
|
||||
permissionMode: 'approve-all',
|
||||
sessionKey: 'agent:agent-codex:main',
|
||||
createdAt: timestamp,
|
||||
updatedAt: timestamp,
|
||||
},
|
||||
{
|
||||
id: 'agent-openclaw',
|
||||
name: 'Research Claw',
|
||||
adapter: 'openclaw',
|
||||
modelId: 'default',
|
||||
reasoningEffort: 'high',
|
||||
permissionMode: 'approve-all',
|
||||
sessionKey: 'agent:agent-openclaw:main',
|
||||
createdAt: timestamp,
|
||||
updatedAt: timestamp,
|
||||
},
|
||||
]
|
||||
|
||||
describe('buildSidepanelChatTargets', () => {
|
||||
it('returns LLM targets plus one ACP target per persisted harness agent', () => {
|
||||
const targets = buildSidepanelChatTargets({ providers, adapters, agents })
|
||||
|
||||
expect(targets.map((target) => target.id)).toEqual([
|
||||
'browseros',
|
||||
'anthropic-sonnet',
|
||||
'agent-codex',
|
||||
'agent-openclaw',
|
||||
])
|
||||
})
|
||||
|
||||
it('does not emit catalog-only ACP targets without persisted agents', () => {
|
||||
const targets = buildSidepanelChatTargets({
|
||||
providers,
|
||||
adapters,
|
||||
agents: [],
|
||||
})
|
||||
|
||||
expect(targets.map((target) => target.id)).toEqual([
|
||||
'browseros',
|
||||
'anthropic-sonnet',
|
||||
])
|
||||
})
|
||||
|
||||
it('uses the created OpenClaw agent name instead of a generic adapter target', () => {
|
||||
const targets = buildSidepanelChatTargets({ providers, adapters, agents })
|
||||
const openclaw = targets.find((target) => target.id === 'agent-openclaw')
|
||||
|
||||
expect(openclaw).toMatchObject({
|
||||
kind: 'acp',
|
||||
id: 'agent-openclaw',
|
||||
agentId: 'agent-openclaw',
|
||||
adapter: 'openclaw',
|
||||
adapterName: 'OpenClaw',
|
||||
modelId: 'default',
|
||||
modelLabel: 'default',
|
||||
name: 'Research Claw',
|
||||
modelControl: 'best-effort',
|
||||
reasoningEffort: 'high',
|
||||
})
|
||||
})
|
||||
|
||||
it('preserves adapter metadata for created agent targets', () => {
|
||||
const targets = buildSidepanelChatTargets({ providers, adapters, agents })
|
||||
const codex = targets.find((target) => target.id === 'agent-codex')
|
||||
|
||||
expect(codex).toMatchObject({
|
||||
kind: 'acp',
|
||||
agentId: 'agent-codex',
|
||||
adapter: 'codex',
|
||||
adapterName: 'Codex',
|
||||
modelId: 'gpt-5.5',
|
||||
modelLabel: 'GPT-5.5',
|
||||
modelControl: 'runtime-supported',
|
||||
recommended: true,
|
||||
reasoningEffort: 'medium',
|
||||
reasoningEffortLabel: 'Medium',
|
||||
})
|
||||
})
|
||||
|
||||
it('still returns LLM targets when agents and adapters are unavailable', () => {
|
||||
expect(
|
||||
buildSidepanelChatTargets({ providers, adapters: [], agents: [] }),
|
||||
).toEqual([
|
||||
{
|
||||
kind: 'llm',
|
||||
id: 'browseros',
|
||||
name: 'BrowserOS',
|
||||
type: 'browseros',
|
||||
provider: providers[0],
|
||||
},
|
||||
{
|
||||
kind: 'llm',
|
||||
id: 'anthropic-sonnet',
|
||||
name: 'Anthropic Sonnet',
|
||||
type: 'anthropic',
|
||||
provider: providers[1],
|
||||
},
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
describe('resolveSidepanelChatTarget', () => {
|
||||
it('resolves selected LLM targets back to their provider config', () => {
|
||||
const targets = buildSidepanelChatTargets({ providers, adapters, agents })
|
||||
const resolved = resolveSidepanelChatTarget({
|
||||
targets,
|
||||
defaultProviderId: 'browseros',
|
||||
selection: { kind: 'llm', id: 'anthropic-sonnet' },
|
||||
})
|
||||
|
||||
expect(resolved?.kind).toBe('llm')
|
||||
expect(toLlmProviderConfig(resolved)?.modelId).toBe('claude-sonnet-4-6')
|
||||
})
|
||||
|
||||
it('falls back to the current default LLM provider when a persisted ACP target is stale', () => {
|
||||
const targets = buildSidepanelChatTargets({
|
||||
providers,
|
||||
adapters,
|
||||
agents: [],
|
||||
})
|
||||
|
||||
expect(
|
||||
resolveSidepanelChatTarget({
|
||||
targets,
|
||||
defaultProviderId: 'anthropic-sonnet',
|
||||
selection: { kind: 'acp', id: 'agent-codex' },
|
||||
}),
|
||||
).toMatchObject({
|
||||
kind: 'llm',
|
||||
id: 'anthropic-sonnet',
|
||||
})
|
||||
})
|
||||
|
||||
it('falls back when an old catalog-style ACP target id is persisted', () => {
|
||||
const targets = buildSidepanelChatTargets({ providers, adapters, agents })
|
||||
|
||||
expect(
|
||||
resolveSidepanelChatTarget({
|
||||
targets,
|
||||
defaultProviderId: 'anthropic-sonnet',
|
||||
selection: { kind: 'acp', id: 'acp:codex:gpt-5.5:medium' },
|
||||
}),
|
||||
).toMatchObject({
|
||||
kind: 'llm',
|
||||
id: 'anthropic-sonnet',
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('persistSidepanelChatTargetSelection', () => {
|
||||
it('stores only target identity and does not mutate LLM provider arrays', async () => {
|
||||
let savedSelection: SidepanelChatTargetSelection | null = null
|
||||
const originalProviders = providers.map((provider) => ({ ...provider }))
|
||||
const targets = buildSidepanelChatTargets({ providers, adapters, agents })
|
||||
const target = targets.find((candidate) => candidate.id === 'agent-codex')
|
||||
|
||||
await persistSidepanelChatTargetSelection(target, {
|
||||
setValue: async (value) => {
|
||||
savedSelection = value
|
||||
},
|
||||
})
|
||||
|
||||
expect(savedSelection as SidepanelChatTargetSelection | null).toEqual({
|
||||
kind: 'acp',
|
||||
id: 'agent-codex',
|
||||
})
|
||||
expect(providers).toEqual(originalProviders)
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,178 @@
|
||||
import type {
|
||||
HarnessAdapterDescriptor,
|
||||
HarnessAgent,
|
||||
HarnessAgentAdapter,
|
||||
} from '@/entrypoints/app/agents/agent-harness-types'
|
||||
import type { LlmProviderConfig, ProviderType } from '@/lib/llm-providers/types'
|
||||
|
||||
export type SidepanelTargetKind = 'llm' | 'acp'
|
||||
|
||||
export type SidepanelChatTarget =
|
||||
| {
|
||||
kind: 'llm'
|
||||
id: string
|
||||
name: string
|
||||
type: ProviderType
|
||||
provider: LlmProviderConfig
|
||||
}
|
||||
| {
|
||||
kind: 'acp'
|
||||
id: string
|
||||
name: string
|
||||
type: 'acp'
|
||||
agentId: string
|
||||
adapter: HarnessAgentAdapter
|
||||
adapterName: string
|
||||
modelId: string
|
||||
modelLabel: string
|
||||
modelControl: HarnessAdapterDescriptor['modelControl']
|
||||
recommended?: boolean
|
||||
reasoningEffort: string
|
||||
reasoningEffortLabel?: string
|
||||
}
|
||||
|
||||
export type SidepanelChatTargetSelection = Pick<
|
||||
SidepanelChatTarget,
|
||||
'kind' | 'id'
|
||||
>
|
||||
|
||||
interface BuildSidepanelChatTargetsInput {
|
||||
providers: LlmProviderConfig[]
|
||||
adapters: HarnessAdapterDescriptor[]
|
||||
agents?: HarnessAgent[]
|
||||
}
|
||||
|
||||
interface ResolveSidepanelChatTargetInput {
|
||||
targets: SidepanelChatTarget[]
|
||||
defaultProviderId: string
|
||||
selection?: SidepanelChatTargetSelection | null
|
||||
}
|
||||
|
||||
interface SidepanelChatTargetSelectionWriter {
|
||||
setValue(value: SidepanelChatTargetSelection | null): Promise<void>
|
||||
}
|
||||
|
||||
interface SidepanelChatTargetSelectionReader {
|
||||
getValue(): Promise<SidepanelChatTargetSelection | null>
|
||||
}
|
||||
|
||||
type SidepanelChatTargetSelectionStore = SidepanelChatTargetSelectionReader &
|
||||
SidepanelChatTargetSelectionWriter
|
||||
|
||||
let sidepanelChatTargetSelectionStorage:
|
||||
| SidepanelChatTargetSelectionStore
|
||||
| undefined
|
||||
|
||||
export function buildSidepanelChatTargets({
|
||||
providers,
|
||||
adapters,
|
||||
agents = [],
|
||||
}: BuildSidepanelChatTargetsInput): SidepanelChatTarget[] {
|
||||
return [
|
||||
...providers.map(toLlmTarget),
|
||||
...agents.map((agent) => toAcpTargetForAgent(agent, adapters)),
|
||||
]
|
||||
}
|
||||
|
||||
function toAcpTargetForAgent(
|
||||
agent: HarnessAgent,
|
||||
adapters: HarnessAdapterDescriptor[],
|
||||
): SidepanelChatTarget {
|
||||
const adapter = adapters.find((entry) => entry.id === agent.adapter)
|
||||
const modelId = agent.modelId ?? adapter?.defaultModelId ?? 'default'
|
||||
const reasoningEffort =
|
||||
agent.reasoningEffort ?? adapter?.defaultReasoningEffort ?? 'medium'
|
||||
const model = adapter?.models.find((entry) => entry.id === modelId)
|
||||
const reasoning = adapter?.reasoningEfforts.find(
|
||||
(effort) => effort.id === reasoningEffort,
|
||||
)
|
||||
|
||||
return {
|
||||
kind: 'acp',
|
||||
id: agent.id,
|
||||
name: agent.name,
|
||||
type: 'acp',
|
||||
agentId: agent.id,
|
||||
adapter: agent.adapter,
|
||||
adapterName: adapter?.name ?? formatAdapterName(agent.adapter),
|
||||
modelId,
|
||||
modelLabel: model?.label ?? modelId,
|
||||
modelControl: adapter?.modelControl ?? 'best-effort',
|
||||
recommended: model?.recommended,
|
||||
reasoningEffort,
|
||||
reasoningEffortLabel: reasoning?.label,
|
||||
}
|
||||
}
|
||||
|
||||
function formatAdapterName(adapter: HarnessAgentAdapter): string {
|
||||
if (adapter === 'claude') return 'Claude Code'
|
||||
if (adapter === 'codex') return 'Codex'
|
||||
if (adapter === 'openclaw') return 'OpenClaw'
|
||||
return adapter
|
||||
}
|
||||
|
||||
export function resolveSidepanelChatTarget({
|
||||
targets,
|
||||
defaultProviderId,
|
||||
selection,
|
||||
}: ResolveSidepanelChatTargetInput): SidepanelChatTarget | undefined {
|
||||
if (selection) {
|
||||
const selected = targets.find(
|
||||
(target) => target.kind === selection.kind && target.id === selection.id,
|
||||
)
|
||||
if (selected) return selected
|
||||
}
|
||||
|
||||
return (
|
||||
targets.find(
|
||||
(target) => target.kind === 'llm' && target.id === defaultProviderId,
|
||||
) ?? targets.find((target) => target.kind === 'llm')
|
||||
)
|
||||
}
|
||||
|
||||
export function toLlmProviderConfig(
|
||||
target: SidepanelChatTarget | undefined,
|
||||
): LlmProviderConfig | undefined {
|
||||
return target?.kind === 'llm' ? target.provider : undefined
|
||||
}
|
||||
|
||||
export async function persistSidepanelChatTargetSelection(
|
||||
target: SidepanelChatTarget | undefined,
|
||||
store?: SidepanelChatTargetSelectionWriter,
|
||||
): Promise<void> {
|
||||
const targetStore = store ?? (await getSidepanelChatTargetSelectionStorage())
|
||||
await targetStore.setValue(
|
||||
target ? { kind: target.kind, id: target.id } : null,
|
||||
)
|
||||
}
|
||||
|
||||
export async function loadSidepanelChatTargetSelection(
|
||||
store?: SidepanelChatTargetSelectionReader,
|
||||
): Promise<SidepanelChatTargetSelection | null> {
|
||||
const targetStore = store ?? (await getSidepanelChatTargetSelectionStorage())
|
||||
return targetStore.getValue()
|
||||
}
|
||||
|
||||
function toLlmTarget(provider: LlmProviderConfig): SidepanelChatTarget {
|
||||
return {
|
||||
kind: 'llm',
|
||||
id: provider.id,
|
||||
name: provider.name,
|
||||
type: provider.type,
|
||||
provider,
|
||||
}
|
||||
}
|
||||
|
||||
async function getSidepanelChatTargetSelectionStorage(): Promise<SidepanelChatTargetSelectionStore> {
|
||||
if (sidepanelChatTargetSelectionStorage) {
|
||||
return sidepanelChatTargetSelectionStorage
|
||||
}
|
||||
|
||||
const { storage } = await import('@wxt-dev/storage')
|
||||
sidepanelChatTargetSelectionStorage =
|
||||
storage.defineItem<SidepanelChatTargetSelection | null>(
|
||||
'local:sidepanel-chat-target-selection',
|
||||
{ fallback: null },
|
||||
)
|
||||
return sidepanelChatTargetSelectionStorage
|
||||
}
|
||||
@@ -1,9 +1,21 @@
|
||||
import { useEffect, useRef } from 'react'
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
|
||||
import useDeepCompareEffect from 'use-deep-compare-effect'
|
||||
import {
|
||||
useAgentAdapters,
|
||||
useHarnessAgents,
|
||||
} from '@/entrypoints/app/agents/useAgents'
|
||||
import type { LlmProviderConfig } from '@/lib/llm-providers/types'
|
||||
import { useLlmProviders } from '@/lib/llm-providers/useLlmProviders'
|
||||
import { type McpServer, useMcpServers } from '@/lib/mcp/mcpServerStorage'
|
||||
import { usePersonalization } from '@/lib/personalization/personalizationStorage'
|
||||
import {
|
||||
buildSidepanelChatTargets,
|
||||
loadSidepanelChatTargetSelection,
|
||||
persistSidepanelChatTargetSelection,
|
||||
resolveSidepanelChatTarget,
|
||||
type SidepanelChatTarget,
|
||||
type SidepanelChatTargetSelection,
|
||||
} from './sidepanel-chat-targets'
|
||||
|
||||
const constructMcpServers = (servers: McpServer[]) => {
|
||||
return servers
|
||||
@@ -23,14 +35,53 @@ const constructCustomServers = (servers: McpServer[]) => {
|
||||
export const useChatRefs = () => {
|
||||
const { servers: mcpServers } = useMcpServers()
|
||||
const {
|
||||
providers: llmProviders,
|
||||
selectedProvider: selectedLlmProvider,
|
||||
setDefaultProvider,
|
||||
isLoading: isLoadingProviders,
|
||||
} = useLlmProviders()
|
||||
const { adapters, loading: isLoadingAdapters } = useAgentAdapters()
|
||||
const { harnessAgents, loading: isLoadingAgents } = useHarnessAgents()
|
||||
const { personalization } = usePersonalization()
|
||||
const [targetSelection, setTargetSelection] =
|
||||
useState<SidepanelChatTargetSelection | null>(null)
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false
|
||||
loadSidepanelChatTargetSelection().then((selection) => {
|
||||
if (!cancelled) setTargetSelection(selection)
|
||||
})
|
||||
return () => {
|
||||
cancelled = true
|
||||
}
|
||||
}, [])
|
||||
|
||||
const chatTargets = useMemo(
|
||||
() =>
|
||||
buildSidepanelChatTargets({
|
||||
providers: llmProviders,
|
||||
adapters,
|
||||
agents: harnessAgents,
|
||||
}),
|
||||
[llmProviders, adapters, harnessAgents],
|
||||
)
|
||||
|
||||
const selectedChatTarget = useMemo(
|
||||
() =>
|
||||
resolveSidepanelChatTarget({
|
||||
targets: chatTargets,
|
||||
defaultProviderId: selectedLlmProvider?.id ?? llmProviders[0]?.id ?? '',
|
||||
selection: targetSelection,
|
||||
}),
|
||||
[chatTargets, llmProviders, selectedLlmProvider, targetSelection],
|
||||
)
|
||||
|
||||
const selectedLlmProviderRef = useRef<LlmProviderConfig | null>(
|
||||
selectedLlmProvider,
|
||||
)
|
||||
const selectedChatTargetRef = useRef<SidepanelChatTarget | undefined>(
|
||||
selectedChatTarget,
|
||||
)
|
||||
const enabledMcpServersRef = useRef(constructMcpServers(mcpServers))
|
||||
const enabledCustomServersRef = useRef(constructCustomServers(mcpServers))
|
||||
const personalizationRef = useRef(personalization)
|
||||
@@ -41,16 +92,36 @@ export const useChatRefs = () => {
|
||||
enabledCustomServersRef.current = constructCustomServers(mcpServers)
|
||||
}, [selectedLlmProvider, mcpServers])
|
||||
|
||||
useEffect(() => {
|
||||
selectedChatTargetRef.current = selectedChatTarget
|
||||
}, [selectedChatTarget])
|
||||
|
||||
useEffect(() => {
|
||||
personalizationRef.current = personalization
|
||||
}, [personalization])
|
||||
|
||||
const selectChatTarget = useCallback(
|
||||
async (target: SidepanelChatTarget | undefined) => {
|
||||
selectedChatTargetRef.current = target
|
||||
setTargetSelection(target ? { kind: target.kind, id: target.id } : null)
|
||||
await persistSidepanelChatTargetSelection(target)
|
||||
},
|
||||
[],
|
||||
)
|
||||
|
||||
return {
|
||||
selectedLlmProviderRef,
|
||||
selectedChatTargetRef,
|
||||
enabledMcpServersRef,
|
||||
enabledCustomServersRef,
|
||||
personalizationRef,
|
||||
llmProviders,
|
||||
setDefaultProvider,
|
||||
chatTargets,
|
||||
selectedChatTarget,
|
||||
selectChatTarget,
|
||||
selectedLlmProvider,
|
||||
isLoadingProviders,
|
||||
isLoadingProviders:
|
||||
isLoadingProviders || isLoadingAdapters || isLoadingAgents,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,153 @@
|
||||
import { describe, expect, it } from 'bun:test'
|
||||
import type { LlmProviderConfig } from '@/lib/llm-providers/types'
|
||||
import type { ChatMode } from './chatTypes'
|
||||
import type { SidepanelChatTarget } from './sidepanel-chat-targets'
|
||||
import { buildSidepanelPreparedSendMessagesRequest } from './useChatSessionRequest'
|
||||
|
||||
const conversationId = '00000000-0000-4000-8000-000000000001'
|
||||
|
||||
describe('buildSidepanelPreparedSendMessagesRequest', () => {
|
||||
it('keeps LLM targets on the existing /chat request body', () => {
|
||||
const request = buildSidepanelPreparedSendMessagesRequest({
|
||||
agentServerUrl: 'http://127.0.0.1:5151',
|
||||
target: llmTarget,
|
||||
fallbackProvider,
|
||||
message: 'Summarize this page',
|
||||
...commonRequestInput(),
|
||||
})
|
||||
|
||||
expect(request.api).toBe('http://127.0.0.1:5151/chat')
|
||||
expect(request.body).toMatchObject({
|
||||
message: 'Summarize this page',
|
||||
conversationId,
|
||||
provider: 'browseros',
|
||||
providerType: 'browseros',
|
||||
providerName: 'BrowserOS',
|
||||
model: 'gpt-5',
|
||||
mode: 'agent',
|
||||
browserContext: {
|
||||
activeTab: { id: 10, url: 'https://example.com', title: 'Example' },
|
||||
enabledMcpServers: ['slack'],
|
||||
},
|
||||
userSystemPrompt: 'Be concise',
|
||||
userWorkingDir: '/tmp/work',
|
||||
previousConversation: [{ role: 'assistant', content: 'Prior answer' }],
|
||||
selectedText: 'selected text',
|
||||
selectedTextSource: {
|
||||
url: 'https://example.com',
|
||||
title: 'Example',
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
it('sends created-agent targets to the agent-id sidepanel route', () => {
|
||||
const request = buildSidepanelPreparedSendMessagesRequest({
|
||||
agentServerUrl: 'http://127.0.0.1:5151',
|
||||
target: acpTarget,
|
||||
fallbackProvider,
|
||||
message: 'Inspect the current tab',
|
||||
approvalResponses: [
|
||||
{ approvalId: 'approval-1', approved: true, reason: 'ok' },
|
||||
],
|
||||
...commonRequestInput(),
|
||||
})
|
||||
|
||||
expect(request.api).toBe(
|
||||
'http://127.0.0.1:5151/agents/agent-codex/sidepanel/chat',
|
||||
)
|
||||
expect(request.body).toEqual({
|
||||
conversationId,
|
||||
message: 'Inspect the current tab',
|
||||
browserContext: {
|
||||
activeTab: { id: 10, url: 'https://example.com', title: 'Example' },
|
||||
enabledMcpServers: ['slack'],
|
||||
},
|
||||
userSystemPrompt: 'Be concise',
|
||||
userWorkingDir: '/tmp/work',
|
||||
selectedText: 'selected text',
|
||||
selectedTextSource: {
|
||||
url: 'https://example.com',
|
||||
title: 'Example',
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
it('keeps tool approval retry payloads scoped to LLM chat', () => {
|
||||
const request = buildSidepanelPreparedSendMessagesRequest({
|
||||
agentServerUrl: 'http://127.0.0.1:5151',
|
||||
target: llmTarget,
|
||||
fallbackProvider,
|
||||
approvalResponses: [
|
||||
{ approvalId: 'approval-1', approved: false, reason: 'no' },
|
||||
],
|
||||
...commonRequestInput(),
|
||||
})
|
||||
|
||||
expect(request.api).toBe('http://127.0.0.1:5151/chat')
|
||||
expect(request.body).toMatchObject({
|
||||
message: '',
|
||||
toolApprovalResponses: [
|
||||
{ approvalId: 'approval-1', approved: false, reason: 'no' },
|
||||
],
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
function commonRequestInput() {
|
||||
return {
|
||||
conversationId,
|
||||
mode: 'agent' as ChatMode,
|
||||
browserContext: {
|
||||
activeTab: { id: 10, url: 'https://example.com', title: 'Example' },
|
||||
enabledMcpServers: ['slack'],
|
||||
},
|
||||
userSystemPrompt: 'Be concise',
|
||||
userWorkingDir: '/tmp/work',
|
||||
previousConversation: [
|
||||
{ role: 'assistant' as const, content: 'Prior answer' },
|
||||
],
|
||||
declinedApps: ['gmail'],
|
||||
aclRules: [{ id: 'rule-1', sitePattern: '*://*/*', enabled: true }],
|
||||
selectedText: 'selected text',
|
||||
selectedTextSource: {
|
||||
url: 'https://example.com',
|
||||
title: 'Example',
|
||||
},
|
||||
toolApprovalConfig: { categories: { navigation: true } },
|
||||
}
|
||||
}
|
||||
|
||||
const fallbackProvider: LlmProviderConfig = {
|
||||
id: 'browseros',
|
||||
type: 'browseros',
|
||||
name: 'BrowserOS',
|
||||
modelId: 'gpt-5',
|
||||
supportsImages: true,
|
||||
contextWindow: 128000,
|
||||
temperature: 0.7,
|
||||
createdAt: 1000,
|
||||
updatedAt: 1000,
|
||||
}
|
||||
|
||||
const llmTarget: SidepanelChatTarget = {
|
||||
kind: 'llm',
|
||||
id: fallbackProvider.id,
|
||||
name: fallbackProvider.name,
|
||||
type: fallbackProvider.type,
|
||||
provider: fallbackProvider,
|
||||
}
|
||||
|
||||
const acpTarget: SidepanelChatTarget = {
|
||||
kind: 'acp',
|
||||
id: 'agent-codex',
|
||||
name: 'Review bot',
|
||||
type: 'acp',
|
||||
agentId: 'agent-codex',
|
||||
adapter: 'codex',
|
||||
adapterName: 'Codex',
|
||||
modelId: 'gpt-5.5',
|
||||
modelLabel: 'GPT-5.5',
|
||||
modelControl: 'best-effort',
|
||||
reasoningEffort: 'medium',
|
||||
reasoningEffortLabel: 'Medium',
|
||||
}
|
||||
@@ -26,15 +26,14 @@ import { useInvalidateCredits } from '@/lib/credits/useCredits'
|
||||
import { declinedAppsStorage } from '@/lib/declined-apps/storage'
|
||||
import { useGraphqlQuery } from '@/lib/graphql/useGraphqlQuery'
|
||||
import { createDefaultBrowserOSProvider } from '@/lib/llm-providers/storage'
|
||||
import { useLlmProviders } from '@/lib/llm-providers/useLlmProviders'
|
||||
import {
|
||||
type ApprovalResponseData,
|
||||
buildChatRequestBody,
|
||||
type ChatRequestBrowserContext,
|
||||
import type {
|
||||
ApprovalResponseData,
|
||||
ChatRequestBrowserContext,
|
||||
} from '@/lib/messaging/server/buildChatRequestBody'
|
||||
import { track } from '@/lib/metrics/track'
|
||||
import { searchActionsStorage } from '@/lib/search-actions/searchActionsStorage'
|
||||
import { selectedTextStorage } from '@/lib/selected-text/selectedTextStorage'
|
||||
import { sentry } from '@/lib/sentry/sentry'
|
||||
import { stopAgentStorage } from '@/lib/stop-agent/stop-agent-storage'
|
||||
import {
|
||||
type ApprovalResponse,
|
||||
@@ -52,7 +51,12 @@ import {
|
||||
import { selectedWorkspaceStorage } from '@/lib/workspace/workspace-storage'
|
||||
import type { ChatMode } from './chatTypes'
|
||||
import { GetConversationWithMessagesDocument } from './graphql/chatSessionDocument'
|
||||
import { toLlmProviderConfig } from './sidepanel-chat-targets'
|
||||
import { useChatRefs } from './useChatRefs'
|
||||
import {
|
||||
buildSidepanelPreparedSendMessagesRequest,
|
||||
toProviderOption,
|
||||
} from './useChatSessionRequest'
|
||||
import { useExecutionHistoryTracker } from './useExecutionHistoryTracker'
|
||||
import { useNotifyActiveTab } from './useNotifyActiveTab'
|
||||
import { useRemoteConversationSave } from './useRemoteConversationSave'
|
||||
@@ -186,16 +190,19 @@ const buildRequestBrowserContext = ({
|
||||
export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
const {
|
||||
selectedLlmProviderRef,
|
||||
selectedChatTargetRef,
|
||||
enabledMcpServersRef,
|
||||
enabledCustomServersRef,
|
||||
personalizationRef,
|
||||
setDefaultProvider,
|
||||
chatTargets,
|
||||
selectedChatTarget,
|
||||
selectChatTarget,
|
||||
selectedLlmProvider,
|
||||
isLoadingProviders,
|
||||
} = useChatRefs()
|
||||
const invalidateCredits = useInvalidateCredits()
|
||||
|
||||
const { providers: llmProviders, setDefaultProvider } = useLlmProviders()
|
||||
|
||||
const {
|
||||
baseUrl: agentServerUrl,
|
||||
isLoading: isLoadingAgentUrl,
|
||||
@@ -218,11 +225,7 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
agentUrlRef.current = agentServerUrl
|
||||
}, [agentServerUrl])
|
||||
|
||||
const providers: Provider[] = llmProviders.map((p) => ({
|
||||
id: p.id,
|
||||
name: p.name,
|
||||
type: p.type,
|
||||
}))
|
||||
const providers: Provider[] = chatTargets.map(toProviderOption)
|
||||
|
||||
const [mode, setMode] = useState<ChatMode>('agent')
|
||||
const [textToAction, setTextToAction] = useState<Map<string, ChatAction>>(
|
||||
@@ -324,15 +327,8 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
textToActionRef.current = textToAction
|
||||
}, [mode, textToAction])
|
||||
|
||||
const selectedProvider = selectedLlmProvider
|
||||
? {
|
||||
id: selectedLlmProvider.id,
|
||||
name: selectedLlmProvider.name,
|
||||
type:
|
||||
selectedLlmProvider.id === 'browseros'
|
||||
? ('browseros' as const)
|
||||
: selectedLlmProvider.type,
|
||||
}
|
||||
const selectedProvider = selectedChatTarget
|
||||
? toProviderOption(selectedChatTarget)
|
||||
: providers[0]
|
||||
|
||||
const {
|
||||
@@ -346,7 +342,8 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
} = useChat({
|
||||
transport: new DefaultChatTransport({
|
||||
prepareSendMessagesRequest: async ({ messages }) => {
|
||||
const provider =
|
||||
const target = selectedChatTargetRef.current
|
||||
const fallbackProvider =
|
||||
selectedLlmProviderRef.current ?? createDefaultBrowserOSProvider()
|
||||
const activeTabsList = await chrome.tabs.query({
|
||||
active: true,
|
||||
@@ -395,51 +392,46 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
personalizationRef.current,
|
||||
)
|
||||
|
||||
const approvalResponses = extractApprovalResponses(messages)
|
||||
const commonRequest = {
|
||||
conversationId: conversationIdRef.current,
|
||||
mode: currentMode,
|
||||
browserContext: requestBrowserContext,
|
||||
userSystemPrompt,
|
||||
userWorkingDir: workingDirRef.current,
|
||||
previousConversation,
|
||||
declinedApps,
|
||||
aclRules: enabledAclRules,
|
||||
toolApprovalConfig: approvalConfig,
|
||||
}
|
||||
|
||||
const approvalResponses =
|
||||
target?.kind === 'acp' ? null : extractApprovalResponses(messages)
|
||||
if (approvalResponses) {
|
||||
return {
|
||||
api: `${agentUrlRef.current}/chat`,
|
||||
body: buildChatRequestBody({
|
||||
conversationId: conversationIdRef.current,
|
||||
provider,
|
||||
mode: currentMode,
|
||||
browserContext: requestBrowserContext,
|
||||
userSystemPrompt,
|
||||
userWorkingDir: workingDirRef.current,
|
||||
previousConversation,
|
||||
declinedApps,
|
||||
aclRules: enabledAclRules,
|
||||
toolApprovalConfig: approvalConfig,
|
||||
toolApprovalResponses: approvalResponses,
|
||||
}),
|
||||
}
|
||||
return buildSidepanelPreparedSendMessagesRequest({
|
||||
agentServerUrl: agentUrlRef.current ?? undefined,
|
||||
target,
|
||||
fallbackProvider,
|
||||
...commonRequest,
|
||||
approvalResponses,
|
||||
})
|
||||
}
|
||||
|
||||
const message = getLastMessageText(messages)
|
||||
|
||||
const result = {
|
||||
api: `${agentUrlRef.current}/chat`,
|
||||
body: buildChatRequestBody({
|
||||
message,
|
||||
conversationId: conversationIdRef.current,
|
||||
provider,
|
||||
mode: currentMode,
|
||||
browserContext: requestBrowserContext,
|
||||
userSystemPrompt,
|
||||
userWorkingDir: workingDirRef.current,
|
||||
previousConversation,
|
||||
declinedApps,
|
||||
aclRules: enabledAclRules,
|
||||
selectedText: activeTabSelection?.text,
|
||||
selectedTextSource: activeTabSelection
|
||||
? {
|
||||
url: activeTabSelection.url,
|
||||
title: activeTabSelection.title,
|
||||
}
|
||||
: undefined,
|
||||
toolApprovalConfig: approvalConfig,
|
||||
}),
|
||||
}
|
||||
const result = buildSidepanelPreparedSendMessagesRequest({
|
||||
agentServerUrl: agentUrlRef.current ?? undefined,
|
||||
target,
|
||||
fallbackProvider,
|
||||
message,
|
||||
...commonRequest,
|
||||
selectedText: activeTabSelection?.text,
|
||||
selectedTextSource: activeTabSelection
|
||||
? {
|
||||
url: activeTabSelection.url,
|
||||
title: activeTabSelection.title,
|
||||
}
|
||||
: undefined,
|
||||
})
|
||||
|
||||
// Track which tab's selection was sent so we can clear it on success
|
||||
pendingSelectionTabKeyRef.current =
|
||||
@@ -451,7 +443,7 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
sendAutomaticallyWhen: () => {
|
||||
if (approvalJustRespondedRef.current) {
|
||||
approvalJustRespondedRef.current = false
|
||||
return true
|
||||
return selectedChatTargetRef.current?.kind !== 'acp'
|
||||
}
|
||||
return false
|
||||
},
|
||||
@@ -686,10 +678,22 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
}, [dispatchMessage, isIntegrationsSynced])
|
||||
|
||||
const sendMessage = (params: { text: string; action?: ChatAction }) => {
|
||||
const target = selectedChatTargetRef.current
|
||||
const llmTargetProvider = toLlmProviderConfig(target)
|
||||
const agentTarget = target?.kind === 'acp' ? target : undefined
|
||||
track(MESSAGE_SENT_EVENT, {
|
||||
mode,
|
||||
provider_type: selectedLlmProvider?.type,
|
||||
model: selectedLlmProvider?.modelId,
|
||||
provider_id:
|
||||
agentTarget?.agentId ??
|
||||
llmTargetProvider?.id ??
|
||||
selectedLlmProvider?.id,
|
||||
provider_type: agentTarget ? 'acp' : llmTargetProvider?.type,
|
||||
agent_id: agentTarget?.agentId,
|
||||
adapter: agentTarget?.adapter,
|
||||
model:
|
||||
agentTarget?.modelId ??
|
||||
llmTargetProvider?.modelId ??
|
||||
selectedLlmProvider?.modelId,
|
||||
})
|
||||
|
||||
if (!isIntegrationsSyncedRef.current) {
|
||||
@@ -741,14 +745,54 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
addToolApprovalResponse(params)
|
||||
}
|
||||
|
||||
const resetConversationState = () => {
|
||||
stop()
|
||||
void finishExecutionTask({ isAbort: true })
|
||||
setConversationId(crypto.randomUUID())
|
||||
setMessages([])
|
||||
setTextToAction(new Map())
|
||||
setLiked({})
|
||||
setDisliked({})
|
||||
setRestoredConversationId(null)
|
||||
resetRemoteConversation()
|
||||
}
|
||||
|
||||
const handleSelectProvider = (provider: Provider) => {
|
||||
const fullProvider = llmProviders.find((p) => p.id === provider.id)
|
||||
const target = chatTargets.find(
|
||||
(candidate) =>
|
||||
candidate.id === provider.id && candidate.kind === provider.kind,
|
||||
)
|
||||
if (!target) return
|
||||
|
||||
const previousTarget = selectedChatTargetRef.current
|
||||
track(PROVIDER_SELECTED_EVENT, {
|
||||
provider_id: provider.id,
|
||||
provider_type: provider.type,
|
||||
model_id: fullProvider?.modelId,
|
||||
provider_id: target.id,
|
||||
provider_type: target.kind === 'acp' ? 'acp' : target.type,
|
||||
model_id:
|
||||
target.kind === 'acp' ? target.modelId : target.provider.modelId,
|
||||
agent_id: target.kind === 'acp' ? target.agentId : undefined,
|
||||
adapter: target.kind === 'acp' ? target.adapter : undefined,
|
||||
})
|
||||
setDefaultProvider(provider.id)
|
||||
|
||||
void selectChatTarget(target).catch((error) => {
|
||||
sentry.captureException(error, {
|
||||
extra: {
|
||||
message: 'Failed to persist sidepanel chat target selection',
|
||||
targetId: target.id,
|
||||
targetKind: target.kind,
|
||||
},
|
||||
})
|
||||
})
|
||||
if (target.kind === 'llm') setDefaultProvider(target.provider.id)
|
||||
|
||||
if (
|
||||
previousTarget &&
|
||||
(previousTarget.kind !== target.kind ||
|
||||
previousTarget.id !== target.id) &&
|
||||
messagesRef.current.length > 0
|
||||
) {
|
||||
resetConversationState()
|
||||
}
|
||||
}
|
||||
|
||||
const getActionForMessage = (message: UIMessage) => {
|
||||
@@ -762,15 +806,7 @@ export const useChatSession = (options?: ChatSessionOptions) => {
|
||||
|
||||
const resetConversation = () => {
|
||||
track(CONVERSATION_RESET_EVENT, { message_count: messages.length })
|
||||
stop()
|
||||
void finishExecutionTask({ isAbort: true })
|
||||
setConversationId(crypto.randomUUID())
|
||||
setMessages([])
|
||||
setTextToAction(new Map())
|
||||
setLiked({})
|
||||
setDisliked({})
|
||||
setRestoredConversationId(null)
|
||||
resetRemoteConversation()
|
||||
resetConversationState()
|
||||
}
|
||||
|
||||
const isRestoringConversation =
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
import type { Provider } from '../../../components/chat/chatComponentTypes'
|
||||
import type { LlmProviderConfig } from '../../../lib/llm-providers/types'
|
||||
import {
|
||||
type ApprovalResponseData,
|
||||
buildChatRequestBody,
|
||||
} from '../../../lib/messaging/server/buildChatRequestBody'
|
||||
import {
|
||||
type SidepanelChatTarget,
|
||||
toLlmProviderConfig,
|
||||
} from './sidepanel-chat-targets'
|
||||
|
||||
type LlmChatRequestBodyInput = Parameters<typeof buildChatRequestBody>[0]
|
||||
|
||||
type CommonSidepanelRequestInput = Omit<
|
||||
LlmChatRequestBodyInput,
|
||||
'provider' | 'message' | 'toolApprovalResponses' | 'isScheduledTask'
|
||||
>
|
||||
|
||||
interface BuildSidepanelPreparedSendMessagesRequestInput
|
||||
extends CommonSidepanelRequestInput {
|
||||
agentServerUrl: string | undefined
|
||||
target: SidepanelChatTarget | undefined
|
||||
fallbackProvider: LlmProviderConfig
|
||||
message?: string
|
||||
approvalResponses?: ApprovalResponseData[] | null
|
||||
}
|
||||
|
||||
export function buildSidepanelPreparedSendMessagesRequest({
|
||||
agentServerUrl,
|
||||
target,
|
||||
fallbackProvider,
|
||||
message,
|
||||
approvalResponses,
|
||||
...common
|
||||
}: BuildSidepanelPreparedSendMessagesRequestInput) {
|
||||
if (target?.kind === 'acp') {
|
||||
return {
|
||||
api: `${agentServerUrl}/agents/${encodeURIComponent(target.agentId)}/sidepanel/chat`,
|
||||
body: {
|
||||
conversationId: common.conversationId,
|
||||
message: message ?? '',
|
||||
browserContext: common.browserContext,
|
||||
userSystemPrompt: common.userSystemPrompt,
|
||||
userWorkingDir: common.userWorkingDir,
|
||||
selectedText: common.selectedText,
|
||||
selectedTextSource: common.selectedTextSource,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
const provider = toLlmProviderConfig(target) ?? fallbackProvider
|
||||
return {
|
||||
api: `${agentServerUrl}/chat`,
|
||||
body: buildChatRequestBody({
|
||||
...common,
|
||||
provider,
|
||||
message,
|
||||
toolApprovalResponses: approvalResponses ?? undefined,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
export function toProviderOption(target: SidepanelChatTarget): Provider {
|
||||
return {
|
||||
id: target.id,
|
||||
name: target.name,
|
||||
type: target.type,
|
||||
kind: target.kind,
|
||||
agentId: target.kind === 'acp' ? target.agentId : undefined,
|
||||
adapterName: target.kind === 'acp' ? target.adapterName : undefined,
|
||||
modelLabel: target.kind === 'acp' ? target.modelLabel : undefined,
|
||||
modelControl: target.kind === 'acp' ? target.modelControl : undefined,
|
||||
}
|
||||
}
|
||||
@@ -59,15 +59,3 @@ export interface AgentConversation {
|
||||
createdAt: number
|
||||
updatedAt: number
|
||||
}
|
||||
|
||||
export interface AgentCardData {
|
||||
agentId: string
|
||||
name: string
|
||||
model?: string
|
||||
status: 'idle' | 'working' | 'error'
|
||||
lastMessage?: string
|
||||
lastMessageTimestamp?: number
|
||||
activitySummary?: string
|
||||
currentTool?: string
|
||||
costUsd?: number
|
||||
}
|
||||
|
||||
@@ -75,6 +75,12 @@ export const MCP_EXTERNAL_ACCESS_DISABLED_EVENT =
|
||||
/** @public */
|
||||
export const MCP_SERVER_RESTARTED_EVENT = 'settings.mcp_server.restarted'
|
||||
|
||||
/** @public */
|
||||
export const AGENT_CREATED_EVENT = 'agents.agent.created'
|
||||
|
||||
/** @public */
|
||||
export const AGENT_DELETED_EVENT = 'agents.agent.deleted'
|
||||
|
||||
/** @public */
|
||||
export const NEW_SCHEDULED_TASK_CREATED_EVENT =
|
||||
'settings.scheduled_task.created'
|
||||
|
||||
@@ -2,29 +2,75 @@ function isAbortError(error: unknown): boolean {
|
||||
return error instanceof DOMException && error.name === 'AbortError'
|
||||
}
|
||||
|
||||
export interface ParsedSSEEvent<T> {
|
||||
data: T
|
||||
/** Numeric `id:` line on the same SSE event, if any. */
|
||||
seq?: number
|
||||
}
|
||||
|
||||
export function parseSSELines<T>(buffer: string): {
|
||||
events: T[]
|
||||
events: ParsedSSEEvent<T>[]
|
||||
remainder: string
|
||||
} {
|
||||
// SSE events are separated by blank lines. Buffer lines until we hit
|
||||
// a blank, then assemble each event. Lines we recognise: `id: <n>`
|
||||
// and `data: <payload>`. Everything else is ignored.
|
||||
const events: ParsedSSEEvent<T>[] = []
|
||||
const lines = buffer.split('\n')
|
||||
const remainder = lines.pop() ?? ''
|
||||
const events: T[] = []
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data: ')) continue
|
||||
const payload = line.slice(6)
|
||||
if (payload === '[DONE]') continue
|
||||
try {
|
||||
events.push(JSON.parse(payload) as T)
|
||||
} catch {}
|
||||
// Find the last blank-line boundary; everything after it is the
|
||||
// remainder (next event partially received).
|
||||
let lastBoundary = -1
|
||||
for (let i = lines.length - 1; i >= 0; i--) {
|
||||
if (lines[i] === '') {
|
||||
lastBoundary = i
|
||||
break
|
||||
}
|
||||
}
|
||||
const completeLines = lastBoundary >= 0 ? lines.slice(0, lastBoundary) : []
|
||||
const remainder =
|
||||
lastBoundary >= 0 ? lines.slice(lastBoundary + 1).join('\n') : buffer
|
||||
|
||||
let currentSeq: number | undefined
|
||||
let currentData: string | null = null
|
||||
const flush = () => {
|
||||
if (currentData != null && currentData !== '[DONE]') {
|
||||
try {
|
||||
events.push({
|
||||
data: JSON.parse(currentData) as T,
|
||||
seq: currentSeq,
|
||||
})
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
currentSeq = undefined
|
||||
currentData = null
|
||||
}
|
||||
|
||||
for (const line of completeLines) {
|
||||
if (line === '') {
|
||||
flush()
|
||||
continue
|
||||
}
|
||||
if (line.startsWith('id: ')) {
|
||||
const n = Number.parseInt(line.slice(4).trim(), 10)
|
||||
if (Number.isFinite(n)) currentSeq = n
|
||||
continue
|
||||
}
|
||||
if (line.startsWith('data: ')) {
|
||||
currentData = line.slice(6)
|
||||
}
|
||||
}
|
||||
// Catch a complete trailing event with no terminating blank line —
|
||||
// shouldn't happen in well-formed SSE, but be tolerant.
|
||||
flush()
|
||||
|
||||
return { events, remainder }
|
||||
}
|
||||
|
||||
export async function consumeSSEStream<T>(
|
||||
response: Response,
|
||||
onEvent: (event: T) => void,
|
||||
onEvent: (event: T, meta: { seq?: number }) => void,
|
||||
signal?: AbortSignal,
|
||||
): Promise<void> {
|
||||
const reader = response.body?.getReader()
|
||||
@@ -49,7 +95,7 @@ export async function consumeSSEStream<T>(
|
||||
buffer = remainder
|
||||
|
||||
for (const event of events) {
|
||||
onEvent(event)
|
||||
onEvent(event.data, { seq: event.seq })
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
@@ -64,7 +110,7 @@ export async function consumeSSEStream<T>(
|
||||
if (buffer) {
|
||||
const { events } = parseSSELines<T>(buffer)
|
||||
for (const event of events) {
|
||||
onEvent(event)
|
||||
onEvent(event.data, { seq: event.seq })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ const chromiumArgs = [
|
||||
'--show-component-extension-options',
|
||||
'--disable-browseros-server',
|
||||
'--disable-browseros-extensions',
|
||||
'--browseros-dock-icon=dev',
|
||||
]
|
||||
|
||||
if (env.BROWSEROS_CDP_PORT) {
|
||||
|
||||
51
packages/browseros-agent/apps/eval/.env.example
vendored
Normal file
51
packages/browseros-agent/apps/eval/.env.example
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
# Copy to .env.development for local eval runs.
|
||||
|
||||
# Provider keys used by existing config files.
|
||||
OPENROUTER_API_KEY=
|
||||
FIREWORKS_API_KEY=
|
||||
ANTHROPIC_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
GOOGLE_GENERATIVE_AI_API_KEY=
|
||||
|
||||
# Claude Agent SDK token used by performance_grader.
|
||||
CLAUDE_CODE_OAUTH_TOKEN=
|
||||
|
||||
# Suite-mode model selection.
|
||||
EVAL_VARIANT=local
|
||||
EVAL_AGENT_PROVIDER=openai-compatible
|
||||
EVAL_AGENT_MODEL=
|
||||
EVAL_AGENT_API_KEY=
|
||||
EVAL_AGENT_BASE_URL=
|
||||
EVAL_AGENT_SUPPORTS_IMAGES=true
|
||||
|
||||
# Optional suite-mode executor override for orchestrator suites.
|
||||
EVAL_EXECUTOR_MODEL=
|
||||
EVAL_EXECUTOR_API_KEY=
|
||||
EVAL_EXECUTOR_BASE_URL=
|
||||
|
||||
# Clado visual action executor.
|
||||
CLADO_ACTION_MODEL=
|
||||
CLADO_ACTION_API_KEY=
|
||||
CLADO_ACTION_BASE_URL=
|
||||
# Backward-compatible alias used by older local scripts.
|
||||
CLADO_ACTION_URL=
|
||||
|
||||
# BrowserOS runner.
|
||||
BROWSEROS_BINARY=/Applications/BrowserOS.app/Contents/MacOS/BrowserOS
|
||||
BROWSEROS_SERVER_URL=http://127.0.0.1:9110
|
||||
BROWSEROS_SERVER_LOG_DIR=/tmp/browseros-server-logs
|
||||
BROWSEROS_CONFIG_URL=
|
||||
|
||||
# Captcha solver extension.
|
||||
NOPECHA_API_KEY=
|
||||
|
||||
# WebArena-Infinity.
|
||||
WEBARENA_INFINITY_DIR=
|
||||
INFINITY_APP_URL=
|
||||
|
||||
# R2 publishing and weekly report.
|
||||
EVAL_R2_ACCOUNT_ID=
|
||||
EVAL_R2_ACCESS_KEY_ID=
|
||||
EVAL_R2_SECRET_ACCESS_KEY=
|
||||
EVAL_R2_BUCKET=browseros-eval
|
||||
EVAL_R2_CDN_BASE_URL=https://eval.browseros.com
|
||||
875
packages/browseros-agent/apps/eval/DESIGN_DOC.md
vendored
875
packages/browseros-agent/apps/eval/DESIGN_DOC.md
vendored
@@ -1,875 +0,0 @@
|
||||
# Eval System - Production Grade Design Doc
|
||||
|
||||
## Current State Analysis
|
||||
|
||||
### What's Working Well
|
||||
1. **Zod validation** - Already exists in `config-validator.ts`, reuses `LLMConfigSchema` from `@browseros/shared`
|
||||
2. **Grader registry pattern** - `createGrader()` factory works well, easy to add new graders
|
||||
3. **AgentEvaluator interface** - Clean interface: `execute() → AgentResult`
|
||||
4. **Discriminated unions** - Messages, agent types use proper TypeScript patterns
|
||||
5. **Capture utilities** - `ScreenshotCapture`, `MessageLogger`, `TrajectorySaver` are modular
|
||||
|
||||
### Key Problems
|
||||
|
||||
**1. No Agent Registry/Factory**
|
||||
Agent creation is hardcoded if-else in `task-executor.ts`:
|
||||
```typescript
|
||||
// Current approach - not scalable
|
||||
if (this.config.agent.type === 'single') {
|
||||
const evaluator = new SingleAgentEvaluator(...)
|
||||
} else if (this.config.agent.type === 'orchestrator-executor') {
|
||||
const evaluator = new OrchestratorExecutorEvaluator(...)
|
||||
}
|
||||
// Adding new agent = modify this file
|
||||
```
|
||||
|
||||
**2. Heavy Server Dependency**
|
||||
Imports from `@browseros/server`:
|
||||
- `GeminiAgent` - Core agent (necessary)
|
||||
- `ToolExecutionHooks` - Hook interface
|
||||
- `ResolvedAgentConfig` - Agent config type
|
||||
- `AgentExecutionError` - Error type
|
||||
- `VercelAIContentGenerator` - Provider adapter
|
||||
- Gateway client functions
|
||||
|
||||
**3. Scattered Types**
|
||||
- `src/types.ts` - Main types
|
||||
- `agents/types.ts` - Agent interface
|
||||
- `agents/orchestrator-executor/types.ts` - Orchestrator types
|
||||
- `runner/types.ts` - Runner types
|
||||
- `graders/types.ts` - Grader types
|
||||
|
||||
**4. Duplicated Capture Logic**
|
||||
Both agent evaluators duplicate:
|
||||
- Initialize ScreenshotCapture
|
||||
- Initialize MessageLogger
|
||||
- Set up tool hooks
|
||||
- Handle timeouts
|
||||
- Collect errors/warnings
|
||||
|
||||
**5. No Unified Utils**
|
||||
Hooks, screenshot capture, message logging code is copy-pasted per agent type.
|
||||
|
||||
---
|
||||
|
||||
## Design Goals
|
||||
|
||||
1. **Easy to add new agents** - Register new agent type, implement interface, done
|
||||
2. **Shared capture infrastructure** - All agents use same screenshot/logging utils
|
||||
3. **Type-safe with Zod** - Config validation at entry point
|
||||
4. **Minimal server coupling** - Only import what's necessary
|
||||
5. **Clear folder structure** - Types where they belong
|
||||
6. **Production patterns** - Factory, registry, composition
|
||||
|
||||
---
|
||||
|
||||
## Proposed Architecture
|
||||
|
||||
### Folder Structure
|
||||
|
||||
```
|
||||
eval/src/
|
||||
├── index.ts # Entry point, CLI
|
||||
├── types/
|
||||
│ ├── index.ts # Re-exports all types
|
||||
│ ├── config.ts # EvalConfig, AgentConfig (Zod schemas + types)
|
||||
│ ├── task.ts # Task, TaskMetadata
|
||||
│ ├── message.ts # Message discriminated union
|
||||
│ ├── result.ts # AgentResult, GraderResult
|
||||
│ └── errors.ts # ErrorSource, TaskError, EvalWarning
|
||||
│
|
||||
├── agents/
|
||||
│ ├── index.ts # Re-exports + auto-registration
|
||||
│ ├── registry.ts # Agent registry + factory
|
||||
│ ├── types.ts # AgentEvaluator interface, AgentContext
|
||||
│ ├── single/
|
||||
│ │ └── index.ts # SingleAgentEvaluator
|
||||
│ └── orchestrator-executor/
|
||||
│ ├── index.ts # OrchestratorExecutorEvaluator
|
||||
│ ├── types.ts # Orchestrator-specific types only
|
||||
│ ├── orchestrator.ts
|
||||
│ ├── orchestrator-agent.ts
|
||||
│ ├── orchestrator-tools.ts
|
||||
│ ├── executor.ts
|
||||
│ └── executor-store.ts
|
||||
│
|
||||
├── capture/
|
||||
│ ├── index.ts # Re-exports
|
||||
│ ├── types.ts # CaptureContext interface
|
||||
│ ├── context.ts # CaptureContext class (bundles all capture)
|
||||
│ ├── hooks.ts # createCaptureHooks() utility
|
||||
│ ├── screenshot.ts # ScreenshotCapture
|
||||
│ ├── message-logger.ts # MessageLogger
|
||||
│ ├── trajectory-saver.ts # TrajectorySaver
|
||||
│ └── window-manager.ts # WindowManager
|
||||
│
|
||||
├── graders/
|
||||
│ ├── index.ts # Re-exports
|
||||
│ ├── registry.ts # Grader registry (existing pattern)
|
||||
│ ├── types.ts # Grader interface
|
||||
│ ├── benchmark/
|
||||
│ │ ├── webvoyager.ts
|
||||
│ │ └── mind2web.ts
|
||||
│ └── fara/
|
||||
│ ├── alignment.ts
|
||||
│ ├── rubric.ts
|
||||
│ ├── multimodal.ts
|
||||
│ └── combined.ts
|
||||
│
|
||||
├── runner/
|
||||
│ ├── index.ts # runEval() main entry
|
||||
│ ├── types.ts # RunEvalOptions, TaskResult, BatchSummary
|
||||
│ ├── task-loader.ts
|
||||
│ ├── task-executor.ts
|
||||
│ └── parallel-executor.ts
|
||||
│
|
||||
└── utils/
|
||||
├── env.ts # resolveEnvValue() helper
|
||||
└── validation.ts # Config validation logic
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Components
|
||||
|
||||
### 1. Type System (`types/`)
|
||||
|
||||
**`types/config.ts`** - Zod schemas + inferred types:
|
||||
```typescript
|
||||
import { LLMConfigSchema, LLMProviderSchema } from '@browseros/shared/schemas/llm'
|
||||
import { z } from 'zod'
|
||||
|
||||
// Single agent config
|
||||
export const SingleAgentConfigSchema = LLMConfigSchema.extend({
|
||||
type: z.literal('single'),
|
||||
})
|
||||
export type SingleAgentConfig = z.infer<typeof SingleAgentConfigSchema>
|
||||
|
||||
// Orchestrator-executor config
|
||||
export const OrchestratorExecutorConfigSchema = z.object({
|
||||
type: z.literal('orchestrator-executor'),
|
||||
orchestrator: LLMConfigSchema.extend({
|
||||
maxTurns: z.number().int().min(1).optional(),
|
||||
}),
|
||||
executor: LLMConfigSchema.extend({
|
||||
maxStepsPerDelegation: z.number().int().min(1).optional(),
|
||||
}),
|
||||
})
|
||||
export type OrchestratorExecutorConfig = z.infer<typeof OrchestratorExecutorConfigSchema>
|
||||
|
||||
// Discriminated union
|
||||
export const AgentConfigSchema = z.discriminatedUnion('type', [
|
||||
SingleAgentConfigSchema,
|
||||
OrchestratorExecutorConfigSchema,
|
||||
])
|
||||
export type AgentConfig = z.infer<typeof AgentConfigSchema>
|
||||
|
||||
// Full eval config
|
||||
export const EvalConfigSchema = z.object({
|
||||
agent: AgentConfigSchema,
|
||||
dataset: z.string().min(1),
|
||||
output_dir: z.string().optional(),
|
||||
num_workers: z.number().int().min(1).max(20).default(1),
|
||||
browseros: z.object({
|
||||
server_url: z.string().url(),
|
||||
}),
|
||||
grader_model: z.string().optional(),
|
||||
grader_api_key_env: z.string().optional(),
|
||||
grader_base_url: z.string().url().optional(),
|
||||
timeout_ms: z.number().int().min(30000).max(3600000).optional(),
|
||||
})
|
||||
export type EvalConfig = z.infer<typeof EvalConfigSchema>
|
||||
```
|
||||
|
||||
**`types/message.ts`** - Message types:
|
||||
```typescript
|
||||
import { z } from 'zod'
|
||||
|
||||
const BaseMessageSchema = z.object({
|
||||
timestamp: z.string().datetime(),
|
||||
})
|
||||
|
||||
export const UserMessageSchema = BaseMessageSchema.extend({
|
||||
type: z.literal('user'),
|
||||
content: z.string(),
|
||||
})
|
||||
|
||||
export const AssistantMessageSchema = BaseMessageSchema.extend({
|
||||
type: z.literal('assistant'),
|
||||
content: z.string(),
|
||||
})
|
||||
|
||||
export const ToolCallMessageSchema = BaseMessageSchema.extend({
|
||||
type: z.literal('tool_call'),
|
||||
tool: z.string(),
|
||||
toolCallId: z.string(),
|
||||
params: z.record(z.unknown()),
|
||||
})
|
||||
|
||||
export const ToolResultMessageSchema = BaseMessageSchema.extend({
|
||||
type: z.literal('tool_result'),
|
||||
toolCallId: z.string(),
|
||||
result: z.unknown(),
|
||||
isError: z.boolean(),
|
||||
screenshot: z.number().optional(),
|
||||
})
|
||||
|
||||
export const ErrorMessageSchema = BaseMessageSchema.extend({
|
||||
type: z.literal('error'),
|
||||
content: z.string(),
|
||||
errorCode: z.string().optional(),
|
||||
})
|
||||
|
||||
// Orchestrator-specific messages
|
||||
export const DelegationMessageSchema = BaseMessageSchema.extend({
|
||||
type: z.literal('delegation'),
|
||||
instruction: z.string(),
|
||||
executorId: z.string(),
|
||||
maxSteps: z.number().optional(),
|
||||
})
|
||||
|
||||
export const DelegationResultMessageSchema = BaseMessageSchema.extend({
|
||||
type: z.literal('delegation_result'),
|
||||
executorId: z.string(),
|
||||
summary: z.string(),
|
||||
status: z.enum(['done', 'blocked', 'max_steps']),
|
||||
stepsUsed: z.number(),
|
||||
currentUrl: z.string().optional(),
|
||||
})
|
||||
|
||||
export const MessageSchema = z.discriminatedUnion('type', [
|
||||
UserMessageSchema,
|
||||
AssistantMessageSchema,
|
||||
ToolCallMessageSchema,
|
||||
ToolResultMessageSchema,
|
||||
ErrorMessageSchema,
|
||||
DelegationMessageSchema,
|
||||
DelegationResultMessageSchema,
|
||||
])
|
||||
|
||||
export type Message = z.infer<typeof MessageSchema>
|
||||
export type UserMessage = z.infer<typeof UserMessageSchema>
|
||||
export type AssistantMessage = z.infer<typeof AssistantMessageSchema>
|
||||
export type ToolCallMessage = z.infer<typeof ToolCallMessageSchema>
|
||||
export type ToolResultMessage = z.infer<typeof ToolResultMessageSchema>
|
||||
export type ErrorMessage = z.infer<typeof ErrorMessageSchema>
|
||||
export type DelegationMessage = z.infer<typeof DelegationMessageSchema>
|
||||
export type DelegationResultMessage = z.infer<typeof DelegationResultMessageSchema>
|
||||
|
||||
// Type guards
|
||||
export const isToolCallMessage = (m: Message): m is ToolCallMessage => m.type === 'tool_call'
|
||||
export const isDelegationMessage = (m: Message): m is DelegationMessage => m.type === 'delegation'
|
||||
// ... etc
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Agent Registry (`agents/registry.ts`)
|
||||
|
||||
```typescript
|
||||
import type { AgentContext, AgentEvaluator } from './types'
|
||||
|
||||
type AgentFactory = (context: AgentContext) => AgentEvaluator
|
||||
|
||||
const registry = new Map<string, AgentFactory>()
|
||||
|
||||
/**
|
||||
* Register an agent type
|
||||
*/
|
||||
export function registerAgent(type: string, factory: AgentFactory): void {
|
||||
if (registry.has(type)) {
|
||||
throw new Error(`Agent type "${type}" already registered`)
|
||||
}
|
||||
registry.set(type, factory)
|
||||
}
|
||||
|
||||
/**
|
||||
* Create agent evaluator from context
|
||||
*/
|
||||
export function createAgent(context: AgentContext): AgentEvaluator {
|
||||
const factory = registry.get(context.config.agent.type)
|
||||
if (!factory) {
|
||||
const available = Array.from(registry.keys()).join(', ')
|
||||
throw new Error(
|
||||
`Unknown agent type: "${context.config.agent.type}". Available: ${available}`
|
||||
)
|
||||
}
|
||||
return factory(context)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all registered agent types
|
||||
*/
|
||||
export function getRegisteredAgentTypes(): string[] {
|
||||
return Array.from(registry.keys())
|
||||
}
|
||||
```
|
||||
|
||||
**`agents/index.ts`** - Auto-registration:
|
||||
```typescript
|
||||
import { registerAgent } from './registry'
|
||||
import { SingleAgentEvaluator } from './single'
|
||||
import { OrchestratorExecutorEvaluator } from './orchestrator-executor'
|
||||
|
||||
// Auto-register built-in agents
|
||||
registerAgent('single', (ctx) => new SingleAgentEvaluator(ctx))
|
||||
registerAgent('orchestrator-executor', (ctx) => new OrchestratorExecutorEvaluator(ctx))
|
||||
|
||||
// Re-exports
|
||||
export { createAgent, registerAgent, getRegisteredAgentTypes } from './registry'
|
||||
export type { AgentContext, AgentEvaluator, AgentResult } from './types'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. Agent Context (`agents/types.ts`)
|
||||
|
||||
```typescript
|
||||
import type { CaptureContext } from '../capture/types'
|
||||
import type { EvalConfig, Task, TaskMetadata, Message } from '../types'
|
||||
|
||||
/**
|
||||
* All dependencies an agent needs - passed to factory
|
||||
*/
|
||||
export interface AgentContext {
|
||||
// Config
|
||||
config: EvalConfig
|
||||
task: Task
|
||||
|
||||
// Browser window
|
||||
windowId: number
|
||||
tabId: number
|
||||
|
||||
// Output
|
||||
outputDir: string // Root output dir
|
||||
taskOutputDir: string // Task-specific: outputDir/query_id/
|
||||
|
||||
// Capture infrastructure (pre-initialized)
|
||||
capture: CaptureContext
|
||||
}
|
||||
|
||||
/**
|
||||
* Result returned by agent execution
|
||||
*/
|
||||
export interface AgentResult {
|
||||
metadata: TaskMetadata
|
||||
messages: Message[]
|
||||
finalAnswer: string | null
|
||||
}
|
||||
|
||||
/**
|
||||
* Interface all agent evaluators must implement
|
||||
*/
|
||||
export interface AgentEvaluator {
|
||||
/**
|
||||
* Execute the agent on the task
|
||||
*/
|
||||
execute(): Promise<AgentResult>
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. Capture Context (`capture/context.ts`)
|
||||
|
||||
Bundle all capture utilities:
|
||||
```typescript
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import type { ToolExecutionHooks, ToolExecutionResult } from '@browseros/server/agent'
|
||||
import type { Message, TaskError, EvalWarning, ErrorSource } from '../types'
|
||||
import { MessageLogger } from './message-logger'
|
||||
import { ScreenshotCapture } from './screenshot'
|
||||
import { TrajectorySaver } from './trajectory-saver'
|
||||
|
||||
export interface CaptureContextConfig {
|
||||
serverUrl: string
|
||||
outputDir: string
|
||||
taskId: string
|
||||
tabId: number
|
||||
windowId: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Unified capture context - bundles screenshot, message logging, errors/warnings
|
||||
*/
|
||||
export class CaptureContext {
|
||||
readonly screenshot: ScreenshotCapture
|
||||
readonly messageLogger: MessageLogger
|
||||
readonly trajectorySaver: TrajectorySaver
|
||||
|
||||
private errors: TaskError[] = []
|
||||
private warnings: EvalWarning[] = []
|
||||
private currentToolCallId: string | null = null
|
||||
|
||||
private readonly tabId: number
|
||||
private readonly windowId: number
|
||||
|
||||
constructor(private config: CaptureContextConfig) {
|
||||
this.tabId = config.tabId
|
||||
this.windowId = config.windowId
|
||||
this.trajectorySaver = new TrajectorySaver(config.outputDir, config.taskId)
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize - must be called before use
|
||||
*/
|
||||
async init(): Promise<string> {
|
||||
const taskOutputDir = await this.trajectorySaver.init()
|
||||
|
||||
this.screenshot = new ScreenshotCapture(this.config.serverUrl, taskOutputDir)
|
||||
await this.screenshot.init()
|
||||
|
||||
this.messageLogger = new MessageLogger(taskOutputDir)
|
||||
|
||||
return taskOutputDir
|
||||
}
|
||||
|
||||
/**
|
||||
* Create tool execution hooks for GeminiAgent
|
||||
*/
|
||||
createToolHooks(): ToolExecutionHooks {
|
||||
return {
|
||||
onBeforeToolCall: async (toolName: string, args: unknown) => {
|
||||
try {
|
||||
this.currentToolCallId = randomUUID()
|
||||
await this.messageLogger.logToolCall(
|
||||
toolName,
|
||||
this.currentToolCallId,
|
||||
args as Record<string, unknown>
|
||||
)
|
||||
} catch (err) {
|
||||
this.addWarning('message_logging', `Failed to log tool call ${toolName}: ${err}`)
|
||||
}
|
||||
},
|
||||
|
||||
onAfterToolCall: async (toolName: string, result: ToolExecutionResult) => {
|
||||
let screenshotNum = 0
|
||||
|
||||
// Capture screenshot
|
||||
try {
|
||||
screenshotNum = await this.screenshot.capture(this.tabId, this.windowId)
|
||||
} catch (err) {
|
||||
this.addWarning('screenshot', `Screenshot after ${toolName} failed: ${err}`)
|
||||
screenshotNum = this.screenshot.getCount()
|
||||
}
|
||||
|
||||
// Log tool errors
|
||||
if (result.isError) {
|
||||
this.addWarning('mcp_tool', `Tool ${toolName} error: ${result.errorMessage}`)
|
||||
}
|
||||
|
||||
// Log result
|
||||
if (this.currentToolCallId) {
|
||||
try {
|
||||
await this.messageLogger.logToolResult(
|
||||
this.currentToolCallId,
|
||||
result.isError ? { error: result.errorMessage } : result.parts,
|
||||
result.isError,
|
||||
screenshotNum
|
||||
)
|
||||
} catch (err) {
|
||||
this.addWarning('message_logging', `Failed to log tool result: ${err}`)
|
||||
}
|
||||
}
|
||||
|
||||
this.currentToolCallId = null
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Error/warning collection
|
||||
addError(source: ErrorSource, message: string, details?: Record<string, unknown>): void {
|
||||
this.errors.push({ source, message, timestamp: new Date().toISOString(), details })
|
||||
}
|
||||
|
||||
addWarning(source: ErrorSource, message: string): void {
|
||||
this.warnings.push({ source, message, timestamp: new Date().toISOString() })
|
||||
console.warn(`[${source}] ${message}`)
|
||||
}
|
||||
|
||||
getErrors(): TaskError[] { return [...this.errors] }
|
||||
getWarnings(): EvalWarning[] { return [...this.warnings] }
|
||||
getMessages(): Message[] { return this.messageLogger.getMessages() }
|
||||
getScreenshotCount(): number { return this.screenshot.getCount() }
|
||||
getLastAssistantMessage(): string | null { return this.messageLogger.getLastAssistantMessage() }
|
||||
|
||||
// Delegation logging (for orchestrator-executor)
|
||||
async logDelegation(instruction: string, executorId: string, maxSteps?: number): Promise<void> {
|
||||
await this.messageLogger.logDelegation(instruction, executorId, maxSteps)
|
||||
}
|
||||
|
||||
async logDelegationResult(
|
||||
executorId: string,
|
||||
summary: string,
|
||||
status: 'done' | 'blocked' | 'max_steps',
|
||||
stepsUsed: number,
|
||||
currentUrl?: string
|
||||
): Promise<void> {
|
||||
await this.messageLogger.logDelegationResult(executorId, summary, status, stepsUsed, currentUrl)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. Single Agent Evaluator (`agents/single/index.ts`)
|
||||
|
||||
Clean implementation using context:
|
||||
```typescript
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import { GeminiAgent } from '@browseros/server/agent'
|
||||
import { AgentExecutionError } from '@browseros/server/agent/errors'
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
import { MCPServerConfig } from '@google/gemini-cli-core'
|
||||
import type { AgentContext, AgentEvaluator, AgentResult } from '../types'
|
||||
import type { SingleAgentConfig, TaskMetadata } from '../../types'
|
||||
import { resolveEnvValue } from '../../utils/env'
|
||||
|
||||
const DEFAULT_TIMEOUT_MS = 15 * 60 * 1000
|
||||
|
||||
export class SingleAgentEvaluator implements AgentEvaluator {
|
||||
constructor(private ctx: AgentContext) {}
|
||||
|
||||
async execute(): Promise<AgentResult> {
|
||||
const startTime = Date.now()
|
||||
const { config, task, capture } = this.ctx
|
||||
const agentConfig = config.agent as SingleAgentConfig
|
||||
const timeoutMs = config.timeout_ms ?? DEFAULT_TIMEOUT_MS
|
||||
|
||||
// Log initial user message
|
||||
await capture.messageLogger.logUser(task.query)
|
||||
|
||||
// Set up timeout
|
||||
const abortController = new AbortController()
|
||||
const timeoutHandle = setTimeout(() => abortController.abort(), timeoutMs)
|
||||
|
||||
// Create agent
|
||||
const resolvedConfig: ResolvedAgentConfig = {
|
||||
conversationId: randomUUID(),
|
||||
provider: agentConfig.provider,
|
||||
model: agentConfig.model ?? 'gemini-2.0-flash',
|
||||
apiKey: resolveEnvValue(agentConfig.apiKey),
|
||||
baseUrl: agentConfig.baseUrl,
|
||||
sessionExecutionDir: '/tmp/browseros-eval',
|
||||
evalMode: true,
|
||||
}
|
||||
|
||||
const mcpServers = {
|
||||
'browseros-mcp': new MCPServerConfig(
|
||||
undefined, undefined, undefined, undefined, undefined,
|
||||
`${config.browseros.server_url}/mcp`,
|
||||
{ Accept: 'application/json, text/event-stream', 'X-BrowserOS-Source': 'eval' },
|
||||
undefined, undefined, true
|
||||
),
|
||||
}
|
||||
|
||||
const agent = await GeminiAgent.create(resolvedConfig, mcpServers)
|
||||
|
||||
// Set capture hooks
|
||||
agent.setToolHooks(capture.createToolHooks())
|
||||
|
||||
// Create mock stream to capture assistant messages
|
||||
let lastAssistantMessage = ''
|
||||
const mockStream = {
|
||||
write: async (data: string) => {
|
||||
if (data.includes('"type":"text-delta"')) {
|
||||
const match = data.match(/"delta":"((?:[^"\\]|\\.)*)"/)
|
||||
if (match) lastAssistantMessage += JSON.parse(`"${match[1]}"`)
|
||||
} else if (data.includes('"type":"finish"')) {
|
||||
if (lastAssistantMessage) {
|
||||
await capture.messageLogger.logAssistant(lastAssistantMessage)
|
||||
lastAssistantMessage = ''
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
// Execute
|
||||
let terminationReason: TaskMetadata['termination_reason'] = 'completed'
|
||||
|
||||
try {
|
||||
await agent.execute(
|
||||
task.query,
|
||||
mockStream as Parameters<typeof agent.execute>[1],
|
||||
abortController.signal,
|
||||
{ windowId: this.ctx.windowId, activeTab: { id: this.ctx.tabId, url: task.start_url } }
|
||||
)
|
||||
} catch (err) {
|
||||
const error = err instanceof Error ? err : new Error(String(err))
|
||||
|
||||
if (abortController.signal.aborted) {
|
||||
terminationReason = 'timeout'
|
||||
capture.addError('agent_execution', `Task timed out after ${timeoutMs / 1000}s`)
|
||||
} else {
|
||||
terminationReason = 'error'
|
||||
const msg = err instanceof AgentExecutionError && err.originalError
|
||||
? `${error.message}: ${err.originalError.message}`
|
||||
: error.message
|
||||
capture.addError('agent_execution', msg, { stack: error.stack })
|
||||
}
|
||||
await capture.messageLogger.logError(error.message)
|
||||
} finally {
|
||||
clearTimeout(timeoutHandle)
|
||||
}
|
||||
|
||||
// Build metadata
|
||||
const metadata: TaskMetadata = {
|
||||
query_id: task.query_id,
|
||||
dataset: task.dataset,
|
||||
query: task.query,
|
||||
started_at: new Date(startTime).toISOString(),
|
||||
completed_at: new Date().toISOString(),
|
||||
total_duration_ms: Date.now() - startTime,
|
||||
total_steps: capture.getScreenshotCount(),
|
||||
termination_reason: terminationReason,
|
||||
final_answer: capture.getLastAssistantMessage(),
|
||||
errors: capture.getErrors(),
|
||||
warnings: capture.getWarnings(),
|
||||
agent_config: { type: 'single', model: resolvedConfig.model },
|
||||
grader_results: {},
|
||||
}
|
||||
|
||||
await capture.trajectorySaver.saveMetadata(metadata)
|
||||
|
||||
return {
|
||||
metadata,
|
||||
messages: capture.getMessages(),
|
||||
finalAnswer: metadata.final_answer,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. Task Executor (`runner/task-executor.ts`)
|
||||
|
||||
Uses agent registry:
|
||||
```typescript
|
||||
import { createAgent } from '../agents'
|
||||
import type { AgentContext } from '../agents/types'
|
||||
import { CaptureContext } from '../capture/context'
|
||||
import type { EvalConfig, Task } from '../types'
|
||||
import type { WindowManager } from '../capture/window-manager'
|
||||
|
||||
export class TaskExecutor {
|
||||
constructor(
|
||||
private config: EvalConfig,
|
||||
private outputDir: string,
|
||||
private windowManager: WindowManager,
|
||||
private graderOptions: GraderOptions | null,
|
||||
) {}
|
||||
|
||||
async execute(task: Task): Promise<TaskResult> {
|
||||
const startTime = Date.now()
|
||||
let window: { windowId: number; tabId: number } | null = null
|
||||
|
||||
try {
|
||||
// Create window
|
||||
window = await this.windowManager.createWindow(task.query_id, task.start_url)
|
||||
|
||||
// Initialize capture context
|
||||
const capture = new CaptureContext({
|
||||
serverUrl: this.config.browseros.server_url,
|
||||
outputDir: this.outputDir,
|
||||
taskId: task.query_id,
|
||||
tabId: window.tabId,
|
||||
windowId: window.windowId,
|
||||
})
|
||||
const taskOutputDir = await capture.init()
|
||||
|
||||
// Build agent context
|
||||
const context: AgentContext = {
|
||||
config: this.config,
|
||||
task,
|
||||
windowId: window.windowId,
|
||||
tabId: window.tabId,
|
||||
outputDir: this.outputDir,
|
||||
taskOutputDir,
|
||||
capture,
|
||||
}
|
||||
|
||||
// Create and execute agent (via registry)
|
||||
const agent = createAgent(context)
|
||||
const agentResult = await agent.execute()
|
||||
|
||||
// Run graders
|
||||
const graderResults = await this.runGraders(task, agentResult)
|
||||
|
||||
return {
|
||||
status: agentResult.metadata.termination_reason === 'timeout' ? 'timeout' : 'completed',
|
||||
task,
|
||||
agentResult,
|
||||
graderResults,
|
||||
durationMs: Date.now() - startTime,
|
||||
}
|
||||
} catch (error) {
|
||||
return {
|
||||
status: 'failed',
|
||||
task,
|
||||
error: error instanceof Error ? error : new Error(String(error)),
|
||||
errorSource: 'unknown',
|
||||
durationMs: Date.now() - startTime,
|
||||
}
|
||||
} finally {
|
||||
if (window) {
|
||||
await this.windowManager.closeWindow(task.query_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Server Dependencies
|
||||
|
||||
### What We MUST Import from Server
|
||||
|
||||
These are necessary - `GeminiAgent` IS the agent:
|
||||
```typescript
|
||||
// Core agent
|
||||
import { GeminiAgent, type ToolExecutionHooks, type ToolExecutionResult } from '@browseros/server/agent'
|
||||
import { AgentExecutionError } from '@browseros/server/agent/errors'
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
|
||||
// Provider adapter (for orchestrator-agent)
|
||||
import { VercelAIContentGenerator } from '@browseros/server/agent/provider-adapter'
|
||||
|
||||
// Gateway client (for browseros provider only)
|
||||
import { fetchBrowserOSConfig, getLLMConfigFromProvider } from '@browseros/server/lib/clients/gateway'
|
||||
```
|
||||
|
||||
### What Could Move to Shared (Future)
|
||||
|
||||
If we want to decouple more:
|
||||
```typescript
|
||||
// These types could be in @browseros/shared
|
||||
export interface ToolExecutionHooks { ... }
|
||||
export interface ToolExecutionResult { ... }
|
||||
export interface ResolvedAgentConfig { ... }
|
||||
```
|
||||
|
||||
But for now, importing from server is fine - eval is tightly coupled to server anyway.
|
||||
|
||||
---
|
||||
|
||||
## Import Guidelines
|
||||
|
||||
```typescript
|
||||
// Shared package - schemas, constants
|
||||
import { LLMConfigSchema, LLMProviderSchema, LLM_PROVIDERS } from '@browseros/shared/schemas/llm'
|
||||
import { TIMEOUTS } from '@browseros/shared/constants/timeouts'
|
||||
import { AGENT_LIMITS } from '@browseros/shared/constants/limits'
|
||||
import type { BrowserContext } from '@browseros/shared/schemas/browser-context'
|
||||
|
||||
// Server - only agent-related imports
|
||||
import { GeminiAgent, type ToolExecutionHooks } from '@browseros/server/agent'
|
||||
import type { ResolvedAgentConfig } from '@browseros/server/agent/types'
|
||||
|
||||
// Internal eval types - from types/ folder
|
||||
import type { EvalConfig, Task, Message, AgentResult } from '../types'
|
||||
import type { AgentContext, AgentEvaluator } from '../agents/types'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Adding a New Agent Type
|
||||
|
||||
1. Create folder: `agents/my-new-agent/`
|
||||
2. Implement `AgentEvaluator` interface:
|
||||
|
||||
```typescript
|
||||
// agents/my-new-agent/index.ts
|
||||
import type { AgentContext, AgentEvaluator, AgentResult } from '../types'
|
||||
|
||||
export class MyNewAgentEvaluator implements AgentEvaluator {
|
||||
constructor(private ctx: AgentContext) {}
|
||||
|
||||
async execute(): Promise<AgentResult> {
|
||||
const { config, task, capture } = this.ctx
|
||||
|
||||
// Use capture.createToolHooks() for screenshot/logging
|
||||
// Use capture.messageLogger for messages
|
||||
// Use capture.addError/addWarning for errors
|
||||
|
||||
// Return AgentResult
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
3. Register in `agents/index.ts`:
|
||||
|
||||
```typescript
|
||||
import { MyNewAgentEvaluator } from './my-new-agent'
|
||||
|
||||
registerAgent('my-new-agent', (ctx) => new MyNewAgentEvaluator(ctx))
|
||||
```
|
||||
|
||||
4. Add config schema in `types/config.ts`:
|
||||
|
||||
```typescript
|
||||
export const MyNewAgentConfigSchema = z.object({
|
||||
type: z.literal('my-new-agent'),
|
||||
// ... specific fields
|
||||
})
|
||||
|
||||
export const AgentConfigSchema = z.discriminatedUnion('type', [
|
||||
SingleAgentConfigSchema,
|
||||
OrchestratorExecutorConfigSchema,
|
||||
MyNewAgentConfigSchema, // Add here
|
||||
])
|
||||
```
|
||||
|
||||
Done - no changes to runner code needed.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Order
|
||||
|
||||
1. **Phase 1: Types** (~1 hour)
|
||||
- Create `types/` folder with proper structure
|
||||
- Move/consolidate all types
|
||||
- Add Zod schemas for messages
|
||||
|
||||
2. **Phase 2: Capture Context** (~1 hour)
|
||||
- Create `CaptureContext` class
|
||||
- Add delegation message methods
|
||||
- Create `createToolHooks()` utility
|
||||
|
||||
3. **Phase 3: Agent Registry** (~30 min)
|
||||
- Create `registry.ts`
|
||||
- Create `AgentContext` interface
|
||||
- Update exports
|
||||
|
||||
4. **Phase 4: Refactor Single Agent** (~1 hour)
|
||||
- Use `AgentContext`
|
||||
- Use `CaptureContext`
|
||||
- Clean up code
|
||||
|
||||
5. **Phase 5: Refactor Orchestrator-Executor** (~2 hours)
|
||||
- Use `AgentContext`
|
||||
- Integrate `CaptureContext`
|
||||
- Wire up hooks properly
|
||||
|
||||
6. **Phase 6: Update Runner** (~30 min)
|
||||
- Use `createAgent()` instead of if-else
|
||||
- Initialize `CaptureContext` in executor
|
||||
|
||||
7. **Phase 7: Testing** (~1 hour)
|
||||
- Run single-agent eval
|
||||
- Run orchestrator-executor eval
|
||||
- Verify screenshots/messages captured
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Before | After |
|
||||
|--------|-------|
|
||||
| If-else agent creation | Registry + factory pattern |
|
||||
| Duplicated capture code | Shared `CaptureContext` |
|
||||
| Scattered types | Organized `types/` folder |
|
||||
| Copy-paste hooks | `createToolHooks()` utility |
|
||||
| Tight coupling | Clear interfaces |
|
||||
| Hard to add agents | Register + implement |
|
||||
@@ -1,431 +0,0 @@
|
||||
# Implementation Phases - Parallel Execution Plan
|
||||
|
||||
## Dependency Graph
|
||||
|
||||
```
|
||||
Phase 1: Types (4 parallel subagents)
|
||||
│
|
||||
├──────────────────┬──────────────────┐
|
||||
▼ ▼ │
|
||||
Phase 2: Capture Phase 3: Agent │
|
||||
(2 parallel) Registry │
|
||||
│ (1 subagent) │
|
||||
│ │ │
|
||||
└────────┬─────────┘ │
|
||||
▼ │
|
||||
Phase 4: Agent Refactors │
|
||||
(2 parallel - after 2+3) │
|
||||
│ │
|
||||
▼ │
|
||||
Phase 5: Runner Update │
|
||||
(1 subagent - after 4) │
|
||||
│ │
|
||||
▼ │
|
||||
Phase 6: Cleanup & Test ◄─────────────────┘
|
||||
(1 subagent)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Types (4 Parallel Subagents)
|
||||
|
||||
No dependencies - can all run simultaneously.
|
||||
|
||||
### Subagent 1A: Config Types
|
||||
```
|
||||
Create /apps/eval/src/types/config.ts
|
||||
|
||||
Requirements:
|
||||
1. Import LLMConfigSchema, LLMProviderSchema from @browseros/shared/schemas/llm
|
||||
2. Import z from zod
|
||||
|
||||
Create Zod schemas:
|
||||
- SingleAgentConfigSchema = LLMConfigSchema.extend({ type: z.literal('single') })
|
||||
- OrchestratorExecutorConfigSchema with orchestrator + executor nested configs
|
||||
- AgentConfigSchema = z.discriminatedUnion('type', [...])
|
||||
- EvalConfigSchema with all fields (agent, dataset, output_dir, num_workers, browseros, grader_*, timeout_ms)
|
||||
|
||||
Export both schemas and inferred types (z.infer<>)
|
||||
|
||||
Reference: Current implementation in /apps/eval/src/utils/config-validator.ts (lines 1-42)
|
||||
```
|
||||
|
||||
### Subagent 1B: Message Types
|
||||
```
|
||||
Create /apps/eval/src/types/message.ts
|
||||
|
||||
Requirements:
|
||||
1. Use Zod for all schemas
|
||||
2. Create BaseMessageSchema with timestamp field
|
||||
|
||||
Create schemas for:
|
||||
- UserMessageSchema (type: 'user', content)
|
||||
- AssistantMessageSchema (type: 'assistant', content)
|
||||
- ToolCallMessageSchema (type: 'tool_call', tool, toolCallId, params)
|
||||
- ToolResultMessageSchema (type: 'tool_result', toolCallId, result, isError, screenshot?)
|
||||
- ErrorMessageSchema (type: 'error', content, errorCode?)
|
||||
- DelegationMessageSchema (type: 'delegation', instruction, executorId, maxSteps?)
|
||||
- DelegationResultMessageSchema (type: 'delegation_result', executorId, summary, status, stepsUsed, currentUrl?)
|
||||
|
||||
Create MessageSchema = z.discriminatedUnion('type', [...all schemas])
|
||||
|
||||
Export schemas, types, and type guards (isToolCallMessage, isDelegationMessage, etc.)
|
||||
|
||||
Reference: Current types in /apps/eval/src/types.ts (lines 62-127)
|
||||
```
|
||||
|
||||
### Subagent 1C: Task & Result Types
|
||||
```
|
||||
Create /apps/eval/src/types/task.ts
|
||||
|
||||
Requirements:
|
||||
1. Use Zod schemas with inferred types
|
||||
|
||||
Create:
|
||||
- TaskMetadataSchema (original_task_id, website?, category?, additional?)
|
||||
- TaskSchema (query_id, dataset, query, graders[], start_url?, setup_script?, metadata)
|
||||
|
||||
Export schemas and types.
|
||||
|
||||
---
|
||||
|
||||
Create /apps/eval/src/types/result.ts
|
||||
|
||||
Create:
|
||||
- GraderResultSchema (score, pass, reasoning, details?)
|
||||
- TaskMetadataSchema (query_id, dataset, query, started_at, completed_at, total_duration_ms, total_steps, termination_reason, final_answer, errors, warnings, agent_config, grader_results)
|
||||
- AgentResultSchema (metadata, messages, finalAnswer)
|
||||
|
||||
Export schemas and types.
|
||||
|
||||
Reference: Current types in /apps/eval/src/types.ts (lines 6-20, 156-182)
|
||||
```
|
||||
|
||||
### Subagent 1D: Error Types + Index
|
||||
```
|
||||
Create /apps/eval/src/types/errors.ts
|
||||
|
||||
Create:
|
||||
- ErrorSourceSchema = z.enum(['window_creation', 'agent_execution', 'mcp_tool', 'screenshot', 'grader', 'message_logging', 'cleanup', 'unknown'])
|
||||
- TaskErrorSchema (source, message, timestamp, details?)
|
||||
- EvalWarningSchema (source, message, timestamp)
|
||||
|
||||
Export schemas and types.
|
||||
|
||||
---
|
||||
|
||||
Create /apps/eval/src/types/index.ts
|
||||
|
||||
Re-export everything from:
|
||||
- ./config
|
||||
- ./message
|
||||
- ./task
|
||||
- ./result
|
||||
- ./errors
|
||||
|
||||
This becomes the single import point: import { EvalConfig, Message, Task } from '../types'
|
||||
|
||||
Reference: Current types in /apps/eval/src/types.ts (lines 129-154)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Capture Infrastructure (2 Parallel Subagents)
|
||||
|
||||
**Depends on:** Phase 1 (types)
|
||||
|
||||
### Subagent 2A: CaptureContext Class
|
||||
```
|
||||
Create /apps/eval/src/capture/types.ts
|
||||
|
||||
Define interface:
|
||||
- CaptureContextConfig { serverUrl, outputDir, taskId, tabId, windowId }
|
||||
|
||||
---
|
||||
|
||||
Create /apps/eval/src/capture/context.ts
|
||||
|
||||
Requirements:
|
||||
1. Import ToolExecutionHooks, ToolExecutionResult from @browseros/server/agent
|
||||
2. Import types from ../types
|
||||
3. Import existing ScreenshotCapture, MessageLogger, TrajectorySaver
|
||||
|
||||
Implement CaptureContext class:
|
||||
- Constructor takes CaptureContextConfig
|
||||
- async init() - initializes screenshot, messageLogger, trajectorySaver, returns taskOutputDir
|
||||
- createToolHooks(): ToolExecutionHooks - returns hooks for GeminiAgent
|
||||
- addError(source, message, details?)
|
||||
- addWarning(source, message)
|
||||
- getErrors(), getWarnings(), getMessages(), getScreenshotCount(), getLastAssistantMessage()
|
||||
- logDelegation(instruction, executorId, maxSteps?)
|
||||
- logDelegationResult(executorId, summary, status, stepsUsed, currentUrl?)
|
||||
|
||||
Reference implementation details in DESIGN_DOC.md section "4. Capture Context"
|
||||
|
||||
Update /apps/eval/src/capture/index.ts to export CaptureContext
|
||||
```
|
||||
|
||||
### Subagent 2B: MessageLogger Extensions
|
||||
```
|
||||
Update /apps/eval/src/capture/message-logger.ts
|
||||
|
||||
Add two new methods:
|
||||
|
||||
1. logDelegation(instruction: string, executorId: string, maxSteps?: number): Promise<void>
|
||||
- Creates DelegationMessage with type: 'delegation'
|
||||
- Appends to messages
|
||||
|
||||
2. logDelegationResult(executorId: string, summary: string, status: 'done' | 'blocked' | 'max_steps', stepsUsed: number, currentUrl?: string): Promise<void>
|
||||
- Creates DelegationResultMessage with type: 'delegation_result'
|
||||
- Appends to messages
|
||||
|
||||
Import DelegationMessage, DelegationResultMessage from ../types
|
||||
|
||||
Reference: Current MessageLogger in /apps/eval/src/capture/message-logger.ts
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Agent Registry (1 Subagent)
|
||||
|
||||
**Depends on:** Phase 1 (types)
|
||||
**Can run parallel with:** Phase 2
|
||||
|
||||
### Subagent 3A: Agent Registry + Types
|
||||
```
|
||||
Create /apps/eval/src/agents/types.ts
|
||||
|
||||
Define:
|
||||
- AgentContext interface:
|
||||
{
|
||||
config: EvalConfig
|
||||
task: Task
|
||||
windowId: number
|
||||
tabId: number
|
||||
outputDir: string
|
||||
taskOutputDir: string
|
||||
capture: CaptureContext
|
||||
}
|
||||
|
||||
- AgentResult interface (re-export from ../types or define here)
|
||||
- AgentEvaluator interface { execute(): Promise<AgentResult> }
|
||||
|
||||
---
|
||||
|
||||
Create /apps/eval/src/agents/registry.ts
|
||||
|
||||
Implement:
|
||||
- type AgentFactory = (context: AgentContext) => AgentEvaluator
|
||||
- const registry = new Map<string, AgentFactory>()
|
||||
- registerAgent(type: string, factory: AgentFactory): void
|
||||
- createAgent(context: AgentContext): AgentEvaluator
|
||||
- getRegisteredAgentTypes(): string[]
|
||||
|
||||
Reference: DESIGN_DOC.md section "2. Agent Registry"
|
||||
|
||||
---
|
||||
|
||||
Update /apps/eval/src/agents/index.ts
|
||||
|
||||
- Import registerAgent from ./registry
|
||||
- Import SingleAgentEvaluator (will be updated later)
|
||||
- Import OrchestratorExecutorEvaluator (will be updated later)
|
||||
- Call registerAgent for both
|
||||
- Re-export createAgent, registerAgent, getRegisteredAgentTypes
|
||||
- Re-export types
|
||||
|
||||
Note: Registration calls will fail initially until agents are refactored.
|
||||
That's OK - add TODO comments for now.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: Agent Refactors (2 Parallel Subagents)
|
||||
|
||||
**Depends on:** Phase 2 + Phase 3
|
||||
|
||||
### Subagent 4A: Single Agent Refactor
|
||||
```
|
||||
Refactor /apps/eval/src/agents/single-agent.ts
|
||||
|
||||
Changes:
|
||||
1. Change constructor to accept AgentContext instead of individual params:
|
||||
constructor(private ctx: AgentContext) {}
|
||||
|
||||
2. Use ctx.capture instead of creating ScreenshotCapture/MessageLogger:
|
||||
- Remove local ScreenshotCapture initialization
|
||||
- Remove local MessageLogger initialization
|
||||
- Remove local hooks setup
|
||||
- Use ctx.capture.createToolHooks() for GeminiAgent hooks
|
||||
- Use ctx.capture.messageLogger.logUser/logAssistant
|
||||
- Use ctx.capture.addError/addWarning
|
||||
- Use ctx.capture.getMessages(), getScreenshotCount(), etc.
|
||||
|
||||
3. Build metadata using capture methods
|
||||
|
||||
4. Remove TrajectorySaver init (done in CaptureContext)
|
||||
|
||||
5. Keep the core agent execution logic (GeminiAgent.create, agent.execute)
|
||||
|
||||
Reference:
|
||||
- Current implementation: /apps/eval/src/agents/single-agent.ts
|
||||
- Target implementation: DESIGN_DOC.md section "5. Single Agent Evaluator"
|
||||
```
|
||||
|
||||
### Subagent 4B: Orchestrator-Executor Refactor
|
||||
```
|
||||
Refactor /apps/eval/src/agents/orchestrator-executor/index.ts
|
||||
|
||||
Changes:
|
||||
1. Change OrchestratorExecutorEvaluator constructor to accept AgentContext:
|
||||
constructor(private ctx: AgentContext) {}
|
||||
|
||||
2. Initialize capture from context (already done in runner)
|
||||
|
||||
3. Add hook integration:
|
||||
- Create executor hooks that use ctx.capture.createToolHooks()
|
||||
- Wire hooks through Orchestrator → ExecutorStore → Executor
|
||||
- Call ctx.capture.logDelegation() when orchestrator delegates
|
||||
- Call ctx.capture.logDelegationResult() when executor returns
|
||||
|
||||
4. Update return to include messages:
|
||||
return {
|
||||
metadata,
|
||||
messages: ctx.capture.getMessages(), // Now populated!
|
||||
finalAnswer,
|
||||
}
|
||||
|
||||
Also update supporting files if needed:
|
||||
- orchestrator.ts - add setExecutorHooks() method
|
||||
- executor.ts - accept external hooks via setObservationHooks()
|
||||
- executor-store.ts - pass hooks to new executors
|
||||
|
||||
Reference:
|
||||
- Current: /apps/eval/src/agents/orchestrator-executor/index.ts
|
||||
- Target: DESIGN_DOC.md and previous IMPLEMENTATION_PLAN.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Runner Update (1 Subagent)
|
||||
|
||||
**Depends on:** Phase 4
|
||||
|
||||
### Subagent 5A: Task Executor Update
|
||||
```
|
||||
Update /apps/eval/src/runner/task-executor.ts
|
||||
|
||||
Changes:
|
||||
1. Import createAgent from ../agents instead of individual evaluators
|
||||
2. Import CaptureContext from ../capture
|
||||
|
||||
3. In execute() method:
|
||||
- Create CaptureContext and call init()
|
||||
- Build AgentContext with all required fields
|
||||
- Use createAgent(context) instead of if-else switch
|
||||
- Remove the if (config.agent.type === 'single') / else if blocks
|
||||
|
||||
4. Remove direct imports of SingleAgentEvaluator, OrchestratorExecutorEvaluator
|
||||
|
||||
Before:
|
||||
```typescript
|
||||
if (this.config.agent.type === 'single') {
|
||||
const evaluator = new SingleAgentEvaluator(this.config, task, window.windowId, ...)
|
||||
} else if (this.config.agent.type === 'orchestrator-executor') {
|
||||
const evaluator = new OrchestratorExecutorEvaluator(this.config, task, ...)
|
||||
}
|
||||
```
|
||||
|
||||
After:
|
||||
```typescript
|
||||
const capture = new CaptureContext({ serverUrl, outputDir, taskId, tabId, windowId })
|
||||
const taskOutputDir = await capture.init()
|
||||
|
||||
const context: AgentContext = {
|
||||
config: this.config,
|
||||
task,
|
||||
windowId: window.windowId,
|
||||
tabId: window.tabId,
|
||||
outputDir: this.outputDir,
|
||||
taskOutputDir,
|
||||
capture,
|
||||
}
|
||||
|
||||
const agent = createAgent(context)
|
||||
const agentResult = await agent.execute()
|
||||
```
|
||||
|
||||
Reference:
|
||||
- Current: /apps/eval/src/runner/task-executor.ts (lines 143-186)
|
||||
- Target: DESIGN_DOC.md section "6. Task Executor"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 6: Cleanup & Test (1 Subagent)
|
||||
|
||||
**Depends on:** Phase 5
|
||||
|
||||
### Subagent 6A: Cleanup Old Files + Verify
|
||||
```
|
||||
Tasks:
|
||||
1. Delete old /apps/eval/src/types.ts (replaced by types/ folder)
|
||||
|
||||
2. Update all imports across the codebase:
|
||||
- Change: import { EvalConfig, Task, Message } from '../types'
|
||||
- Keep same (types/index.ts re-exports everything)
|
||||
|
||||
3. Update /apps/eval/src/utils/config-validator.ts:
|
||||
- Import schemas from ../types/config instead of defining locally
|
||||
- Remove duplicate schema definitions
|
||||
|
||||
4. Verify no TypeScript errors:
|
||||
- Run: cd apps/eval && bun run typecheck
|
||||
|
||||
5. Test single-agent eval:
|
||||
- Run: cd apps/eval && bun run eval -c configs/webvoyager-test.json
|
||||
- Verify screenshots captured
|
||||
- Verify messages.jsonl populated
|
||||
|
||||
6. Test orchestrator-executor eval:
|
||||
- Run: cd apps/eval && bun run eval -c configs/orchestrator-executor-test.json
|
||||
- Verify screenshots captured
|
||||
- Verify messages.jsonl has delegation messages
|
||||
- Verify graders pass (no "no_screenshots" error)
|
||||
|
||||
Report any issues found.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Execution Summary
|
||||
|
||||
| Phase | Subagents | Can Parallelize? | Dependencies |
|
||||
|-------|-----------|------------------|--------------|
|
||||
| 1 | 4 (1A, 1B, 1C, 1D) | Yes - all parallel | None |
|
||||
| 2 | 2 (2A, 2B) | Yes - both parallel | Phase 1 |
|
||||
| 3 | 1 (3A) | Yes - parallel with Phase 2 | Phase 1 |
|
||||
| 4 | 2 (4A, 4B) | Yes - both parallel | Phase 2 + 3 |
|
||||
| 5 | 1 (5A) | No | Phase 4 |
|
||||
| 6 | 1 (6A) | No | Phase 5 |
|
||||
|
||||
**Total: 11 subagent tasks**
|
||||
|
||||
**Parallel execution timeline:**
|
||||
```
|
||||
Time →
|
||||
─────────────────────────────────────────────────────────────────
|
||||
Phase 1: [1A] [1B] [1C] [1D] (4 parallel)
|
||||
─────────────────
|
||||
Phase 2: [2A] [2B] (2 parallel)
|
||||
Phase 3: [3A] (parallel with Phase 2)
|
||||
───────────
|
||||
Phase 4: [4A] [4B] (2 parallel)
|
||||
──────────
|
||||
Phase 5: [5A]
|
||||
────
|
||||
Phase 6: [6A]
|
||||
────
|
||||
```
|
||||
|
||||
**Maximum parallelism: 4 subagents** (Phase 1)
|
||||
@@ -1,888 +0,0 @@
|
||||
# Eval System - Production Grade Implementation Plan
|
||||
|
||||
## Overview
|
||||
|
||||
This plan outlines the changes needed to make the eval system production-grade with uniform agent observation across all agent patterns (single-agent, orchestrator-executor, future patterns).
|
||||
|
||||
**Goal:** All agent evaluators produce consistent `AgentResult` with screenshots, message traces, and verifiable action sequences.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Type System Extensions
|
||||
|
||||
### 1.1 Add New Message Types
|
||||
|
||||
**File:** `src/types.ts`
|
||||
|
||||
Add delegation-specific message types for orchestrator pattern:
|
||||
|
||||
```typescript
|
||||
// After ErrorMessage definition (~line 99)
|
||||
|
||||
export interface DelegationMessage extends BaseMessage {
|
||||
type: 'delegation'
|
||||
instruction: string
|
||||
executorId: string
|
||||
maxSteps?: number
|
||||
}
|
||||
|
||||
export interface DelegationResultMessage extends BaseMessage {
|
||||
type: 'delegation_result'
|
||||
executorId: string
|
||||
summary: string
|
||||
status: 'done' | 'blocked' | 'max_steps'
|
||||
stepsUsed: number
|
||||
currentUrl?: string
|
||||
}
|
||||
|
||||
// Update Message union (~line 101)
|
||||
export type Message =
|
||||
| UserMessage
|
||||
| AssistantMessage
|
||||
| ToolCallMessage
|
||||
| ToolResultMessage
|
||||
| ErrorMessage
|
||||
| DelegationMessage // NEW
|
||||
| DelegationResultMessage // NEW
|
||||
|
||||
// Add type guards
|
||||
export function isDelegationMessage(msg: Message): msg is DelegationMessage {
|
||||
return msg.type === 'delegation'
|
||||
}
|
||||
|
||||
export function isDelegationResultMessage(msg: Message): msg is DelegationResultMessage {
|
||||
return msg.type === 'delegation_result'
|
||||
}
|
||||
```
|
||||
|
||||
### 1.2 Add Orchestrator Hook Types
|
||||
|
||||
**File:** `src/agents/orchestrator-executor/types.ts`
|
||||
|
||||
```typescript
|
||||
// Add after existing types
|
||||
|
||||
export interface OrchestratorHooks {
|
||||
onDelegation?: (instruction: string, executorId: string, maxSteps?: number) => Promise<void>
|
||||
onDelegationResult?: (result: ExecutorResult) => Promise<void>
|
||||
onTurnStart?: (turn: number) => Promise<void>
|
||||
onTurnComplete?: (turn: number) => Promise<void>
|
||||
onComplete?: (answer: string) => Promise<void>
|
||||
onFailed?: (reason: string) => Promise<void>
|
||||
}
|
||||
|
||||
export interface ExecutorObservationHooks {
|
||||
onBeforeToolCall?: (toolName: string, args: unknown) => Promise<string> // returns toolCallId
|
||||
onAfterToolCall?: (toolName: string, toolCallId: string, result: unknown, isError: boolean) => Promise<void>
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Unified Capture Infrastructure
|
||||
|
||||
### 2.1 Create EvalCapture Class
|
||||
|
||||
**File:** `src/capture/eval-capture.ts` (NEW)
|
||||
|
||||
```typescript
|
||||
/**
|
||||
* EvalCapture - Unified capture infrastructure for all agent evaluators
|
||||
*
|
||||
* Combines screenshot capture, message logging, and provides hooks for
|
||||
* both single-agent and orchestrator-executor patterns.
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import type {
|
||||
AssistantMessage,
|
||||
DelegationMessage,
|
||||
DelegationResultMessage,
|
||||
ErrorMessage,
|
||||
Message,
|
||||
ToolCallMessage,
|
||||
ToolResultMessage,
|
||||
UserMessage,
|
||||
} from '../types'
|
||||
import { MessageLogger } from './message-logger'
|
||||
import { ScreenshotCapture } from './screenshot'
|
||||
|
||||
export interface EvalCaptureConfig {
|
||||
serverUrl: string
|
||||
outputDir: string
|
||||
tabId: number
|
||||
windowId: number
|
||||
}
|
||||
|
||||
export class EvalCapture {
|
||||
private screenshotCapture: ScreenshotCapture
|
||||
private messageLogger: MessageLogger
|
||||
private tabId: number
|
||||
private windowId: number
|
||||
private currentToolCallId: string | null = null
|
||||
|
||||
constructor(config: EvalCaptureConfig) {
|
||||
this.screenshotCapture = new ScreenshotCapture(config.serverUrl, config.outputDir)
|
||||
this.messageLogger = new MessageLogger(config.outputDir)
|
||||
this.tabId = config.tabId
|
||||
this.windowId = config.windowId
|
||||
}
|
||||
|
||||
async init(): Promise<void> {
|
||||
await this.screenshotCapture.init()
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Screenshot Capture
|
||||
// ============================================================================
|
||||
|
||||
async captureScreenshot(): Promise<number> {
|
||||
return this.screenshotCapture.capture(this.tabId, this.windowId)
|
||||
}
|
||||
|
||||
getScreenshotCount(): number {
|
||||
return this.screenshotCapture.getCount()
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Message Logging - Basic Types
|
||||
// ============================================================================
|
||||
|
||||
async logUser(content: string): Promise<void> {
|
||||
await this.messageLogger.logUser(content)
|
||||
}
|
||||
|
||||
async logAssistant(content: string): Promise<void> {
|
||||
await this.messageLogger.logAssistant(content)
|
||||
}
|
||||
|
||||
async logError(content: string, errorCode?: string): Promise<void> {
|
||||
await this.messageLogger.logError(content, errorCode)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Tool Call Logging (for single-agent and executor)
|
||||
// ============================================================================
|
||||
|
||||
async logToolCall(tool: string, params: Record<string, unknown>): Promise<string> {
|
||||
const toolCallId = randomUUID()
|
||||
this.currentToolCallId = toolCallId
|
||||
await this.messageLogger.logToolCall(tool, toolCallId, params)
|
||||
return toolCallId
|
||||
}
|
||||
|
||||
async logToolResult(
|
||||
toolCallId: string,
|
||||
result: unknown,
|
||||
isError: boolean,
|
||||
screenshot?: number,
|
||||
): Promise<void> {
|
||||
await this.messageLogger.logToolResult(toolCallId, result, isError, screenshot)
|
||||
this.currentToolCallId = null
|
||||
}
|
||||
|
||||
getCurrentToolCallId(): string | null {
|
||||
return this.currentToolCallId
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Delegation Logging (for orchestrator-executor)
|
||||
// ============================================================================
|
||||
|
||||
async logDelegation(
|
||||
instruction: string,
|
||||
executorId: string,
|
||||
maxSteps?: number,
|
||||
): Promise<void> {
|
||||
const message: DelegationMessage = {
|
||||
type: 'delegation',
|
||||
timestamp: new Date().toISOString(),
|
||||
instruction,
|
||||
executorId,
|
||||
...(maxSteps !== undefined && { maxSteps }),
|
||||
}
|
||||
// Extend MessageLogger to handle this, or append directly
|
||||
await this.appendMessage(message)
|
||||
}
|
||||
|
||||
async logDelegationResult(
|
||||
executorId: string,
|
||||
summary: string,
|
||||
status: 'done' | 'blocked' | 'max_steps',
|
||||
stepsUsed: number,
|
||||
currentUrl?: string,
|
||||
): Promise<void> {
|
||||
const message: DelegationResultMessage = {
|
||||
type: 'delegation_result',
|
||||
timestamp: new Date().toISOString(),
|
||||
executorId,
|
||||
summary,
|
||||
status,
|
||||
stepsUsed,
|
||||
...(currentUrl && { currentUrl }),
|
||||
}
|
||||
await this.appendMessage(message)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Helpers
|
||||
// ============================================================================
|
||||
|
||||
private async appendMessage(message: Message): Promise<void> {
|
||||
// Access internal messages array and file
|
||||
// This requires either extending MessageLogger or using a shared approach
|
||||
const messages = this.messageLogger.getMessages()
|
||||
messages.push(message)
|
||||
// Write to file - MessageLogger needs extension for this
|
||||
}
|
||||
|
||||
getMessages(): Message[] {
|
||||
return this.messageLogger.getMessages()
|
||||
}
|
||||
|
||||
getLastAssistantMessage(): string | null {
|
||||
return this.messageLogger.getLastAssistantMessage()
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 2.2 Extend MessageLogger for New Types
|
||||
|
||||
**File:** `src/capture/message-logger.ts`
|
||||
|
||||
Add methods for delegation messages:
|
||||
|
||||
```typescript
|
||||
// Add after logError method
|
||||
|
||||
async logDelegation(
|
||||
instruction: string,
|
||||
executorId: string,
|
||||
maxSteps?: number,
|
||||
): Promise<void> {
|
||||
const message: DelegationMessage = {
|
||||
type: 'delegation',
|
||||
timestamp: new Date().toISOString(),
|
||||
instruction,
|
||||
executorId,
|
||||
...(maxSteps !== undefined && { maxSteps }),
|
||||
}
|
||||
await this.append(message)
|
||||
}
|
||||
|
||||
async logDelegationResult(
|
||||
executorId: string,
|
||||
summary: string,
|
||||
status: 'done' | 'blocked' | 'max_steps',
|
||||
stepsUsed: number,
|
||||
currentUrl?: string,
|
||||
): Promise<void> {
|
||||
const message: DelegationResultMessage = {
|
||||
type: 'delegation_result',
|
||||
timestamp: new Date().toISOString(),
|
||||
executorId,
|
||||
summary,
|
||||
status,
|
||||
stepsUsed,
|
||||
...(currentUrl && { currentUrl }),
|
||||
}
|
||||
await this.append(message)
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: Executor Hook Integration
|
||||
|
||||
### 3.1 Modify Executor to Accept External Hooks
|
||||
|
||||
**File:** `src/agents/orchestrator-executor/executor.ts`
|
||||
|
||||
```typescript
|
||||
// Add import
|
||||
import type { ExecutorObservationHooks } from './types'
|
||||
|
||||
export class Executor {
|
||||
private agent: GeminiAgent | null = null
|
||||
private stepsUsed = 0
|
||||
private currentUrl = ''
|
||||
private config: ExecutorConfig
|
||||
private serverUrl: string
|
||||
private windowId: number
|
||||
private tabId: number
|
||||
private observationHooks?: ExecutorObservationHooks // NEW
|
||||
|
||||
// ... existing constructor ...
|
||||
|
||||
/**
|
||||
* Set external observation hooks for capture integration
|
||||
*/
|
||||
setObservationHooks(hooks: ExecutorObservationHooks): void {
|
||||
this.observationHooks = hooks
|
||||
}
|
||||
|
||||
async execute(
|
||||
instruction: string,
|
||||
maxSteps?: number,
|
||||
signal?: AbortSignal,
|
||||
): Promise<Omit<ExecutorResult, 'executorId'>> {
|
||||
// ... existing setup ...
|
||||
|
||||
// Track steps via hooks - MODIFIED to include external observation
|
||||
let stepsThisRun = 0
|
||||
const hooks: ToolExecutionHooks = {
|
||||
onBeforeToolCall: async (toolName: string, args: unknown) => {
|
||||
// Call external hook if set (for logging)
|
||||
if (this.observationHooks?.onBeforeToolCall) {
|
||||
await this.observationHooks.onBeforeToolCall(toolName, args)
|
||||
}
|
||||
},
|
||||
onAfterToolCall: async (toolName: string, result: ToolExecutionResult) => {
|
||||
stepsThisRun++
|
||||
this.stepsUsed++
|
||||
|
||||
// Call external hook if set (for screenshot capture and logging)
|
||||
if (this.observationHooks?.onAfterToolCall) {
|
||||
const toolCallId = 'current' // Will be tracked by EvalCapture
|
||||
await this.observationHooks.onAfterToolCall(
|
||||
toolName,
|
||||
toolCallId,
|
||||
result.parts,
|
||||
result.isError,
|
||||
)
|
||||
}
|
||||
},
|
||||
}
|
||||
this.agent.setToolHooks(hooks)
|
||||
|
||||
// ... rest of execute method ...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 Pass Hooks Through ExecutorStore
|
||||
|
||||
**File:** `src/agents/orchestrator-executor/executor-store.ts`
|
||||
|
||||
```typescript
|
||||
import type { ExecutorObservationHooks } from './types'
|
||||
|
||||
export class ExecutorStore {
|
||||
private executors = new Map<string, Executor>()
|
||||
private observationHooks?: ExecutorObservationHooks // NEW
|
||||
|
||||
/**
|
||||
* Set observation hooks that will be applied to all executors
|
||||
*/
|
||||
setObservationHooks(hooks: ExecutorObservationHooks): void {
|
||||
this.observationHooks = hooks
|
||||
// Apply to existing executors
|
||||
for (const executor of this.executors.values()) {
|
||||
executor.setObservationHooks(hooks)
|
||||
}
|
||||
}
|
||||
|
||||
getOrCreate(
|
||||
id: string,
|
||||
config: ExecutorConfig,
|
||||
serverUrl: string,
|
||||
windowId: number,
|
||||
tabId: number,
|
||||
): Executor {
|
||||
if (!this.executors.has(id)) {
|
||||
const executor = new Executor(config, serverUrl, windowId, tabId)
|
||||
// Apply observation hooks to new executor
|
||||
if (this.observationHooks) {
|
||||
executor.setObservationHooks(this.observationHooks)
|
||||
}
|
||||
this.executors.set(id, executor)
|
||||
}
|
||||
return this.executors.get(id)!
|
||||
}
|
||||
|
||||
// ... rest unchanged ...
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: Orchestrator Hook Integration
|
||||
|
||||
### 4.1 Add Hooks to OrchestratorAgent
|
||||
|
||||
**File:** `src/agents/orchestrator-executor/orchestrator-agent.ts`
|
||||
|
||||
```typescript
|
||||
import type { ExecutorObservationHooks, OrchestratorHooks } from './types'
|
||||
|
||||
export class OrchestratorAgent {
|
||||
private orchestratorHooks?: OrchestratorHooks // NEW
|
||||
|
||||
private constructor(
|
||||
private client: GeminiClient,
|
||||
private geminiConfig: GeminiConfig,
|
||||
private state: OrchestratorState,
|
||||
private executorStore: ExecutorStore,
|
||||
private maxTurns: number,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Set orchestrator-level hooks for delegation tracking
|
||||
*/
|
||||
setHooks(hooks: OrchestratorHooks): void {
|
||||
this.orchestratorHooks = hooks
|
||||
}
|
||||
|
||||
/**
|
||||
* Set executor observation hooks (passed through to ExecutorStore)
|
||||
*/
|
||||
setExecutorObservationHooks(hooks: ExecutorObservationHooks): void {
|
||||
this.executorStore.setObservationHooks(hooks)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get hooks for tool context (used by orchestrator-tools.ts)
|
||||
*/
|
||||
getOrchestratorHooks(): OrchestratorHooks | undefined {
|
||||
return this.orchestratorHooks
|
||||
}
|
||||
|
||||
async run(taskQuery: string): Promise<OrchestratorAgentResult> {
|
||||
let currentParts: Part[] = [{ text: taskQuery }]
|
||||
let turns = 0
|
||||
|
||||
while (
|
||||
!this.state.isComplete &&
|
||||
!this.state.isFailed &&
|
||||
turns < this.maxTurns
|
||||
) {
|
||||
turns++
|
||||
|
||||
// Fire turn start hook
|
||||
await this.orchestratorHooks?.onTurnStart?.(turns)
|
||||
|
||||
// ... existing turn logic ...
|
||||
|
||||
// Fire turn complete hook
|
||||
await this.orchestratorHooks?.onTurnComplete?.(turns)
|
||||
}
|
||||
|
||||
// Fire completion hooks
|
||||
if (this.state.isComplete && this.state.finalAnswer) {
|
||||
await this.orchestratorHooks?.onComplete?.(this.state.finalAnswer)
|
||||
} else if (this.state.isFailed && this.state.failureReason) {
|
||||
await this.orchestratorHooks?.onFailed?.(this.state.failureReason)
|
||||
}
|
||||
|
||||
return {
|
||||
success: this.state.isComplete,
|
||||
answer: this.state.finalAnswer,
|
||||
reason: this.state.failureReason,
|
||||
delegationCount: this.state.delegationCount,
|
||||
totalExecutorSteps: this.state.totalExecutorSteps,
|
||||
turns,
|
||||
}
|
||||
}
|
||||
|
||||
// ... rest unchanged ...
|
||||
}
|
||||
```
|
||||
|
||||
### 4.2 Fire Hooks in Orchestrator Tools
|
||||
|
||||
**File:** `src/agents/orchestrator-executor/orchestrator-tools.ts`
|
||||
|
||||
Modify the delegate tool handler to fire hooks:
|
||||
|
||||
```typescript
|
||||
// In createOrchestratorTools function, modify the delegate tool handler
|
||||
|
||||
// Inside the delegate tool's handler:
|
||||
handler: async (args) => {
|
||||
const { instruction, executorId, maxSteps } = args as DelegateParams
|
||||
|
||||
// Fire delegation hook BEFORE execution
|
||||
const hooks = context.getOrchestratorHooks?.()
|
||||
const actualExecutorId = executorId ?? randomUUID()
|
||||
await hooks?.onDelegation?.(instruction, actualExecutorId, maxSteps)
|
||||
|
||||
// Get or create executor
|
||||
const executor = context.executorStore.getOrCreate(
|
||||
actualExecutorId,
|
||||
context.executorConfig,
|
||||
context.serverUrl,
|
||||
context.windowId,
|
||||
context.tabId,
|
||||
)
|
||||
|
||||
// Execute
|
||||
const result = await executor.execute(instruction, maxSteps)
|
||||
|
||||
// Update state
|
||||
context.state.delegationCount++
|
||||
context.state.totalExecutorSteps += result.stepsUsed
|
||||
|
||||
// Fire delegation result hook AFTER execution
|
||||
await hooks?.onDelegationResult?.({
|
||||
...result,
|
||||
executorId: actualExecutorId,
|
||||
})
|
||||
|
||||
// Return result to orchestrator
|
||||
return {
|
||||
executorId: actualExecutorId,
|
||||
...result,
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Update OrchestratorExecutorEvaluator
|
||||
|
||||
### 5.1 Full Integration
|
||||
|
||||
**File:** `src/agents/orchestrator-executor/index.ts`
|
||||
|
||||
```typescript
|
||||
import { ScreenshotCapture } from '../../capture/screenshot'
|
||||
import { MessageLogger } from '../../capture/message-logger'
|
||||
import { TrajectorySaver } from '../../capture/trajectory-saver'
|
||||
import type { ExecutorObservationHooks, OrchestratorHooks } from './types'
|
||||
|
||||
export class OrchestratorExecutorEvaluator implements AgentEvaluator {
|
||||
constructor(
|
||||
private config: EvalConfig,
|
||||
private task: Task,
|
||||
private windowId: number,
|
||||
private tabId: number,
|
||||
private outputDir: string,
|
||||
) {}
|
||||
|
||||
async execute(): Promise<AgentResult> {
|
||||
const startTime = Date.now()
|
||||
const timeoutMs = this.config.timeout_ms ?? DEFAULT_TIMEOUT_MS
|
||||
|
||||
const errors: TaskError[] = []
|
||||
const warnings: EvalWarning[] = []
|
||||
|
||||
const addError = (source: TaskError['source'], message: string, details?: Record<string, unknown>) => {
|
||||
errors.push({ source, message, timestamp: new Date().toISOString(), details })
|
||||
}
|
||||
|
||||
const addWarning = (source: EvalWarning['source'], message: string) => {
|
||||
warnings.push({ source, message, timestamp: new Date().toISOString() })
|
||||
console.warn(`[${source}] ${message}`)
|
||||
}
|
||||
|
||||
// Initialize trajectory saver
|
||||
const saver = new TrajectorySaver(this.outputDir, this.task.query_id)
|
||||
const taskOutputDir = await saver.init()
|
||||
|
||||
// NEW: Initialize capture infrastructure (same as single-agent)
|
||||
const screenshotCapture = new ScreenshotCapture(
|
||||
this.config.browseros.server_url,
|
||||
taskOutputDir,
|
||||
)
|
||||
await screenshotCapture.init()
|
||||
|
||||
const messageLogger = new MessageLogger(taskOutputDir)
|
||||
|
||||
// Log initial user message
|
||||
await messageLogger.logUser(this.task.query)
|
||||
|
||||
// Validate config type
|
||||
if (this.config.agent.type !== 'orchestrator-executor') {
|
||||
throw new Error('OrchestratorExecutorEvaluator requires orchestrator-executor config')
|
||||
}
|
||||
|
||||
const agentConfig = this.config.agent as OrchestratorExecutorConfig
|
||||
const { orchestrator: orchestratorConfig, executor: executorConfig } =
|
||||
resolveAgentConfig(agentConfig)
|
||||
|
||||
// Create orchestrator
|
||||
const orchestrator = new Orchestrator(
|
||||
orchestratorConfig,
|
||||
executorConfig,
|
||||
this.config.browseros.server_url,
|
||||
this.windowId,
|
||||
this.tabId,
|
||||
)
|
||||
|
||||
// NEW: Set up executor observation hooks (for tool call/result capture)
|
||||
let currentToolCallId: string | null = null
|
||||
|
||||
const executorHooks: ExecutorObservationHooks = {
|
||||
onBeforeToolCall: async (toolName: string, args: unknown) => {
|
||||
try {
|
||||
currentToolCallId = randomUUID()
|
||||
await messageLogger.logToolCall(toolName, currentToolCallId, args as Record<string, unknown>)
|
||||
} catch (err) {
|
||||
addWarning('message_logging', `Failed to log tool call ${toolName}: ${err instanceof Error ? err.message : String(err)}`)
|
||||
}
|
||||
return currentToolCallId
|
||||
},
|
||||
onAfterToolCall: async (toolName: string, _toolCallId: string, result: unknown, isError: boolean) => {
|
||||
let screenshotNum = 0
|
||||
|
||||
// Capture screenshot after tool execution
|
||||
try {
|
||||
screenshotNum = await screenshotCapture.capture(this.tabId, this.windowId)
|
||||
} catch (err) {
|
||||
addWarning('screenshot', `Screenshot after ${toolName} failed: ${err instanceof Error ? err.message : String(err)}`)
|
||||
screenshotNum = screenshotCapture.getCount()
|
||||
}
|
||||
|
||||
// Log tool errors
|
||||
if (isError) {
|
||||
addWarning('mcp_tool', `Tool ${toolName} returned error`)
|
||||
}
|
||||
|
||||
if (!currentToolCallId) {
|
||||
addWarning('message_logging', 'Tool result without matching tool call')
|
||||
return
|
||||
}
|
||||
|
||||
try {
|
||||
await messageLogger.logToolResult(currentToolCallId, result, isError, screenshotNum)
|
||||
} catch (err) {
|
||||
addWarning('message_logging', `Failed to log tool result: ${err instanceof Error ? err.message : String(err)}`)
|
||||
}
|
||||
|
||||
currentToolCallId = null
|
||||
},
|
||||
}
|
||||
|
||||
// NEW: Set up orchestrator hooks (for delegation tracking)
|
||||
const orchestratorHooks: OrchestratorHooks = {
|
||||
onDelegation: async (instruction: string, executorId: string, maxSteps?: number) => {
|
||||
try {
|
||||
await messageLogger.logDelegation(instruction, executorId, maxSteps)
|
||||
} catch (err) {
|
||||
addWarning('message_logging', `Failed to log delegation: ${err instanceof Error ? err.message : String(err)}`)
|
||||
}
|
||||
},
|
||||
onDelegationResult: async (result) => {
|
||||
try {
|
||||
await messageLogger.logDelegationResult(
|
||||
result.executorId,
|
||||
result.summary,
|
||||
result.status,
|
||||
result.stepsUsed,
|
||||
result.currentUrl,
|
||||
)
|
||||
} catch (err) {
|
||||
addWarning('message_logging', `Failed to log delegation result: ${err instanceof Error ? err.message : String(err)}`)
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
// Apply hooks to orchestrator
|
||||
orchestrator.setHooks(orchestratorHooks)
|
||||
orchestrator.setExecutorObservationHooks(executorHooks)
|
||||
|
||||
// Set up timeout
|
||||
const abortController = new AbortController()
|
||||
const timeoutHandle = setTimeout(() => {
|
||||
abortController.abort()
|
||||
}, timeoutMs)
|
||||
|
||||
let terminationReason: 'completed' | 'max_steps' | 'error' | 'timeout' = 'completed'
|
||||
let finalAnswer: string | null = null
|
||||
let orchestratorResult: Awaited<ReturnType<typeof orchestrator.run>> | null = null
|
||||
|
||||
try {
|
||||
const runPromise = orchestrator.run(this.task.query)
|
||||
|
||||
orchestratorResult = await Promise.race([
|
||||
runPromise,
|
||||
new Promise<never>((_, reject) => {
|
||||
abortController.signal.addEventListener('abort', () => {
|
||||
reject(new Error('Timeout'))
|
||||
})
|
||||
}),
|
||||
])
|
||||
|
||||
if (orchestratorResult.success) {
|
||||
finalAnswer = orchestratorResult.answer
|
||||
terminationReason = 'completed'
|
||||
// Log final assistant message
|
||||
if (finalAnswer) {
|
||||
await messageLogger.logAssistant(finalAnswer)
|
||||
}
|
||||
} else {
|
||||
terminationReason = 'error'
|
||||
addError('agent_execution', orchestratorResult.reason ?? 'Unknown failure')
|
||||
await messageLogger.logError(orchestratorResult.reason ?? 'Unknown failure')
|
||||
}
|
||||
} catch (err) {
|
||||
const error = err instanceof Error ? err : new Error(String(err))
|
||||
|
||||
if (error.message === 'Timeout' || abortController.signal.aborted) {
|
||||
terminationReason = 'timeout'
|
||||
addError('agent_execution', `Task timed out after ${timeoutMs / 1000}s`)
|
||||
} else {
|
||||
terminationReason = 'error'
|
||||
addError('agent_execution', error.message, { stack: error.stack })
|
||||
}
|
||||
await messageLogger.logError(error.message)
|
||||
} finally {
|
||||
clearTimeout(timeoutHandle)
|
||||
orchestrator.getExecutorStore().clear()
|
||||
}
|
||||
|
||||
const endTime = Date.now()
|
||||
|
||||
// Create metadata
|
||||
const metadata: TaskMetadata = {
|
||||
query_id: this.task.query_id,
|
||||
dataset: this.task.dataset,
|
||||
query: this.task.query,
|
||||
started_at: new Date(startTime).toISOString(),
|
||||
completed_at: new Date(endTime).toISOString(),
|
||||
total_duration_ms: endTime - startTime,
|
||||
total_steps: screenshotCapture.getCount(), // Now accurate
|
||||
termination_reason: terminationReason,
|
||||
final_answer: finalAnswer,
|
||||
errors,
|
||||
warnings,
|
||||
agent_config: {
|
||||
type: 'orchestrator-executor',
|
||||
model: `${orchestratorConfig.model} / ${executorConfig.model}`,
|
||||
},
|
||||
grader_results: {},
|
||||
}
|
||||
|
||||
await saver.saveMetadata(metadata)
|
||||
|
||||
return {
|
||||
metadata,
|
||||
messages: messageLogger.getMessages(), // NOW POPULATED
|
||||
finalAnswer,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 6: Orchestrator Class Updates
|
||||
|
||||
### 6.1 Add Hook Passthrough Methods
|
||||
|
||||
**File:** `src/agents/orchestrator-executor/orchestrator.ts`
|
||||
|
||||
```typescript
|
||||
import type { ExecutorObservationHooks, OrchestratorHooks } from './types'
|
||||
|
||||
export class Orchestrator {
|
||||
private agent: OrchestratorAgent | null = null
|
||||
private executorStore: ExecutorStore
|
||||
private pendingOrchestratorHooks?: OrchestratorHooks
|
||||
private pendingExecutorHooks?: ExecutorObservationHooks
|
||||
|
||||
constructor(
|
||||
private orchestratorConfig: OrchestratorConfig,
|
||||
private executorConfig: ExecutorConfig,
|
||||
private serverUrl: string,
|
||||
private windowId: number,
|
||||
private tabId: number,
|
||||
) {
|
||||
this.executorStore = new ExecutorStore()
|
||||
}
|
||||
|
||||
/**
|
||||
* Set orchestrator-level hooks (must be called before run())
|
||||
*/
|
||||
setHooks(hooks: OrchestratorHooks): void {
|
||||
this.pendingOrchestratorHooks = hooks
|
||||
if (this.agent) {
|
||||
this.agent.setHooks(hooks)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set executor observation hooks (must be called before run())
|
||||
*/
|
||||
setExecutorObservationHooks(hooks: ExecutorObservationHooks): void {
|
||||
this.pendingExecutorHooks = hooks
|
||||
this.executorStore.setObservationHooks(hooks)
|
||||
if (this.agent) {
|
||||
this.agent.setExecutorObservationHooks(hooks)
|
||||
}
|
||||
}
|
||||
|
||||
async run(taskQuery: string): Promise<OrchestratorAgentResult> {
|
||||
this.agent = await OrchestratorAgent.create(
|
||||
this.orchestratorConfig,
|
||||
this.executorConfig,
|
||||
this.serverUrl,
|
||||
this.windowId,
|
||||
this.tabId,
|
||||
)
|
||||
|
||||
// Apply pending hooks
|
||||
if (this.pendingOrchestratorHooks) {
|
||||
this.agent.setHooks(this.pendingOrchestratorHooks)
|
||||
}
|
||||
if (this.pendingExecutorHooks) {
|
||||
this.agent.setExecutorObservationHooks(this.pendingExecutorHooks)
|
||||
}
|
||||
|
||||
const result = await this.agent.run(taskQuery)
|
||||
this.executorStore = this.agent.getExecutorStore()
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
getExecutorStore(): ExecutorStore {
|
||||
return this.agent?.getExecutorStore() ?? this.executorStore
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Order
|
||||
|
||||
1. **Phase 1** - Type extensions (types.ts) - 30 min
|
||||
2. **Phase 2** - MessageLogger extensions - 30 min
|
||||
3. **Phase 3** - Executor hook integration - 1 hour
|
||||
4. **Phase 4** - OrchestratorAgent hooks - 1 hour
|
||||
5. **Phase 5** - OrchestratorExecutorEvaluator update - 1.5 hours
|
||||
6. **Phase 6** - Orchestrator passthrough - 30 min
|
||||
7. **Testing** - End-to-end verification - 1 hour
|
||||
|
||||
**Total estimated time:** ~6 hours
|
||||
|
||||
---
|
||||
|
||||
## Testing Checklist
|
||||
|
||||
- [ ] Single-agent eval still works (regression test)
|
||||
- [ ] Orchestrator-executor produces screenshots in output folder
|
||||
- [ ] Orchestrator-executor produces messages.jsonl with:
|
||||
- [ ] user message
|
||||
- [ ] delegation messages
|
||||
- [ ] tool_call messages (from executor)
|
||||
- [ ] tool_result messages with screenshot numbers
|
||||
- [ ] delegation_result messages
|
||||
- [ ] assistant message (final answer)
|
||||
- [ ] Graders pass with orchestrator-executor (no "no_screenshots" error)
|
||||
- [ ] metadata.json has accurate `total_steps` count
|
||||
- [ ] Error/warning capture works for both patterns
|
||||
|
||||
---
|
||||
|
||||
## Future Considerations
|
||||
|
||||
1. **New Agent Patterns:** Any new agent type just needs to:
|
||||
- Accept hooks in constructor or via setter
|
||||
- Fire hooks at appropriate points
|
||||
- Use shared capture infrastructure
|
||||
|
||||
2. **Grader Updates:** May need to update graders to understand delegation messages
|
||||
|
||||
3. **Parallel Executors:** If orchestrator delegates to multiple executors in parallel, need to handle concurrent screenshot capture
|
||||
|
||||
4. **Memory/Performance:** Screenshot capture creates MCP connection per capture - consider connection pooling for high-volume evals
|
||||
308
packages/browseros-agent/apps/eval/README.md
vendored
308
packages/browseros-agent/apps/eval/README.md
vendored
@@ -2,155 +2,158 @@
|
||||
|
||||
[](../../../../LICENSE)
|
||||
|
||||
Evaluation framework for benchmarking BrowserOS browser automation agents. Runs tasks from standard datasets ([WebVoyager](https://arxiv.org/abs/2401.13919), [Mind2Web](https://arxiv.org/abs/2306.06070)), captures trajectories with screenshots, and grades results automatically.
|
||||
Evaluation framework for BrowserOS browser automation agents. Runs tasks from standard datasets ([WebVoyager](https://arxiv.org/abs/2401.13919), [Mind2Web](https://arxiv.org/abs/2306.06070), AGI SDK / REAL Bench, WebArena-Infinity, WebBench), captures trajectories with screenshots, and grades results automatically.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- **BrowserOS binary** installed at `/Applications/BrowserOS.app` (macOS)
|
||||
- **BrowserOS binary** at `/Applications/BrowserOS.app` (macOS) or `BROWSEROS_BINARY` pointing at it
|
||||
- **Bun** runtime
|
||||
- **API keys** for your chosen LLM provider and grader model
|
||||
- **API keys** for your LLM provider (and `CLAUDE_CODE_OAUTH_TOKEN` if you use `performance_grader`)
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Set up environment
|
||||
|
||||
```bash
|
||||
cd apps/eval
|
||||
```
|
||||
|
||||
Edit `.env.development` and add your API keys:
|
||||
|
||||
```bash
|
||||
# Pick ONE provider for the orchestrator (whichever you have access to)
|
||||
OPENAI_API_KEY=sk-xxxxx
|
||||
ANTHROPIC_API_KEY=sk-ant-xxxxx
|
||||
FIREWORKS_API_KEY=fw_xxxxx
|
||||
GOOGLE_API_KEY=AIza-xxxxx
|
||||
|
||||
# For grading results (OpenRouter recommended — gives access to many models)
|
||||
OPENROUTER_API_KEY=sk-or-v1-xxxxx
|
||||
```
|
||||
|
||||
### 2. Launch the dashboard
|
||||
|
||||
```bash
|
||||
cp .env.example .env.development
|
||||
# Edit .env.development with your keys, then:
|
||||
bun run eval
|
||||
```
|
||||
|
||||
Opens the **Eval Dashboard** at `http://localhost:9900` in config mode.
|
||||
Opens the eval dashboard at `http://localhost:9900` in config mode. From there: load a preset, edit settings, click **Run**.
|
||||
|
||||
### 3. Configure and run
|
||||
|
||||
From the dashboard:
|
||||
|
||||
1. **Load a preset** — select from the dropdown or click **Load File** to import a config JSON
|
||||
2. **Edit settings** — change agent type, provider, model, API keys, dataset, workers, timeouts
|
||||
3. **Save Config** — export your configuration for reuse
|
||||
4. **Click Run** — starts the evaluation with live progress
|
||||
|
||||
### Alternative: Run from CLI
|
||||
### CLI mode
|
||||
|
||||
```bash
|
||||
bun run eval -c configs/orchestrator-executor-clado-test.json
|
||||
bun run eval -c configs/legacy/browseros-agent-weekly.json
|
||||
bun run eval suite --config configs/legacy/browseros-agent-weekly.json --publish r2
|
||||
```
|
||||
|
||||
Runs immediately. Dashboard still available at `http://localhost:9900` for live progress.
|
||||
|
||||
## Agent Types
|
||||
The `suite` command is the workflow-compatible full loop: execute tasks, run graders, write artifacts, and optionally publish to R2. The old `-c` form remains supported during migration.
|
||||
|
||||
### Orchestrator-Executor with Clado
|
||||
```bash
|
||||
bun run eval run --config configs/legacy/browseros-agent-weekly.json
|
||||
bun run eval suite --suite configs/suites/agisdk-daily-10.json --variant kimi-fireworks --publish r2
|
||||
bun run eval grade --run results/browseros-agent-weekly/2026-04-29-1430
|
||||
bun run eval publish --run results/browseros-agent-weekly/2026-04-29-1430 --target r2
|
||||
```
|
||||
|
||||
The recommended architecture for visual model evals. Two tiers:
|
||||
Config files live in two groups:
|
||||
|
||||
- **Orchestrator** — An LLM that plans and issues high-level instructions
|
||||
- **Executor** — The **Clado Action** visual model that takes screenshots and predicts click/type/scroll coordinates
|
||||
```txt
|
||||
configs/legacy/ # Complete EvalConfig files used by older workflows and the dashboard
|
||||
configs/suites/ # Suite definitions; model/provider comes from CLI flags or env
|
||||
```
|
||||
|
||||
The orchestrator works with **any LLM provider**. Pick whichever you have access to:
|
||||
Suite mode takes model settings from CLI flags first, then env:
|
||||
|
||||
#### OpenAI orchestrator
|
||||
```bash
|
||||
EVAL_VARIANT=kimi-fireworks \
|
||||
EVAL_AGENT_PROVIDER=openai-compatible \
|
||||
EVAL_AGENT_MODEL=accounts/fireworks/models/kimi-k2p5 \
|
||||
EVAL_AGENT_API_KEY=$FIREWORKS_API_KEY \
|
||||
EVAL_AGENT_BASE_URL=https://api.fireworks.ai/inference/v1 \
|
||||
bun run eval suite --suite configs/suites/agisdk-daily-10.json --publish r2
|
||||
```
|
||||
|
||||
### Suites and variants
|
||||
|
||||
A **suite** is what we run: the task dataset, graders, worker count, timeout, and browser settings. For example, `agisdk-daily-10` means "run these 10 AGI SDK tasks and grade them with `agisdk_state_diff`."
|
||||
|
||||
A **variant** is the model setup we are testing on that suite. `EVAL_VARIANT` is just the human-readable name for that setup. The actual model connection still comes from `EVAL_AGENT_PROVIDER`, `EVAL_AGENT_MODEL`, `EVAL_AGENT_API_KEY`, and `EVAL_AGENT_BASE_URL`.
|
||||
|
||||
This lets us run the same suite against multiple model setups without copying the benchmark config:
|
||||
|
||||
```txt
|
||||
agisdk-daily-10 + kimi-fireworks
|
||||
agisdk-daily-10 + claude-sonnet
|
||||
agisdk-daily-10 + clado-action-000159
|
||||
```
|
||||
|
||||
For `orchestrator-executor` suites, there can also be an executor model/backend. The `EVAL_AGENT_*` vars describe the main agent or orchestrator. The optional `EVAL_EXECUTOR_*` or `CLADO_ACTION_*` vars describe the delegated executor.
|
||||
|
||||
## Agent types
|
||||
|
||||
| Type | Description |
|
||||
|------|-------------|
|
||||
| `single` | Single LLM agent driven by the BrowserOS tool loop (CDP) |
|
||||
| `orchestrator-executor` | High-level orchestrator + per-step executor (LLM or Clado visual model) |
|
||||
|
||||
### Single agent
|
||||
|
||||
```json
|
||||
{
|
||||
"agent": {
|
||||
"type": "single",
|
||||
"provider": "openai-compatible",
|
||||
"model": "moonshotai/kimi-k2.5",
|
||||
"apiKey": "OPENROUTER_API_KEY",
|
||||
"baseUrl": "https://openrouter.ai/api/v1",
|
||||
"supportsImages": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Orchestrator-Executor
|
||||
|
||||
The orchestrator works with any LLM provider. The executor can be another LLM, or the **Clado action** visual model that takes screenshots and predicts click/type/scroll coordinates.
|
||||
|
||||
```json
|
||||
{
|
||||
"agent": {
|
||||
"type": "orchestrator-executor",
|
||||
"orchestrator": {
|
||||
"provider": "openai",
|
||||
"model": "gpt-4o",
|
||||
"apiKey": "OPENAI_API_KEY"
|
||||
"provider": "openai-compatible",
|
||||
"model": "accounts/fireworks/models/kimi-k2p5",
|
||||
"apiKey": "FIREWORKS_API_KEY",
|
||||
"baseUrl": "https://api.fireworks.ai/inference/v1"
|
||||
},
|
||||
"executor": {
|
||||
"provider": "clado-action",
|
||||
"model": "qwen3-vl-30b-a3b-instruct",
|
||||
"model": "Qwen3.5-35B-A3B-action-000159-merged",
|
||||
"apiKey": "",
|
||||
"baseUrl": "https://clado-ai--clado-browseros-action-actionmodel-generate.modal.run"
|
||||
"baseUrl": "https://clado-ai--clado-browseros-action-000159-merged-actionmod-f4a6ef.modal.run"
|
||||
}
|
||||
},
|
||||
"dataset": "../data/webvoyager_e2e_test.jsonl",
|
||||
"output_dir": "../results/oe-clado-openai",
|
||||
"num_workers": 3,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"headless": true
|
||||
},
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 1200000
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Anthropic orchestrator
|
||||
## Graders
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| `performance_grader` | Multi-axis grader running on Claude Agent SDK (uses its own credentials via `CLAUDE_CODE_OAUTH_TOKEN`) |
|
||||
| `agisdk_state_diff` | AGI SDK / REAL Bench environment state-diff grader (deterministic) |
|
||||
| `infinity_state` | WebArena-Infinity verifier-script grader (deterministic) |
|
||||
|
||||
Set `graders` in your config to override the per-task `graders` field from the dataset:
|
||||
|
||||
```json
|
||||
"orchestrator": {
|
||||
"provider": "anthropic",
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"apiKey": "ANTHROPIC_API_KEY"
|
||||
}
|
||||
"graders": ["performance_grader"]
|
||||
```
|
||||
|
||||
#### Google orchestrator
|
||||
|
||||
```json
|
||||
"orchestrator": {
|
||||
"provider": "google",
|
||||
"model": "gemini-2.0-flash",
|
||||
"apiKey": "GOOGLE_API_KEY"
|
||||
}
|
||||
```
|
||||
|
||||
#### Fireworks orchestrator (OpenAI-compatible)
|
||||
|
||||
```json
|
||||
"orchestrator": {
|
||||
"provider": "openai-compatible",
|
||||
"model": "accounts/fireworks/models/kimi-k2p5",
|
||||
"apiKey": "FIREWORKS_API_KEY",
|
||||
"baseUrl": "https://api.fireworks.ai/inference/v1"
|
||||
}
|
||||
```
|
||||
|
||||
The executor config stays the same across all orchestrator providers — it always uses the Clado action model.
|
||||
|
||||
### Other Agent Types
|
||||
|
||||
| Type | Description | Example config |
|
||||
|------|-------------|----------------|
|
||||
| `single` | Single LLM agent via Gemini CLI + MCP | `webvoyager-test.json` |
|
||||
| `tool-loop` | AI SDK tool loop, connects via CDP | `tool-loop-test.json` |
|
||||
| `gemini-computer-use` | Google native computer use API | `gemini-computer-use.json` |
|
||||
| `yutori-navigator` | Yutori N1 visual model | `yutori-navigator.json` |
|
||||
|
||||
## Configuration Reference
|
||||
## Configuration reference
|
||||
|
||||
### API keys
|
||||
|
||||
The `apiKey` field supports two formats:
|
||||
- **Env var name**: `"OPENAI_API_KEY"` — resolved from `.env.development` at runtime
|
||||
- **Direct value**: `"sk-xxxxx"` — used as-is (not recommended, prefer env vars)
|
||||
- **Direct value**: `"sk-xxxxx"` — used as-is (not recommended)
|
||||
|
||||
### Environment variables
|
||||
|
||||
| Variable | Used for |
|
||||
|----------|----------|
|
||||
| `EVAL_AGENT_PROVIDER`, `EVAL_AGENT_MODEL`, `EVAL_AGENT_API_KEY`, `EVAL_AGENT_BASE_URL`, `EVAL_AGENT_SUPPORTS_IMAGES` | Suite variant model selection |
|
||||
| `FIREWORKS_API_KEY`, `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, provider-specific keys | Config-file or provider-backed model calls |
|
||||
| `EVAL_EXECUTOR_MODEL`, `EVAL_EXECUTOR_API_KEY`, `EVAL_EXECUTOR_BASE_URL` | Suite-mode orchestrator executor override |
|
||||
| `CLADO_ACTION_MODEL`, `CLADO_ACTION_API_KEY`, `CLADO_ACTION_BASE_URL` | Clado executor defaults |
|
||||
| `BROWSEROS_BINARY` | BrowserOS binary path in CI/local smoke runs |
|
||||
| `BROWSEROS_SERVER_URL` | Optional grader MCP URL override |
|
||||
| `WEBARENA_INFINITY_DIR` | Local WebArena-Infinity checkout for Infinity tasks |
|
||||
| `NOPECHA_API_KEY` | CAPTCHA solver extension |
|
||||
| `EVAL_R2_ACCOUNT_ID`, `EVAL_R2_ACCESS_KEY_ID`, `EVAL_R2_SECRET_ACCESS_KEY`, `EVAL_R2_BUCKET`, `EVAL_R2_CDN_BASE_URL` | R2 upload and viewer URL |
|
||||
|
||||
### Supported providers
|
||||
|
||||
@@ -160,12 +163,28 @@ The `apiKey` field supports two formats:
|
||||
| Anthropic | `anthropic` | No |
|
||||
| Google | `google` | No |
|
||||
| Azure OpenAI | `azure` | Yes |
|
||||
| AWS Bedrock | `bedrock` | No (uses `region`, `accessKeyId`, `secretAccessKey`) |
|
||||
| AWS Bedrock | `bedrock` | No |
|
||||
| OpenRouter | `openrouter` | No |
|
||||
| Fireworks, Together, etc. | `openai-compatible` | Yes |
|
||||
| Ollama | `ollama` | No |
|
||||
| Clado Action (executor only) | `clado-action` | Yes |
|
||||
|
||||
### R2 publishing
|
||||
|
||||
`suite --config ... --publish r2` and `publish --target r2` upload the run artifacts plus `viewer.html` to the viewer-compatible R2 layout:
|
||||
|
||||
```bash
|
||||
export EVAL_R2_ACCOUNT_ID=...
|
||||
export EVAL_R2_ACCESS_KEY_ID=...
|
||||
export EVAL_R2_SECRET_ACCESS_KEY=...
|
||||
export EVAL_R2_BUCKET=browseros-eval
|
||||
export EVAL_R2_CDN_BASE_URL=https://eval.browseros.com
|
||||
```
|
||||
|
||||
`EVAL_R2_CDN_BASE_URL` must be a public R2 custom domain, `r2.dev` URL, or Worker URL. Do not set it to the private `*.r2.cloudflarestorage.com` S3 API endpoint.
|
||||
|
||||
Published runs are available at `EVAL_R2_CDN_BASE_URL/viewer.html?run=<run-id>`.
|
||||
|
||||
### BrowserOS infrastructure
|
||||
|
||||
```json
|
||||
@@ -179,34 +198,29 @@ The `apiKey` field supports two formats:
|
||||
}
|
||||
```
|
||||
|
||||
Each worker gets its own Chrome instance. Worker N uses `base_port + N` for CDP and server ports. `base_extension_port` is still reserved as a legacy BrowserOS launch argument for compatibility with Chromium builds that still pass it.
|
||||
Each worker gets its own Chrome instance. Worker N uses `base_port + N` for CDP and server ports.
|
||||
|
||||
### Execution settings
|
||||
|
||||
| Field | Description | Default |
|
||||
|-------|-------------|---------|
|
||||
| `num_workers` | Parallel workers (each gets its own Chrome) | `1` |
|
||||
| `timeout_ms` | Per-task timeout in ms | `900000` (15 min) |
|
||||
| `timeout_ms` | Per-task timeout in ms | `1800000` (30 min) |
|
||||
| `restart_server_per_task` | Restart Chrome between tasks (cleaner state, slower) | `false` |
|
||||
|
||||
### Grading
|
||||
|
||||
Results are auto-graded after each task. The grader uses an LLM judge.
|
||||
|
||||
| Field | Description |
|
||||
|-------|-------------|
|
||||
| `grader_model` | Model for grading (e.g., `openai/gpt-4.1`) |
|
||||
| `grader_api_key_env` | Env var name for grader API key |
|
||||
| `grader_base_url` | API endpoint (e.g., `https://openrouter.ai/api/v1`) |
|
||||
|
||||
## Datasets
|
||||
|
||||
| File | Tasks | Description |
|
||||
|------|-------|-------------|
|
||||
| `webvoyager_e2e_test.jsonl` | 10 | WebVoyager test subset (quick smoke test) |
|
||||
| `agisdk-daily-10.jsonl` | 10 | Daily AGI SDK / REAL Bench subset |
|
||||
| `webvoyager.jsonl` | 643 | Full WebVoyager benchmark |
|
||||
| `mind2web_e2e_test.jsonl` | 10 | Mind2Web test subset |
|
||||
| `mind2web.jsonl` | 300 | Full Mind2Web benchmark |
|
||||
| `mind2web.jsonl` | 300 | Online-Mind2Web |
|
||||
| `webbench-{0,1,2}of4-50.jsonl` | 50 each | WebBench shards (50-task subsets) |
|
||||
| `agisdk-real-smoke.jsonl` | 1 | AGI SDK / REAL Bench smoke task |
|
||||
| `agisdk-real.jsonl` | 36 | AGI SDK / REAL Bench (action-only tasks) |
|
||||
| `webarena-infinity-hard-50.jsonl` | 50 | WebArena-Infinity hard set |
|
||||
| `browsecomp-medium-hard-50.jsonl` | 50 | BrowseComp medium-hard |
|
||||
| `browsecomp-very-hard-50.jsonl` | 50 | BrowseComp very-hard |
|
||||
|
||||
Task format (JSONL, one per line):
|
||||
|
||||
@@ -215,7 +229,7 @@ Task format (JSONL, one per line):
|
||||
"query_id": "Amazon--0",
|
||||
"dataset": "webvoyager",
|
||||
"query": "Search an Xbox Wireless controller with green color and rated above 4 stars.",
|
||||
"graders": ["webvoyager_grader", "fara_combined"],
|
||||
"graders": ["performance_grader"],
|
||||
"start_url": "https://www.amazon.com/",
|
||||
"metadata": { "original_task_id": "Amazon--0", "website": "Amazon" }
|
||||
}
|
||||
@@ -227,24 +241,58 @@ Results are saved to `output_dir`:
|
||||
|
||||
```
|
||||
results/
|
||||
oe-clado-openai/
|
||||
Amazon--0/
|
||||
metadata.json # Task result, timing, grader scores
|
||||
messages.jsonl # Full message log
|
||||
screenshots/
|
||||
001.png # Step-by-step screenshots
|
||||
002.png
|
||||
summary.json # Aggregate pass rates
|
||||
browseros-agent-weekly/
|
||||
2026-04-29-1430/
|
||||
Amazon--0/
|
||||
attempt.json # Stable attempt summary for viewer/reporting
|
||||
metadata.json # Task result, timing, grader scores
|
||||
grades.json # Compact grader results
|
||||
messages.jsonl # Full message log
|
||||
grader-artifacts/ # Grader-specific inputs/outputs/stderr
|
||||
screenshots/
|
||||
001.png # Step-by-step screenshots
|
||||
002.png
|
||||
summary.json # Aggregate pass rates
|
||||
```
|
||||
|
||||
R2 publishing preserves the task files under `runs/<run-id>/...`, writes `runs/<run-id>/manifest.json`, and uploads `viewer.html` at the bucket root. The viewer URL is `EVAL_R2_CDN_BASE_URL/viewer.html?run=<run-id>`.
|
||||
|
||||
### R2 viewer manifest
|
||||
|
||||
`runs/<run-id>/manifest.json` is the source of truth for the public viewer. New manifests include `schemaVersion: 2` and each task includes explicit artifact paths:
|
||||
|
||||
```json
|
||||
{
|
||||
"schemaVersion": 2,
|
||||
"runId": "agisdk-real-smoke-2026-04-30-0000",
|
||||
"tasks": [
|
||||
{
|
||||
"queryId": "agisdk-dashdish-10",
|
||||
"paths": {
|
||||
"metadata": "tasks/agisdk-dashdish-10/metadata.json",
|
||||
"messages": "tasks/agisdk-dashdish-10/messages.jsonl",
|
||||
"grades": "tasks/agisdk-dashdish-10/grades.json",
|
||||
"trace": "tasks/agisdk-dashdish-10/trace.jsonl",
|
||||
"screenshots": "tasks/agisdk-dashdish-10/screenshots",
|
||||
"graderArtifacts": "tasks/agisdk-dashdish-10/grader-artifacts"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The static viewer uses `task.paths` when present. Older uploaded runs without `schemaVersion` or `task.paths` still work through the legacy inferred layout: `runs/<run-id>/<task-id>/metadata.json`, `messages.jsonl`, and `screenshots/<n>.png`.
|
||||
|
||||
Manifest paths are stable artifact locations, not a guarantee that every optional artifact exists for every task. For example, `attempt.json`, `trace.jsonl`, or grader artifact directories may be absent when that artifact was not produced by the run.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**BrowserOS not found**: Expects `/Applications/BrowserOS.app/Contents/MacOS/BrowserOS`. Make sure it's installed.
|
||||
**BrowserOS not found**: Expects `/Applications/BrowserOS.app/Contents/MacOS/BrowserOS`. Set `BROWSEROS_BINARY` to override.
|
||||
|
||||
**Port conflicts**: Each worker uses `base_port + workerIndex`. 3 workers on base 9110 → ports 9110, 9111, 9112. Stop other BrowserOS instances first.
|
||||
|
||||
**API key not resolving**: If your config has `"apiKey": "OPENAI_API_KEY"`, ensure the env var is set in `.env.development`.
|
||||
|
||||
**Tasks timing out**: Increase `timeout_ms`. Default is 15 minutes; complex tasks may need 20+ minutes.
|
||||
**Tasks timing out**: Increase `timeout_ms`. Default is 30 minutes.
|
||||
|
||||
**Headless vs headed**: Set `"headless": false` to watch Chrome in real-time. Useful for debugging.
|
||||
**Headless vs headed**: Set `"headless": false` to watch Chrome in real time.
|
||||
|
||||
18
packages/browseros-agent/apps/eval/config.json
vendored
18
packages/browseros-agent/apps/eval/config.json
vendored
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"agent": {
|
||||
"type": "single",
|
||||
"provider": "openrouter",
|
||||
"model": "openai/gpt-4o",
|
||||
"apiKey": "OPENROUTER_API_KEY"
|
||||
},
|
||||
"dataset": "data/webvoyager_e2e_test.jsonl",
|
||||
"output_dir": "results",
|
||||
"num_workers": 5,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110"
|
||||
},
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 300000
|
||||
}
|
||||
@@ -7,8 +7,8 @@
|
||||
"baseUrl": "https://openrouter.ai/api/v1",
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../data/agisdk-real.jsonl",
|
||||
"num_workers": 10,
|
||||
"dataset": "../../data/agisdk-real-smoke.jsonl",
|
||||
"num_workers": 1,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
26
packages/browseros-agent/apps/eval/configs/legacy/agisdk-real.json
vendored
Normal file
26
packages/browseros-agent/apps/eval/configs/legacy/agisdk-real.json
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"agent": {
|
||||
"type": "single",
|
||||
"provider": "openai-compatible",
|
||||
"model": "accounts/fireworks/models/kimi-k2p5",
|
||||
"apiKey": "FIREWORKS_API_KEY",
|
||||
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../../data/agisdk-real.jsonl",
|
||||
"num_workers": 4,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["agisdk_state_diff"],
|
||||
"timeout_ms": 1800000
|
||||
}
|
||||
@@ -7,7 +7,7 @@
|
||||
"baseUrl": "https://openrouter.ai/api/v1",
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../data/webbench-2of4-50.jsonl",
|
||||
"dataset": "../../data/webbench-2of4-50.jsonl",
|
||||
"num_workers": 10,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
@@ -22,8 +22,5 @@
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["performance_grader"],
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 1800000
|
||||
}
|
||||
@@ -14,7 +14,7 @@
|
||||
"baseUrl": "https://api.fireworks.ai/inference/v1"
|
||||
}
|
||||
},
|
||||
"dataset": "../data/webbench-2of4-50.jsonl",
|
||||
"dataset": "../../data/webbench-2of4-50.jsonl",
|
||||
"num_workers": 10,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
@@ -29,8 +29,5 @@
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["performance_grader"],
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 1800000
|
||||
}
|
||||
@@ -9,12 +9,12 @@
|
||||
},
|
||||
"executor": {
|
||||
"provider": "clado-action",
|
||||
"model": "qwen3-vl-30b-a3b-instruct",
|
||||
"model": "Qwen3.5-35B-A3B-action-000159-merged",
|
||||
"apiKey": "",
|
||||
"baseUrl": "https://clado-ai--clado-browseros-action-actionmodel-generate.modal.run"
|
||||
"baseUrl": "https://clado-ai--clado-browseros-action-000159-merged-actionmod-f4a6ef.modal.run"
|
||||
}
|
||||
},
|
||||
"dataset": "../data/webbench-2of4-50.jsonl",
|
||||
"dataset": "../../data/agisdk-real.jsonl",
|
||||
"num_workers": 10,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
@@ -23,14 +23,11 @@
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
"headless": true
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["performance_grader"],
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"graders": ["agisdk_state_diff"],
|
||||
"timeout_ms": 1800000
|
||||
}
|
||||
@@ -7,7 +7,7 @@
|
||||
"baseUrl": "https://openrouter.ai/api/v1",
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../data/webarena-infinity-hard-50.jsonl",
|
||||
"dataset": "../../data/webarena-infinity-hard-50.jsonl",
|
||||
"num_workers": 10,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
@@ -5,7 +5,7 @@
|
||||
"model": "openai/gpt-4.1",
|
||||
"apiKey": "OPENROUTER_API_KEY"
|
||||
},
|
||||
"dataset": "../data/mind2web.jsonl",
|
||||
"dataset": "../../data/mind2web.jsonl",
|
||||
"num_workers": 5,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
@@ -20,8 +20,5 @@
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["performance_grader"],
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 300000
|
||||
}
|
||||
@@ -7,7 +7,7 @@
|
||||
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
||||
"supportsImages": true
|
||||
},
|
||||
"dataset": "../data/webvoyager.jsonl",
|
||||
"dataset": "../../data/webvoyager.jsonl",
|
||||
"num_workers": 3,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
@@ -22,8 +22,5 @@
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["performance_grader"],
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 1200000
|
||||
}
|
||||
22
packages/browseros-agent/apps/eval/configs/suites/agisdk-daily-10.json
vendored
Normal file
22
packages/browseros-agent/apps/eval/configs/suites/agisdk-daily-10.json
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "agisdk-daily-10",
|
||||
"dataset": "../../data/agisdk-daily-10.jsonl",
|
||||
"agent": {
|
||||
"type": "single"
|
||||
},
|
||||
"graders": ["agisdk_state_diff"],
|
||||
"workers": 1,
|
||||
"restartBrowserPerTask": true,
|
||||
"timeoutMs": 1800000,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": true
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
}
|
||||
}
|
||||
22
packages/browseros-agent/apps/eval/configs/suites/agisdk-real-smoke.json
vendored
Normal file
22
packages/browseros-agent/apps/eval/configs/suites/agisdk-real-smoke.json
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "agisdk-real-smoke",
|
||||
"dataset": "../../data/agisdk-real-smoke.jsonl",
|
||||
"agent": {
|
||||
"type": "single"
|
||||
},
|
||||
"graders": ["agisdk_state_diff"],
|
||||
"workers": 1,
|
||||
"restartBrowserPerTask": true,
|
||||
"timeoutMs": 1800000,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
}
|
||||
}
|
||||
22
packages/browseros-agent/apps/eval/configs/suites/agisdk-real.json
vendored
Normal file
22
packages/browseros-agent/apps/eval/configs/suites/agisdk-real.json
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"id": "agisdk-real",
|
||||
"dataset": "../../data/agisdk-real.jsonl",
|
||||
"agent": {
|
||||
"type": "single"
|
||||
},
|
||||
"graders": ["agisdk_state_diff"],
|
||||
"workers": 1,
|
||||
"restartBrowserPerTask": true,
|
||||
"timeoutMs": 1800000,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"agent": {
|
||||
"type": "gemini-computer-use",
|
||||
"apiKey": "GOOGLE_AI_API_KEY",
|
||||
"screenSize": {
|
||||
"width": 1440,
|
||||
"height": 900
|
||||
},
|
||||
"turnLimit": 100
|
||||
},
|
||||
"dataset": "../data/test-set.jsonl",
|
||||
"num_workers": 1,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["performance_grader"],
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 1200000
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"agent": {
|
||||
"type": "yutori-navigator",
|
||||
"apiKey": "YUTORI_API_KEY",
|
||||
"screenSize": {
|
||||
"width": 1280,
|
||||
"height": 800
|
||||
},
|
||||
"turnLimit": 100
|
||||
},
|
||||
"dataset": "../data/test-set.jsonl",
|
||||
"num_workers": 1,
|
||||
"restart_server_per_task": true,
|
||||
"browseros": {
|
||||
"server_url": "http://127.0.0.1:9110",
|
||||
"base_cdp_port": 9010,
|
||||
"base_server_port": 9110,
|
||||
"base_extension_port": 9310,
|
||||
"load_extensions": false,
|
||||
"headless": false
|
||||
},
|
||||
"captcha": {
|
||||
"api_key_env": "NOPECHA_API_KEY"
|
||||
},
|
||||
"graders": ["performance_grader"],
|
||||
"grader_api_key_env": "OPENROUTER_API_KEY",
|
||||
"grader_base_url": "https://openrouter.ai/api/v1",
|
||||
"grader_model": "openai/gpt-4.1",
|
||||
"timeout_ms": 1200000
|
||||
}
|
||||
10
packages/browseros-agent/apps/eval/data/agisdk-daily-10.jsonl
vendored
Normal file
10
packages/browseros-agent/apps/eval/data/agisdk-daily-10.jsonl
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
{"query_id": "agisdk-dashdish-10", "dataset": "agisdk-real", "query": "Place an order from \"Souvla\" for a \"Medium Classic Cheeseburger\" and a \"Small Bacon Double Cheeseburger\" with \"Standard Delivery\" as the method with the default charged options.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-10", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-10", "challenge_type": "action", "difficulty": "hard", "similar_to": "Doordash"}}}
|
||||
{"query_id": "agisdk-fly-unified-5", "dataset": "agisdk-real", "query": "Find me the cheapest fare for a flight from Orlando to Milwaukee on December 5th, 2024 and book it.\nPassenger: John Doe\nDate of Birth: 01/01/1990\nSex: Male\nSeat Selection: No\nPayment: Credit Card (378342143523967), Exp: 12/30, Security Code: 420 Address: 123 Main St, San Francisco, CA, 94105, USA, Phone: 555-123-4567, Email: johndoe@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-5", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "United Airlines"}}}
|
||||
{"query_id": "agisdk-udriver-10", "dataset": "agisdk-real", "query": "Order me a ride for 4pm, I'll be at the de Young muesum headed to the Waterbar, fanciest option possible please.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-10", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-10", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
|
||||
{"query_id": "agisdk-udriver-9", "dataset": "agisdk-real", "query": "Book me a ride from the thai restaurant I last took a ride to for later today at 2pm, I'll be at 333 Apartments on Fremont", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-9", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-9", "challenge_type": "retrieval-action", "difficulty": "hard", "similar_to": "Uber"}}}
|
||||
{"query_id": "agisdk-topwork-4", "dataset": "agisdk-real", "query": "Create a job post for a UI/UX Designer with expertise in Figma, Sketch, and Adobe Creative Suite, including project details, timeline, and required skills (Wireframing, Prototyping, Responsive Design).", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-4", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Upwork"}}}
|
||||
{"query_id": "agisdk-gocalendar-4", "dataset": "agisdk-real", "query": "Change the \"Team Check-In\" event on July 18, 2024, name to \"Project Kickoff\" and update the location to \"Zoom\"", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-4", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Google Calendar"}}}
|
||||
{"query_id": "agisdk-staynb-6", "dataset": "agisdk-real", "query": "Find and book the stay with the best value for money (cheapest stay with the best reviews) for 1 day. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-6", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-6", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Airbnb"}}}
|
||||
{"query_id": "agisdk-udriver-11", "dataset": "agisdk-real", "query": "I need to go from Pacific Catch on Chestnut back home to 333 Fremont now. If the fancy version is within ten dollars of the regular one, book that.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-11", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-11", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
|
||||
{"query_id": "agisdk-networkin-5", "dataset": "agisdk-real", "query": "Send a connection request to John Smith.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-5", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-5", "challenge_type": "action", "difficulty": "easy", "similar_to": "LinkedIn"}}}
|
||||
{"query_id": "agisdk-zilloft-6", "dataset": "agisdk-real", "query": "Select a property listed in San Francisco as \"Condos\" within a price range under $300,000 and request a tour for tomorrow at 4:00 PM. Use these contact details: Name: Sarah Brown, Email: sarahbrown@example.com, Phone: 555-987-6543.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-zilloft.vercel.app", "metadata": {"original_task_id": "zilloft-6", "website": "Zilloft", "category": "agisdk-real", "additional": {"agisdk_task_id": "zilloft-6", "challenge_type": "action", "difficulty": "medium", "similar_to": "Zillow"}}}
|
||||
1
packages/browseros-agent/apps/eval/data/agisdk-real-smoke.jsonl
vendored
Normal file
1
packages/browseros-agent/apps/eval/data/agisdk-real-smoke.jsonl
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"query_id": "agisdk-dashdish-10", "dataset": "agisdk-real", "query": "Place an order from \"Souvla\" for a \"Medium Classic Cheeseburger\" and a \"Small Bacon Double Cheeseburger\" with \"Standard Delivery\" as the method with the default charged options.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-10", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-10", "challenge_type": "action", "difficulty": "hard", "similar_to": "Doordash"}}}
|
||||
@@ -1,20 +1,16 @@
|
||||
{"query_id": "agisdk-dashdish-10", "dataset": "agisdk-real", "query": "Place an order from \"Souvla\" for a \"Medium Classic Cheeseburger\" and a \"Small Bacon Double Cheeseburger\" with \"Standard Delivery\" as the method with the default charged options.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-10", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-10", "challenge_type": "action", "difficulty": "hard", "similar_to": "Doordash"}}}
|
||||
{"query_id": "agisdk-fly-unified-5", "dataset": "agisdk-real", "query": "Find me the cheapest fare for a flight from Orlando to Milwaukee on December 5th, 2024 and book it.\nPassenger: John Doe\nDate of Birth: 01/01/1990\nSex: Male\nSeat Selection: No\nPayment: Credit Card (378342143523967), Exp: 12/25, Security Code: 420 Address: 123 Main St, San Francisco, CA, 94105, USA, Phone: 555-123-4567, Email: johndoe@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-5", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "United Airlines"}}}
|
||||
{"query_id": "agisdk-fly-unified-5", "dataset": "agisdk-real", "query": "Find me the cheapest fare for a flight from Orlando to Milwaukee on December 5th, 2024 and book it.\nPassenger: John Doe\nDate of Birth: 01/01/1990\nSex: Male\nSeat Selection: No\nPayment: Credit Card (378342143523967), Exp: 12/30, Security Code: 420 Address: 123 Main St, San Francisco, CA, 94105, USA, Phone: 555-123-4567, Email: johndoe@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-5", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "United Airlines"}}}
|
||||
{"query_id": "agisdk-udriver-10", "dataset": "agisdk-real", "query": "Order me a ride for 4pm, I'll be at the de Young muesum headed to the Waterbar, fanciest option possible please.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-10", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-10", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
|
||||
{"query_id": "agisdk-udriver-9", "dataset": "agisdk-real", "query": "Book me a ride from the thai restaurant I last took a ride to for later today at 2pm, I'll be at 333 Apartments on Fremont", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-9", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-9", "challenge_type": "retrieval-action", "difficulty": "hard", "similar_to": "Uber"}}}
|
||||
{"query_id": "agisdk-topwork-4", "dataset": "agisdk-real", "query": "Create a job post for a UI/UX Designer with expertise in Figma, Sketch, and Adobe Creative Suite, including project details, timeline, and required skills (Wireframing, Prototyping, Responsive Design).", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-4", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Upwork"}}}
|
||||
{"query_id": "agisdk-gocalendar-4", "dataset": "agisdk-real", "query": "Change the \"Team Check-In\" event on July 18, 2024, name to \"Project Kickoff\" and update the location to \"Zoom\"", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-4", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Google Calendar"}}}
|
||||
{"query_id": "agisdk-staynb-6", "dataset": "agisdk-real", "query": "Find and book the stay with the best value for money (cheapest stay with the best reviews) for 1 day. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-6", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-6", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Airbnb"}}}
|
||||
{"query_id": "agisdk-fly-unified-9", "dataset": "agisdk-real", "query": "Book me a flight from San Francisco to Chicago in Basic Economy on December 18th at 10:00. Ensure no seat selection is made.\nPassenger: David Lee\nDate of Birth: 07/22/1985\nSex: Male\nSeat Selection: No\nPayment: Credit Card (9999 8888 7777), Exp: 03/30, Address: 987 Cedar St, Chicago, IL, 60601, USA, Phone: 555-987-1234, Email: davidlee@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-9", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-9", "challenge_type": "action", "difficulty": "hard", "similar_to": "United Airlines"}}}
|
||||
{"query_id": "agisdk-networkin-9", "dataset": "agisdk-real", "query": "Find a professional who attended Stanford and send them a connection request and a message.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-9", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-9", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
|
||||
{"query_id": "agisdk-udriver-11", "dataset": "agisdk-real", "query": "I need to go from Pacific Catch on Chestnut back home to 333 Fremont now. If the fancy version is within ten dollars of the regular one, book that.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-11", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-11", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
|
||||
{"query_id": "agisdk-fly-unified-4", "dataset": "agisdk-real", "query": "Book me a round-trip flight from Providence (Rhode Island) to Indianapolis, departing on December 5th, 2024 at 08:00 and returning on December 9th at 14:00.\nPassenger: Jane Smith\nDate of Birth: 02/14/1995\nSex: Female\nSeat Selection: Yes (Window seat)\nPayment: Credit Card (378342143523967), Exp: 06/26, security code: 345 Address: 456 Elm St, Miami, FL, 33101, USA, Phone: 555-987-6543, Email: janesmith@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-4", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "United Airlines"}}}
|
||||
{"query_id": "agisdk-networkin-5", "dataset": "agisdk-real", "query": "Send a connection request to John Smith.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-5", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-5", "challenge_type": "action", "difficulty": "easy", "similar_to": "LinkedIn"}}}
|
||||
{"query_id": "agisdk-zilloft-6", "dataset": "agisdk-real", "query": "Select a property listed in San Francisco as \"Condos\" within a price range under $300,000 and request a tour for tomorrow at 4:00 PM. Use these contact details: Name: Sarah Brown, Email: sarahbrown@example.com, Phone: 555-987-6543.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-zilloft.vercel.app", "metadata": {"original_task_id": "zilloft-6", "website": "Zilloft", "category": "agisdk-real", "additional": {"agisdk_task_id": "zilloft-6", "challenge_type": "action", "difficulty": "medium", "similar_to": "Zillow"}}}
|
||||
{"query_id": "agisdk-topwork-2", "dataset": "agisdk-real", "query": "Create a job posting for a Backend Developer specializing in Python, Django, and Flask to develop a high-performance web application. Include project details such as required skills (PostgreSQL, Docker, AWS, CI/CD), estimated project timeline, and budget.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-2", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-2", "challenge_type": "action", "difficulty": "medium", "similar_to": "Upwork"}}}
|
||||
{"query_id": "agisdk-gocalendar-3", "dataset": "agisdk-real", "query": "Delete the event titled \"Breakfast Meeting with Client\" scheduled for July 19, 2024", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-3", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-3", "challenge_type": "action", "difficulty": "easy", "similar_to": "Google Calendar"}}}
|
||||
{"query_id": "agisdk-topwork-3", "dataset": "agisdk-real", "query": "Create a job listing for a Full-Stack Developer with expertise in Java, Spring Boot, and Angular, outlining the project scope, estimated duration, and required skills (MySQL, Docker, Kubernetes, and Jenkins). The ideal candidate should have experience in enterprise-level applications and building scalable microservices. After creating the job post, please describe what you included in the job listing.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-3", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-3", "challenge_type": "retrieval", "difficulty": "medium", "similar_to": "Upwork"}}}
|
||||
{"query_id": "agisdk-fly-unified-2", "dataset": "agisdk-real", "query": "Book me a one-way flight from Indiana to New York on December 2nd 2024 at 12:00.\nPassenger: John Doe\nDate of Birth: 01/01/1990\nSex: Male\nSeat Selection: No\nPayment: Credit Card (378342143523967), Exp: 12/25, Security Code: 245, Address: 123 Main St, San Francisco, CA, 94105, USA, Phone: 555-123-4567, Email: johndoe@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-2", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "United Airlines"}}}
|
||||
{"query_id": "agisdk-dashdish-7", "dataset": "agisdk-real", "query": "Select \"Express Delivery\" for an order from \"DragonEats\" of \"Mushroom Swiss Burger\" and complete the checkout with the pre-loaded Visa card.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-7", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-7", "challenge_type": "action", "difficulty": "hard", "similar_to": "Doordash"}}}
|
||||
{"query_id": "agisdk-networkin-3", "dataset": "agisdk-real", "query": "Write a post inviting users to a networking event, including details about the event's purpose, date, and target audience.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-3", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-3", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
|
||||
{"query_id": "agisdk-gomail-7", "dataset": "agisdk-real", "query": "Delete the email with the subject \"New Leadership Articles You Can't Miss\" from the Inbox.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-7", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-7", "challenge_type": "retrieval-action", "difficulty": "hard", "similar_to": "Gmail"}}}
|
||||
@@ -23,25 +19,18 @@
|
||||
{"query_id": "agisdk-staynb-2", "dataset": "agisdk-real", "query": "Click on one of the stays displayed on the homepage and book it for a family of 4 (2 adults and 2 children). For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-2", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "Airbnb"}}}
|
||||
{"query_id": "agisdk-opendining-10", "dataset": "agisdk-real", "query": "Check the menus of all restaurants for vegetarian options and make a reservation at the one with the most vegetarian choices. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-10", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-10", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "OpenTable"}}}
|
||||
{"query_id": "agisdk-opendining-4", "dataset": "agisdk-real", "query": "Use the search bar to search for a restaurant on September 2nd at 4:30 PM for 7 people, using \"Japanese\" as the search term, and book the first result. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-4", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-4", "challenge_type": "action", "difficulty": "hard", "similar_to": "OpenTable"}}}
|
||||
{"query_id": "agisdk-gomail-8", "dataset": "agisdk-real", "query": "Clear all emails from \"GitHub\" in the inbox to trash.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-8", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-8", "challenge_type": "action", "difficulty": "medium", "similar_to": "Gmail"}}}
|
||||
{"query_id": "agisdk-dashdish-4", "dataset": "agisdk-real", "query": "Schedule a delivery order from \"Taco Bell\" adding a \"Classic Cheeseburger\" large size for later and add the note \"Leave at the front door\".", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-4", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Doordash"}}}
|
||||
{"query_id": "agisdk-networkin-1", "dataset": "agisdk-real", "query": "Create a new text post for the feed with a professional update about AI trends in 2025, mentioning three key advancements and their impact on the job market.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-1", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-1", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
|
||||
{"query_id": "agisdk-dashdish-5", "dataset": "agisdk-real", "query": "Add three \"Loaded Bacon Cheese Fries\" to the shopping cart from \"Man vs. Fries\". Proceed to checkout and select \"Pickup\" as the delivery method.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-5", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Doordash"}}}
|
||||
{"query_id": "agisdk-opendining-5", "dataset": "agisdk-real", "query": "Scroll through the homepage carousel until \"Ocean Breeze\" is visible, select the second available time slot, and complete the reservation. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-5", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-5", "challenge_type": "action", "difficulty": "medium", "similar_to": "OpenTable"}}}
|
||||
{"query_id": "agisdk-topwork-1", "dataset": "agisdk-real", "query": "Create a new job post for a Frontend Developer with expertise in React and TypeScript, specifying project details such as estimated duration, required skills, and budget.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-topwork.vercel.app", "metadata": {"original_task_id": "topwork-1", "website": "TopWork", "category": "agisdk-real", "additional": {"agisdk_task_id": "topwork-1", "challenge_type": "action", "difficulty": "medium", "similar_to": "Upwork"}}}
|
||||
{"query_id": "agisdk-gocalendar-1", "dataset": "agisdk-real", "query": "Create a new event titled \"Team Meeting\" on July 19, 2024, from 2 PM to 2:30 PM, and include \"Conference Room A\" as the location", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-1", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-1", "challenge_type": "action", "difficulty": "medium", "similar_to": "Google Calendar"}}}
|
||||
{"query_id": "agisdk-gomail-5", "dataset": "agisdk-real", "query": "Schedule an email to jane.doe@example.com with the subject \"Weekly Update\" to be sent next Monday at 9:00 AM.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-5", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-5", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Gmail"}}}
|
||||
{"query_id": "agisdk-staynb-4", "dataset": "agisdk-real", "query": "Book a stay for 2 children with 1 adult. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-4", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-4", "challenge_type": "action", "difficulty": "medium", "similar_to": "Airbnb"}}}
|
||||
{"query_id": "agisdk-networkin-6", "dataset": "agisdk-real", "query": "Choose a random person who you haven't connected with, connect with them, and send them a message saying, 'howdy, partner'.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-6", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-6", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
|
||||
{"query_id": "agisdk-dashdish-2", "dataset": "agisdk-real", "query": "Add a \"Medium Pepperoni Pizza\" from the restaurant \"Papa Johns Pizza\" to the shopping cart and purchase it.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-dashdish.vercel.app", "metadata": {"original_task_id": "dashdish-2", "website": "DashDish", "category": "agisdk-real", "additional": {"agisdk_task_id": "dashdish-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "Doordash"}}}
|
||||
{"query_id": "agisdk-staynb-8", "dataset": "agisdk-real", "query": "Scroll through the homepage and book the last stay located in Paris.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-8", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-8", "challenge_type": "retrieval-action", "difficulty": "medium", "similar_to": "Airbnb"}}}
|
||||
{"query_id": "agisdk-gomail-2", "dataset": "agisdk-real", "query": "Mark the first email in the Inbox as \"read\".", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-2", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-2", "challenge_type": "action", "difficulty": "easy", "similar_to": "Gmail"}}}
|
||||
{"query_id": "agisdk-networkin-10", "dataset": "agisdk-real", "query": "Generate a polite follow-up message for a previous unanswered chat, starting with \"Following up on\".", "graders": ["agisdk_state_diff"], "start_url": "https://evals-networkin.vercel.app", "metadata": {"original_task_id": "networkin-10", "website": "Networkin", "category": "agisdk-real", "additional": {"agisdk_task_id": "networkin-10", "challenge_type": "action", "difficulty": "medium", "similar_to": "LinkedIn"}}}
|
||||
{"query_id": "agisdk-gomail-3", "dataset": "agisdk-real", "query": "Compose a new email to jonathan.smith@example.com with the subject \"Meeting Notes\" and body \"Please find the meeting notes attached.\"", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gomail.vercel.app", "metadata": {"original_task_id": "gomail-3", "website": "GoMail", "category": "agisdk-real", "additional": {"agisdk_task_id": "gomail-3", "challenge_type": "action", "difficulty": "easy", "similar_to": "Gmail"}}}
|
||||
{"query_id": "agisdk-udriver-6", "dataset": "agisdk-real", "query": "Me and 4 friends need a ride from the Palace Hotel to dinner at Osha Thai leaving now", "graders": ["agisdk_state_diff"], "start_url": "https://evals-udriver.vercel.app", "metadata": {"original_task_id": "udriver-6", "website": "UDriver", "category": "agisdk-real", "additional": {"agisdk_task_id": "udriver-6", "challenge_type": "action", "difficulty": "hard", "similar_to": "Uber"}}}
|
||||
{"query_id": "agisdk-staynb-9", "dataset": "agisdk-real", "query": "Book a stay with the maximum number of guests supported. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-9", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-9", "challenge_type": "action", "difficulty": "hard", "similar_to": "Airbnb"}}}
|
||||
{"query_id": "agisdk-zilloft-3", "dataset": "agisdk-real", "query": "Find a home in San Diego priced under $150,000 with at least 2 bedrooms and request a tour. Use these details: Contact Name: John Doe, Email: johndoe@example.com, Phone: 555-123-4567, Tour Time: 2:00 PM, Tour Date: First available.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-zilloft.vercel.app", "metadata": {"original_task_id": "zilloft-3", "website": "Zilloft", "category": "agisdk-real", "additional": {"agisdk_task_id": "zilloft-3", "challenge_type": "retrieval-action", "difficulty": "easy", "similar_to": "Zillow"}}}
|
||||
{"query_id": "agisdk-fly-unified-6", "dataset": "agisdk-real", "query": "Reserve me a seat for the flight from Austin to Pittsburgh departing on December 11th, 2024 at 8:00 in Basic Economy.\nPassenger: Alice Brown\nDate of Birth: 05/20/1992\nSex: Female\nSeat Selection: Yes (Aisle seat)\nPayment: Credit Card (378342143523967), Exp: 09/27, security code: 332 Address: 789 Pine St, Los Angeles, CA, 90012, USA, Phone: 555-456-7890, Email: alicebrown@example.com.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-fly-unified.vercel.app", "metadata": {"original_task_id": "fly-unified-6", "website": "Fly Unified", "category": "agisdk-real", "additional": {"agisdk_task_id": "fly-unified-6", "challenge_type": "action", "difficulty": "medium", "similar_to": "United Airlines"}}}
|
||||
{"query_id": "agisdk-opendining-3", "dataset": "agisdk-real", "query": "Book a table at \"The Royal Dine\" for a party of 4 on July 20, 2024, at 7 PM. For fields you don't know the answer for, just fill them in with anything of your choice.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-opendining.vercel.app", "metadata": {"original_task_id": "opendining-3", "website": "OpenDining", "category": "agisdk-real", "additional": {"agisdk_task_id": "opendining-3", "challenge_type": "action", "difficulty": "easy", "similar_to": "OpenTable"}}}
|
||||
{"query_id": "agisdk-gocalendar-7", "dataset": "agisdk-real", "query": "Reschedule the \"Morning Coffee with sister\" event from July 18, 2024, at 9 AM to July 19, 2024, at 10AM using drag-and-drop functionality", "graders": ["agisdk_state_diff"], "start_url": "https://evals-gocalendar.vercel.app", "metadata": {"original_task_id": "gocalendar-7", "website": "GoCalendar", "category": "agisdk-real", "additional": {"agisdk_task_id": "gocalendar-7", "challenge_type": "action", "difficulty": "medium", "similar_to": "Google Calendar"}}}
|
||||
{"query_id": "agisdk-staynb-5", "dataset": "agisdk-real", "query": "Use the search bar to look for a stay. For the \"Where\" section, use the \"Search by region\" popover and select \"Europe\". Set the check-in date to October 13th and the check-out date to October 23rd. For the \"Who\" section, select 1 infant, 2 children, and 2 adults. Press the search button, select the first stay, and book it.", "graders": ["agisdk_state_diff"], "start_url": "https://evals-staynb.vercel.app", "metadata": {"original_task_id": "staynb-5", "website": "StayNB", "category": "agisdk-real", "additional": {"agisdk_task_id": "staynb-5", "challenge_type": "action", "difficulty": "medium", "similar_to": "Airbnb"}}}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user