# Mirror of https://github.com/browseros-ai/BrowserOS.git
# Synced 2026-05-18 11:06:19 +00:00
# Listing size at sync time: 92 lines, 3.3 KiB (YAML)
name: Weekly Eval

# Two entry points: a manual dispatch that can override the eval config, and
# a weekly cron run that always uses the default config.
on:
  workflow_dispatch:
    inputs:
      config:
        description: 'Eval config file (relative to apps/eval/)'
        default: 'configs/browseros-agent-weekly.json'
        required: false
  schedule:
    # 06:00 UTC each Saturday (cron day-of-week 6)
    - cron: '0 6 * * 6'

# Workflow token scope: read-only access to repository contents is the only
# permission granted to the jobs below.
permissions:
  contents: read

jobs:
  # Single job: install BrowserOS and the JS toolchain, run the eval config,
  # upload the run, then build an HTML trend report and attach it as a
  # workflow artifact.
  eval:
    runs-on: ubuntu-latest
    # Hard stop after 6 hours.
    timeout-minutes: 360
    env:
      # Resolved once for the whole job so the "Run eval" and "Upload runs"
      # steps cannot drift apart. Scheduled runs carry no inputs, so the
      # fallback after `||` applies to them.
      EVAL_CONFIG: ${{ github.event.inputs.config || 'configs/browseros-agent-weekly.json' }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install BrowserOS
        env:
          # Single source of truth for the pinned release: used for both the
          # download URL and the package file name.
          BROWSEROS_VERSION: 'v0.44.0.1'
        run: |
          deb="BrowserOS_${BROWSEROS_VERSION}_amd64.deb"
          wget -q "https://github.com/browseros-ai/BrowserOS/releases/download/${BROWSEROS_VERSION}/${deb}"
          sudo dpkg -i "$deb"
          # Best-effort check: if `--version` fails, print the resolved binary
          # path instead of failing the step.
          browseros --version || echo "BrowserOS installed at $(which browseros)"

      - name: Install Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      - name: Install dependencies
        working-directory: packages/browseros-agent
        run: bun install --ignore-scripts && bun run build:agent-sdk

      - name: Install captcha solver extension
        working-directory: packages/browseros-agent/apps/eval
        run: |
          mkdir -p extensions
          # -f makes curl exit non-zero on an HTTP error instead of saving the
          # error page as nopecha.zip and failing later inside unzip; -S keeps
          # the error message visible despite -s; --retry covers transient
          # network failures.
          # NOTE(review): this pulls the *latest* release of a third-party
          # extension, so the unpacked contents can change between runs.
          curl -fsSL --retry 3 -o /tmp/nopecha.zip https://github.com/NopeCHALLC/nopecha-extension/releases/latest/download/chromium_automation.zip
          unzip -qo /tmp/nopecha.zip -d extensions/nopecha

      - name: Run eval
        working-directory: packages/browseros-agent/apps/eval
        env:
          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          BROWSEROS_BINARY: /usr/bin/browseros
        run: |
          echo "Running eval with config: $EVAL_CONFIG"
          bun run src/index.ts -c "$EVAL_CONFIG"

      - name: Upload runs to R2
        if: success()
        working-directory: packages/browseros-agent/apps/eval
        env:
          # R2 credentials stay step-scoped so they are only exposed to the
          # steps that need them.
          EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
          EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
          EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
          EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
          EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
        run: |
          # The results directory passed to the upload script is named after
          # the config file, minus its .json suffix.
          CONFIG_NAME=$(basename "$EVAL_CONFIG" .json)
          bun scripts/upload-run.ts "results/$CONFIG_NAME"

      - name: Generate trend report
        if: success()
        working-directory: packages/browseros-agent
        env:
          EVAL_R2_ACCOUNT_ID: ${{ secrets.EVAL_R2_ACCOUNT_ID }}
          EVAL_R2_ACCESS_KEY_ID: ${{ secrets.EVAL_R2_ACCESS_KEY_ID }}
          EVAL_R2_SECRET_ACCESS_KEY: ${{ secrets.EVAL_R2_SECRET_ACCESS_KEY }}
          EVAL_R2_BUCKET: ${{ secrets.EVAL_R2_BUCKET }}
          EVAL_R2_CDN_BASE_URL: ${{ secrets.EVAL_R2_CDN_BASE_URL }}
        # Presumably writes the report to the path given as its argument; the
        # next step uploads that same path.
        run: bun apps/eval/scripts/weekly-report.ts /tmp/eval-report.html

      - name: Upload report as artifact
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: eval-report-${{ github.run_id }}
          path: /tmp/eval-report.html
          # Fail loudly if the report was never produced instead of finishing
          # green with only a warning.
          if-no-files-found: error