/**
 * Weekly Report Generator
 *
 * Reads all uploaded eval runs from R2, builds cumulative score history,
 * and generates an HTML dashboard with:
 *   - Config selector dropdown (groups runs by config/runId pattern)
 *   - Config details card (architecture, model, dataset, grader)
 *   - Interactive trend chart (filtered by selected config)
 *   - Stat cards (latest, trend, best, duration)
 *   - Searchable table of all runs
 *
 * Usage:
 *   bun apps/eval/scripts/weekly-report.ts [local-output-path]
 *
 * Env vars required:
 *   EVAL_R2_ACCOUNT_ID, EVAL_R2_ACCESS_KEY_ID, EVAL_R2_SECRET_ACCESS_KEY
 *   EVAL_R2_BUCKET (default: browseros-eval)
 */

import { writeFile } from 'node:fs/promises'
import {
  GetObjectCommand,
  ListObjectsV2Command,
  PutObjectCommand,
  S3Client,
} from '@aws-sdk/client-s3'
import {
  buildRunSummaries,
  type ReportManifest,
  type RunSummary,
} from '../src/reporting/run-summary'

/**
 * Reads a mandatory environment variable.
 *
 * Logs an error and terminates the process with exit code 1 when the
 * variable is unset or empty, so callers always receive a non-empty string.
 *
 * @param name - Name of the environment variable to read.
 * @returns The variable's value.
 */
function requireEnv(name: string): string {
  const value = process.env[name]
  if (!value) {
    console.error(`Missing required env var: ${name}`)
    process.exit(1)
  }
  return value
}

const accountId = requireEnv('EVAL_R2_ACCOUNT_ID')
const accessKeyId = requireEnv('EVAL_R2_ACCESS_KEY_ID')
const secretAccessKey = requireEnv('EVAL_R2_SECRET_ACCESS_KEY')
// `||` (not `??`) so that an empty EVAL_R2_BUCKET also falls back to the default.
const bucket = process.env.EVAL_R2_BUCKET || 'browseros-eval'

const client = new S3Client({
  region: 'auto',
  endpoint: `https://${accountId}.r2.cloudflarestorage.com`,
  credentials: { accessKeyId, secretAccessKey },
})

// Step 1: List all manifest.json files in runs/
console.log('Scanning R2 for eval runs...')

const manifests: ReportManifest[] = []
let continuationToken: string | undefined

// Listings are paginated: keep requesting pages until the response no longer
// carries a NextContinuationToken.
do {
  const listRes = await client.send(
    new ListObjectsV2Command({
      Bucket: bucket,
      Prefix: 'runs/',
      ContinuationToken: continuationToken,
    }),
  )

  const manifestKeys =
    listRes.Contents?.filter((obj) => obj.Key?.endsWith('/manifest.json')).map(
      (obj) => obj.Key as string,
    ) ?? []

  for (const key of manifestKeys) {
    try {
      const res = await client.send(
        new GetObjectCommand({ Bucket: bucket, Key: key }),
      )
      const body = await res.Body?.transformToString()
      if (body) manifests.push(JSON.parse(body))
    } catch {
      // Best effort: one unreadable or malformed manifest must not abort the report.
      console.warn(` Failed to read ${key}, skipping`)
    }
  }

  continuationToken = listRes.NextContinuationToken
} while (continuationToken)

console.log(`Found ${manifests.length} runs`)

if (manifests.length === 0) {
  console.log('No runs found. Nothing to report.')
  process.exit(0)
}

// Step 2: Build run summaries
const runs: RunSummary[] = buildRunSummaries(manifests)

/**
 * Escapes the characters that are significant in HTML text and in
 * double-quoted attribute values, so run-derived strings can be interpolated
 * into the generated report without being interpreted as markup.
 *
 * @param s - Raw text to embed in the HTML report.
 * @returns `s` with `&`, `<`, `>` and `"` replaced by their HTML entities.
 */
function escHtml(s: string): string {
  return s
    // `&` has to go first; otherwise the ampersands of the entities emitted
    // by the replacements below would themselves be escaped again.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
}

// Step 3: Identify unique config groups
const configGroups = [...new Set(runs.map((r) => r.configName))]
// Prefer the 'ci-weekly' config when present; otherwise fall back to the
// first config encountered.
const defaultConfig = configGroups.includes('ci-weekly')
  ? 'ci-weekly'
  : configGroups[0]

// Step 4: Generate HTML report
const html = `
| Date | Config | Model | Dataset | Architecture | Score | Tasks | Timeout | Avg Duration | View |
|---|---|---|---|---|---|---|---|---|---|
| ${escHtml(r.date)} | ${escHtml(r.runId)} | ${escHtml(r.dataset)} | ${escHtml(archLabel)} | ${r.avgScore.toFixed(1)}% | ${r.total} | ${r.timeout} | ${(r.avgDurationMs / 1000).toFixed(0)}s | View → |