mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-19 19:41:06 +00:00
* feat(eval): add suite variant config bridge * feat(eval): add stable run artifacts * refactor(eval): add shared grader contract * feat(eval): persist grader artifacts * refactor(eval): rename runner layers * refactor(eval): add executor backend boundary * refactor(eval): split clado backend * feat(eval): add workflow compatible cli * feat(eval): add r2 publisher module * ci(eval): migrate weekly workflow to eval cli * docs(eval): document suite pipeline * chore(eval): verify pipeline refactor * fix: address review feedback for PR #875 * docs(eval): add env example * docs(eval): explain suites and variants * chore(eval): organize config layouts * chore(eval): colocate grader python evaluators
65 lines
1.9 KiB
TypeScript
Vendored
65 lines
1.9 KiB
TypeScript
Vendored
import { describe, expect, it } from 'bun:test'
|
|
import { parseEvalCliArgs } from '../../src/cli/args'
|
|
|
|
/**
 * Tests for `parseEvalCliArgs`.
 *
 * Each case feeds a raw argv array to the parser and checks either the exact
 * parsed options object or the error message thrown for an incomplete command.
 */
describe('parseEvalCliArgs', () => {
  it('parses the workflow-compatible suite config command', () => {
    const argv = [
      'suite',
      '--config',
      'configs/legacy/browseros-agent-weekly.json',
      '--publish',
      'r2',
    ]
    const expected = {
      command: 'suite',
      configPath: 'configs/legacy/browseros-agent-weekly.json',
      publishTarget: 'r2',
    }

    const parsed = parseEvalCliArgs(argv)

    expect(parsed).toEqual(expected)
  })

  it('parses suite variant and model options', () => {
    const argv = [
      'suite',
      '--suite',
      'configs/suites/agisdk-daily-10.json',
      '--variant',
      'kimi-fireworks',
      '--provider',
      'openai-compatible',
      '--model',
      'accounts/fireworks/models/kimi-k2p5',
      '--base-url',
      'https://api.fireworks.ai/inference/v1',
    ]
    const expected = {
      command: 'suite',
      suitePath: 'configs/suites/agisdk-daily-10.json',
      variantId: 'kimi-fireworks',
      provider: 'openai-compatible',
      model: 'accounts/fireworks/models/kimi-k2p5',
      baseUrl: 'https://api.fireworks.ai/inference/v1',
    }

    const parsed = parseEvalCliArgs(argv)

    expect(parsed).toEqual(expected)
  })

  it('keeps the old config shorthand as legacy config mode', () => {
    // No sub-command is given here: only the `-c` shorthand and a path.
    const legacyConfigPath = 'configs/legacy/browseros-agent-weekly.json'

    const parsed = parseEvalCliArgs(['-c', legacyConfigPath])

    expect(parsed).toEqual({
      command: 'legacy',
      configPath: legacyConfigPath,
    })
  })

  it('rejects missing required command options with targeted errors', () => {
    // One row per incomplete invocation, paired with the message it must throw.
    const rejectedInvocations = [
      { argv: ['run'], message: 'run requires --config or --suite' },
      { argv: ['grade'], message: 'grade requires --run' },
      {
        argv: ['publish', '--run', 'results/run-1'],
        message: 'publish requires --target',
      },
    ]

    for (const { argv, message } of rejectedInvocations) {
      const invoke = () => parseEvalCliArgs(argv)
      expect(invoke).toThrow(message)
    }
  })
})
|