mirror of
https://github.com/browseros-ai/BrowserOS.git
synced 2026-05-19 19:41:06 +00:00
* feat(eval): add suite variant config bridge
* feat(eval): add stable run artifacts
* refactor(eval): add shared grader contract
* feat(eval): persist grader artifacts
* refactor(eval): rename runner layers
* refactor(eval): add executor backend boundary
* refactor(eval): split clado backend
* feat(eval): add workflow compatible cli
* feat(eval): add r2 publisher module
* ci(eval): migrate weekly workflow to eval cli
* docs(eval): document suite pipeline
* chore(eval): verify pipeline refactor
* fix: address review feedback for PR #875
* docs(eval): add env example
* docs(eval): explain suites and variants
* chore(eval): organize config layouts
* chore(eval): colocate grader python evaluators
67 lines
1.7 KiB
TypeScript
Vendored
67 lines
1.7 KiB
TypeScript
Vendored
import { describe, expect, it } from 'bun:test'
|
|
import { mkdtemp, writeFile } from 'node:fs/promises'
|
|
import { tmpdir } from 'node:os'
|
|
import { join } from 'node:path'
|
|
import { runPythonJsonEvaluator } from '../../src/grading/python-evaluator'
|
|
|
|
/**
 * Writes `source` to a file named `script.py` inside a freshly created
 * temporary directory and returns the path to that file.
 *
 * Each call creates its own directory (prefix `eval-python-` under the OS
 * temp dir), so scripts written by different tests never collide. The
 * directory is not removed by this helper.
 *
 * @param source - Script text written to disk exactly as given.
 * @returns Path of the written `script.py`.
 */
async function writeScript(source: string): Promise<string> {
  const dir = await mkdtemp(join(tmpdir(), 'eval-python-'))
  const script = join(dir, 'script.py')
  await writeFile(script, source)
  return script
}
|
|
|
|
// Exercises runPythonJsonEvaluator against small throwaway Python scripts
// written to disk with writeScript. The embedded Python starts at column 0
// because Python is indentation-sensitive.
describe('runPythonJsonEvaluator', () => {
  it('sends JSON on stdin, captures stderr, and parses stdout JSON', async () => {
    // The script reads JSON from stdin, writes "warning" to stderr, and
    // prints a JSON object echoing the received `value` on stdout.
    const script = await writeScript(`
import json, sys
data = json.loads(sys.stdin.read())
print("warning", file=sys.stderr)
print(json.dumps({"ok": True, "value": data["value"]}))
`)

    const result = await runPythonJsonEvaluator<{ ok: boolean; value: number }>(
      {
        scriptPath: script,
        input: { value: 42 },
        timeoutMs: 5_000,
      },
    )

    // stdout JSON is parsed into `output`; stderr and exit code are surfaced
    // alongside it.
    expect(result.output).toEqual({ ok: true, value: 42 })
    expect(result.stderr).toContain('warning')
    expect(result.exitCode).toBe(0)
  })

  it('reports non-zero exits with stderr', async () => {
    // The script writes a message to stderr and exits with status 3.
    const script = await writeScript(`
import sys
print("bad verifier", file=sys.stderr)
sys.exit(3)
`)

    // The rejection is expected to carry the script's stderr text.
    await expect(
      runPythonJsonEvaluator({
        scriptPath: script,
        input: {},
        timeoutMs: 5_000,
      }),
    ).rejects.toThrow('bad verifier')
  })

  it('enforces timeouts', async () => {
    // The script sleeps for 5 s, far longer than the 50 ms budget below.
    const script = await writeScript(`
import time
time.sleep(5)
`)

    await expect(
      runPythonJsonEvaluator({
        scriptPath: script,
        input: {},
        timeoutMs: 50,
      }),
    ).rejects.toThrow('timed out')
  })
})
|