fix(mantis): validate telegram driver before lease

This commit is contained in:
Ayaan Zaidi
2026-05-11 14:11:27 +05:30
parent 4ddccbd28d
commit 96d4dd68da
4 changed files with 34 additions and 7 deletions

View File

@@ -44,7 +44,7 @@ Required workflow:
`.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
install and build each worktree with the repo's normal `pnpm` commands. install and build each worktree with the repo's normal `pnpm` commands.
5. In each worktree, run the real-user Telegram Crabbox proof flow from the 5. In each worktree, run the real-user Telegram Crabbox proof flow from the
skill. Use `scripts/e2e/telegram-user-driver.py`, the workflow-provided skill. Use `$OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT`, the workflow-provided
`crabbox` binary, and the workflow-provided local `ffmpeg`/`ffprobe`; do `crabbox` binary, and the workflow-provided local `ffmpeg`/`ffprobe`; do
not generate, install, or patch replacement proof tooling during the run. not generate, install, or patch replacement proof tooling during the run.
Use the same proof idea for baseline and candidate. You may iterate and rerun Use the same proof idea for baseline and candidate. You may iterate and rerun

View File

@@ -408,7 +408,7 @@ jobs:
OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }} OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }}
OPENCLAW_TELEGRAM_USER_CRABBOX_BIN: /usr/local/bin/crabbox OPENCLAW_TELEGRAM_USER_CRABBOX_BIN: /usr/local/bin/crabbox
OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }} OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT: scripts/e2e/telegram-user-driver.py OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT: ${{ github.workspace }}/scripts/e2e/telegram-user-driver.py
with: with:
openai-api-key: ${{ secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }} openai-api-key: ${{ secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
prompt-file: .github/codex/prompts/mantis-telegram-desktop-proof.md prompt-file: .github/codex/prompts/mantis-telegram-desktop-proof.md

View File

@@ -1171,12 +1171,17 @@ async function writeExecutable(filePath: string, content: string) {
fs.chmodSync(filePath, 0o700); fs.chmodSync(filePath, 0o700);
} }
/**
 * Resolves the configured user driver script path and verifies it can be used.
 *
 * Callers run this before leasing a credential so that a misconfigured path
 * fails fast instead of after the lease has been taken.
 *
 * @param opts - Run options; only `userDriverScript` is read.
 * @returns The driver script path after it has been passed through `expandHome`.
 * @throws Error if the path does not exist or does not point to a regular file.
 */
function requireUserDriverScript(opts: Options) {
  const userDriverScript = expandHome(opts.userDriverScript);
  if (!fs.existsSync(userDriverScript)) {
    throw new Error(`Missing user driver script: ${opts.userDriverScript}`);
  }
  // existsSync is also true for directories; the script is later copied with a
  // plain `cp`, so anything other than a regular file must be rejected here.
  if (!fs.statSync(userDriverScript).isFile()) {
    throw new Error(`User driver script is not a regular file: ${opts.userDriverScript}`);
  }
  return userDriverScript;
}
async function prepareRemoteState(params: { localRoot: string; opts: Options; root: string }) { async function prepareRemoteState(params: { localRoot: string; opts: Options; root: string }) {
const stateArchive = path.join(params.localRoot, "remote-state.tgz"); const stateArchive = path.join(params.localRoot, "remote-state.tgz");
const userDriverScript = expandHome(params.opts.userDriverScript); const userDriverScript = requireUserDriverScript(params.opts);
if (!fs.existsSync(userDriverScript)) {
throw new Error(`Missing user driver script: ${params.opts.userDriverScript}`);
}
await runCommand({ await runCommand({
command: "cp", command: "cp",
args: [userDriverScript, path.join(params.localRoot, "user-driver.py")], args: [userDriverScript, path.join(params.localRoot, "user-driver.py")],
@@ -1475,6 +1480,7 @@ async function startSession(root: string, opts: Options, outputDir: string) {
}; };
} }
requireUserDriverScript(opts);
const credential = await leaseCredential({ localRoot, opts, root }); const credential = await leaseCredential({ localRoot, opts, root });
const sut = opts.sutUsername const sut = opts.sutUsername
? { id: "", username: opts.sutUsername } ? { id: "", username: opts.sutUsername }
@@ -1960,6 +1966,7 @@ async function main() {
return; return;
} }
requireUserDriverScript(opts);
credential = await leaseCredential({ localRoot, opts, root }); credential = await leaseCredential({ localRoot, opts, root });
const sut = opts.sutUsername const sut = opts.sutUsername
? { id: "", username: opts.sutUsername } ? { id: "", username: opts.sutUsername }

View File

@@ -51,7 +51,7 @@ describe("Mantis Telegram Desktop proof workflow", () => {
const agent = workflowStep("Run Codex Mantis Telegram agent"); const agent = workflowStep("Run Codex Mantis Telegram agent");
expect(agent.env?.OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT).toBe( expect(agent.env?.OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT).toBe(
"scripts/e2e/telegram-user-driver.py", "${{ github.workspace }}/scripts/e2e/telegram-user-driver.py",
); );
expect(agent.env?.OPENCLAW_TELEGRAM_USER_CRABBOX_BIN).toBe("/usr/local/bin/crabbox"); expect(agent.env?.OPENCLAW_TELEGRAM_USER_CRABBOX_BIN).toBe("/usr/local/bin/crabbox");
expect(agent.env?.CRABBOX_COORDINATOR).toContain( expect(agent.env?.CRABBOX_COORDINATOR).toContain(
@@ -66,4 +66,24 @@ describe("Mantis Telegram Desktop proof workflow", () => {
"OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT", "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT",
); );
}); });
// Static ordering check on the proof script source: in both the session-start
// path and the default `main` path, the driver-script validation call must
// appear textually before the credential lease call.
it("checks the Telegram user driver before leasing credentials", () => {
  const source = readFileSync(PROOF_SCRIPT, "utf8");
  const driverCheck = "requireUserDriverScript(opts);";
  const leaseCall = "leaseCredential({ localRoot, opts, root })";

  // Text from the start of `startSession` up to the next function we anchor on.
  const sessionStart = source.indexOf("async function startSession");
  const sessionEnd = source.indexOf("async function sendSessionProbe");
  const startSessionText = source.slice(sessionStart, sessionEnd);
  // Text from the start of `main` to the end of the file.
  const mainText = source.slice(source.indexOf("async function main"));

  // Presence checks first, so the index comparisons below never compare -1.
  expect(startSessionText).toContain(driverCheck);
  expect(startSessionText).toContain(leaseCall);
  expect(mainText).toContain(driverCheck);
  expect(mainText).toContain(leaseCall);

  const sessionCheckAt = startSessionText.indexOf(driverCheck);
  expect(sessionCheckAt).toBeLessThan(startSessionText.indexOf(leaseCall));
  const mainCheckAt = mainText.indexOf(driverCheck);
  expect(mainCheckAt).toBeLessThan(mainText.indexOf(leaseCall));
});
}); });