ci(mantis): prepare telegram proof tooling

This commit is contained in:
Ayaan Zaidi
2026-05-11 12:47:27 +05:30
parent 4facdded3e
commit 6729044966
3 changed files with 86 additions and 4 deletions

View File

@@ -44,8 +44,11 @@ Required workflow:
`.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
install and build each worktree with the repo's normal `pnpm` commands.
5. In each worktree, run the real-user Telegram Crabbox proof flow from the
skill. Use the same proof idea for baseline and candidate. You may iterate
and rerun if the visual result is not convincing.
skill. Use `scripts/e2e/telegram-user-driver.py`, the workflow-provided
`crabbox` binary, and the workflow-provided local media tools; do not
generate or patch replacement proof tooling during the run. Use the same
proof idea for baseline and candidate. You may iterate and rerun if the
visual result is not convincing.
6. Open Telegram Desktop directly to the newest relevant message with the
runner `view` command before finishing each recording. Keep the chat scrolled
to the bottom so new proof messages appear in-frame.

View File

@@ -331,6 +331,16 @@ jobs:
crabbox --version
crabbox media preview --help >/dev/null
- name: Install local proof tools
shell: bash
run: |
set -euo pipefail
sudo apt-get update -y
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg gifsicle
test -f scripts/e2e/telegram-user-driver.py
ffmpeg -version >/dev/null
gifsicle --version >/dev/null
- name: Ensure agent key exists
env:
OPENAI_API_KEY: ${{ secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
@@ -352,6 +362,7 @@ jobs:
printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"'
printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"'
printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT"'
} | sudo tee /etc/sudoers.d/mantis-codex-env >/dev/null
sudo chmod 0440 /etc/sudoers.d/mantis-codex-env
codex_home="/tmp/mantis-codex-home-${GITHUB_RUN_ID}"
@@ -375,8 +386,8 @@ jobs:
CANDIDATE_SHA: ${{ needs.validate_refs.outputs.candidate_revision }}
CRABBOX_ACCESS_CLIENT_ID: ${{ secrets.CRABBOX_ACCESS_CLIENT_ID }}
CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }}
CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR }}
CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN }}
CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR }}
CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }}
CRABBOX_LEASE_ID: ${{ needs.resolve_request.outputs.lease_id }}
CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
GH_TOKEN: ${{ github.token }}
@@ -387,6 +398,9 @@ jobs:
OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }}
OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR }}
OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }}
OPENCLAW_TELEGRAM_USER_CRABBOX_BIN: /usr/local/bin/crabbox
OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT: scripts/e2e/telegram-user-driver.py
with:
openai-api-key: ${{ secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
prompt-file: .github/codex/prompts/mantis-telegram-desktop-proof.md

View File

@@ -0,0 +1,65 @@
import { existsSync, readFileSync } from "node:fs";
import { describe, expect, it } from "vitest";
import { parse } from "yaml";
const PROOF_SCRIPT = "scripts/e2e/telegram-user-crabbox-proof.ts";
const USER_DRIVER = "scripts/e2e/telegram-user-driver.py";
const WORKFLOW = ".github/workflows/mantis-telegram-desktop-proof.yml";
type WorkflowStep = {
env?: Record<string, string>;
name?: string;
run?: string;
uses?: string;
};
type WorkflowJob = {
steps?: WorkflowStep[];
};
type Workflow = {
jobs?: Record<string, WorkflowJob>;
};
function workflowStep(name: string): WorkflowStep {
const workflow = parse(readFileSync(WORKFLOW, "utf8")) as Workflow;
const steps = workflow.jobs?.run_telegram_desktop_proof?.steps ?? [];
const step = steps.find((candidate) => candidate.name === name);
if (!step) {
throw new Error(`Missing workflow step: ${name}`);
}
return step;
}
describe("Mantis Telegram Desktop proof workflow", () => {
it("uses the repo-owned Telegram user driver by default", () => {
expect(existsSync(USER_DRIVER)).toBe(true);
expect(readFileSync(PROOF_SCRIPT, "utf8")).toContain(
'const DEFAULT_USER_DRIVER = "scripts/e2e/telegram-user-driver.py";',
);
expect(readFileSync(USER_DRIVER, "utf8")).toContain("/usr/local/lib/libtdjson.so");
});
it("preinstalls local proof tools before the Codex agent runs", () => {
const install = workflowStep("Install local proof tools");
expect(install.run).toContain("apt-get install -y ffmpeg gifsicle");
expect(install.run).toContain("test -f scripts/e2e/telegram-user-driver.py");
const agent = workflowStep("Run Codex Mantis Telegram agent");
expect(agent.env?.OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT).toBe(
"scripts/e2e/telegram-user-driver.py",
);
expect(agent.env?.OPENCLAW_TELEGRAM_USER_CRABBOX_BIN).toBe("/usr/local/bin/crabbox");
expect(agent.env?.CRABBOX_COORDINATOR).toContain(
"secrets.CRABBOX_COORDINATOR || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR",
);
expect(agent.env?.CRABBOX_COORDINATOR_TOKEN).toContain(
"secrets.CRABBOX_COORDINATOR_TOKEN || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN",
);
const prepare = workflowStep("Prepare Codex user");
expect(prepare.run).toContain(
"OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT",
);
});
});