fix(mantis): route telegram proof through trusted harness

This commit is contained in:
Ayaan Zaidi
2026-05-11 14:58:07 +05:30
parent 7e1afc0fea
commit becb6e8bbb
3 changed files with 45 additions and 11 deletions

View File

@@ -27,6 +27,7 @@ Inputs are provided as environment variables:
- `MANTIS_OUTPUT_DIR`
- `MANTIS_INSTRUCTIONS`
- `CRABBOX_PROVIDER`
- `OPENCLAW_TELEGRAM_USER_PROOF_CMD`
- optional `CRABBOX_LEASE_ID`
Required workflow:
@@ -44,11 +45,15 @@ Required workflow:
`.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
install and build each worktree with the repo's normal `pnpm` commands.
5. In each worktree, run the real-user Telegram Crabbox proof flow from the
skill. Use `$OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT`, the workflow-provided
`crabbox` binary, and the workflow-provided local `ffmpeg`/`ffprobe`; do
not generate, install, or patch replacement proof tooling during the run.
Use the same proof idea for baseline and candidate. You may iterate and rerun
if the visual result is not convincing.
skill with `$OPENCLAW_TELEGRAM_USER_PROOF_CMD`; do not run
`pnpm qa:telegram-user:crabbox` directly. The proof command comes from the
trusted workflow checkout while the current directory controls which
baseline or candidate OpenClaw build is tested. Use
`$OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT`, the workflow-provided `crabbox`
binary, and the workflow-provided local `ffmpeg`/`ffprobe`; do not generate,
install, or patch replacement proof tooling during the run. Use the same
proof idea for baseline and candidate. You may iterate and rerun if the
visual result is not convincing.
6. Open Telegram Desktop directly to the newest relevant message with the
runner `view` command before finishing each recording. Keep the chat scrolled
to the bottom so new proof messages appear in-frame.

View File

@@ -64,9 +64,8 @@ jobs:
github.event_name == 'issue_comment' &&
github.event.issue.pull_request &&
(
contains(github.event.comment.body, '@Mantis') ||
contains(github.event.comment.body, '@mantis') ||
contains(github.event.comment.body, '/mantis')
contains(github.event.comment.body, '@openclaw-mantis') ||
contains(github.event.comment.body, '/openclaw-mantis')
)
)
}}
@@ -144,7 +143,7 @@ jobs:
const normalized = body.toLowerCase();
const requested =
(normalized.includes("@mantis") || normalized.includes("/mantis")) &&
(normalized.includes("@openclaw-mantis") || normalized.includes("/openclaw-mantis")) &&
normalized.includes("telegram") &&
(normalized.includes("desktop") || normalized.includes("native")) &&
normalized.includes("proof");
@@ -336,6 +335,14 @@ jobs:
run: |
set -euo pipefail
test -f scripts/e2e/telegram-user-driver.py
cat >"${RUNNER_TEMP}/openclaw-telegram-user-crabbox-proof" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
exec node --import tsx "${GITHUB_WORKSPACE}/scripts/e2e/telegram-user-crabbox-proof.ts" "$@"
EOF
chmod 0755 "${RUNNER_TEMP}/openclaw-telegram-user-crabbox-proof"
sudo install -m 0755 "${RUNNER_TEMP}/openclaw-telegram-user-crabbox-proof" /usr/local/bin/openclaw-telegram-user-crabbox-proof
/usr/local/bin/openclaw-telegram-user-crabbox-proof --help >/dev/null
media_tools="${RUNNER_TEMP}/mantis-media-tools"
install -d "$media_tools"
curl --fail --location --retry 3 --retry-delay 2 \
@@ -370,7 +377,7 @@ jobs:
printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"'
printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"'
printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT"'
printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD"'
} | sudo tee /etc/sudoers.d/mantis-codex-env >/dev/null
sudo chmod 0440 /etc/sudoers.d/mantis-codex-env
codex_home="/tmp/mantis-codex-home-${GITHUB_RUN_ID}"
@@ -409,6 +416,7 @@ jobs:
OPENCLAW_TELEGRAM_USER_CRABBOX_BIN: /usr/local/bin/crabbox
OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT: ${{ github.workspace }}/scripts/e2e/telegram-user-driver.py
OPENCLAW_TELEGRAM_USER_PROOF_CMD: /usr/local/bin/openclaw-telegram-user-crabbox-proof
with:
openai-api-key: ${{ secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
prompt-file: .github/codex/prompts/mantis-telegram-desktop-proof.md

View File

@@ -7,6 +7,7 @@ const USER_DRIVER = "scripts/e2e/telegram-user-driver.py";
const PACKAGE_JSON = "package.json";
const WORKFLOW = ".github/workflows/mantis-telegram-desktop-proof.yml";
const LIVE_WORKFLOW = ".github/workflows/mantis-telegram-live.yml";
const PROMPT = ".github/codex/prompts/mantis-telegram-desktop-proof.md";
type WorkflowStep = {
env?: Record<string, string>;
@@ -57,6 +58,15 @@ describe("Mantis Telegram Desktop proof workflow", () => {
expect(liveWorkflow.env?.PNPM_VERSION?.split(".", 1)[0]).toBe(pnpmMajor);
});
it("uses the OpenClaw Mantis mention as the comment trigger", () => {
  // Guards the comment trigger rename: the workflow must react only to the
  // namespaced OpenClaw Mantis mention/command, never to the legacy bare ones.
  const workflow = readFileSync(WORKFLOW, "utf8");

  // The namespaced triggers must be present.
  expect(workflow).toContain("@openclaw-mantis");
  expect(workflow).toContain("/openclaw-mantis");

  // The legacy mention triggers must be gone in both casings.
  expect(workflow).not.toContain("@Mantis");
  expect(workflow).not.toContain("@mantis");

  // The legacy slash trigger can be spelled with either quote style: the
  // job-level `if:` expression uses single-quoted string literals, while the
  // inline script uses double-quoted ones. The quotes are part of the needle
  // so that "/openclaw-mantis" itself does not match.
  expect(workflow).not.toContain("'/mantis'");
  expect(workflow).not.toContain('"/mantis"');
});
it("uses the repo-owned Telegram user driver by default", () => {
expect(existsSync(USER_DRIVER)).toBe(true);
expect(readFileSync(PROOF_SCRIPT, "utf8")).toContain(
@@ -68,6 +78,10 @@ describe("Mantis Telegram Desktop proof workflow", () => {
it("installs local proof tools before the Codex agent runs", () => {
const install = workflowStep("Install local proof tools");
expect(install.run).toContain("test -f scripts/e2e/telegram-user-driver.py");
expect(install.run).toContain("/usr/local/bin/openclaw-telegram-user-crabbox-proof");
expect(install.run).toContain(
'exec node --import tsx "${GITHUB_WORKSPACE}/scripts/e2e/telegram-user-crabbox-proof.ts" "$@"',
);
expect(install.run).toContain("BtbN/FFmpeg-Builds");
expect(install.run).toContain("ffmpeg-master-latest-linux64-gpl.tar.xz");
expect(install.run).toContain("/usr/local/bin/ffmpeg");
@@ -78,6 +92,9 @@ describe("Mantis Telegram Desktop proof workflow", () => {
expect(agent.env?.OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT).toBe(
"${{ github.workspace }}/scripts/e2e/telegram-user-driver.py",
);
expect(agent.env?.OPENCLAW_TELEGRAM_USER_PROOF_CMD).toBe(
"/usr/local/bin/openclaw-telegram-user-crabbox-proof",
);
expect(agent.env?.OPENCLAW_TELEGRAM_USER_CRABBOX_BIN).toBe("/usr/local/bin/crabbox");
expect(agent.env?.CRABBOX_COORDINATOR).toContain(
"secrets.CRABBOX_COORDINATOR || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR",
@@ -88,8 +105,12 @@ describe("Mantis Telegram Desktop proof workflow", () => {
const prepare = workflowStep("Prepare Codex user");
expect(prepare.run).toContain(
"OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT",
"OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD",
);
const prompt = readFileSync(PROMPT, "utf8");
expect(prompt).toContain("$OPENCLAW_TELEGRAM_USER_PROOF_CMD");
expect(prompt).toContain("do not run\n `pnpm qa:telegram-user:crabbox` directly");
});
it("checks the Telegram user driver before leasing credentials", () => {