From becb6e8bbbc2faaba3d46b36352e62417db32fe8 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Mon, 11 May 2026 14:58:07 +0530 Subject: [PATCH] fix(mantis): route telegram proof through trusted harness --- .../prompts/mantis-telegram-desktop-proof.md | 15 ++++++++---- .../mantis-telegram-desktop-proof.yml | 18 +++++++++++---- ...is-telegram-desktop-proof-workflow.test.ts | 23 ++++++++++++++++++- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/.github/codex/prompts/mantis-telegram-desktop-proof.md b/.github/codex/prompts/mantis-telegram-desktop-proof.md index cfc7c8e0c8b..a17073b1e43 100644 --- a/.github/codex/prompts/mantis-telegram-desktop-proof.md +++ b/.github/codex/prompts/mantis-telegram-desktop-proof.md @@ -27,6 +27,7 @@ Inputs are provided as environment variables: - `MANTIS_OUTPUT_DIR` - `MANTIS_INSTRUCTIONS` - `CRABBOX_PROVIDER` +- `OPENCLAW_TELEGRAM_USER_PROOF_CMD` - optional `CRABBOX_LEASE_ID` Required workflow: @@ -44,11 +45,15 @@ Required workflow: `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then install and build each worktree with the repo's normal `pnpm` commands. 5. In each worktree, run the real-user Telegram Crabbox proof flow from the - skill. Use `$OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT`, the workflow-provided - `crabbox` binary, and the workflow-provided local `ffmpeg`/`ffprobe`; do - not generate, install, or patch replacement proof tooling during the run. - Use the same proof idea for baseline and candidate. You may iterate and rerun - if the visual result is not convincing. + skill with `$OPENCLAW_TELEGRAM_USER_PROOF_CMD`; do not run + `pnpm qa:telegram-user:crabbox` directly. The proof command comes from the + trusted workflow checkout while the current directory controls which + baseline or candidate OpenClaw build is tested. 
Use + `$OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT`, the workflow-provided `crabbox` + binary, and the workflow-provided local `ffmpeg`/`ffprobe`; do not generate, + install, or patch replacement proof tooling during the run. Use the same + proof idea for baseline and candidate. You may iterate and rerun if the + visual result is not convincing. 6. Open Telegram Desktop directly to the newest relevant message with the runner `view` command before finishing each recording. Keep the chat scrolled to the bottom so new proof messages appear in-frame. diff --git a/.github/workflows/mantis-telegram-desktop-proof.yml b/.github/workflows/mantis-telegram-desktop-proof.yml index 06afa9793cc..ecd24f02408 100644 --- a/.github/workflows/mantis-telegram-desktop-proof.yml +++ b/.github/workflows/mantis-telegram-desktop-proof.yml @@ -64,9 +64,8 @@ jobs: github.event_name == 'issue_comment' && github.event.issue.pull_request && ( - contains(github.event.comment.body, '@Mantis') || - contains(github.event.comment.body, '@mantis') || - contains(github.event.comment.body, '/mantis') + contains(github.event.comment.body, '@openclaw-mantis') || + contains(github.event.comment.body, '/openclaw-mantis') ) ) }} @@ -144,7 +143,7 @@ jobs: const normalized = body.toLowerCase(); const requested = - (normalized.includes("@mantis") || normalized.includes("/mantis")) && + (normalized.includes("@openclaw-mantis") || normalized.includes("/openclaw-mantis")) && normalized.includes("telegram") && (normalized.includes("desktop") || normalized.includes("native")) && normalized.includes("proof"); @@ -336,6 +335,14 @@ jobs: run: | set -euo pipefail test -f scripts/e2e/telegram-user-driver.py + cat >"${RUNNER_TEMP}/openclaw-telegram-user-crabbox-proof" <<'EOF' + #!/usr/bin/env bash + set -euo pipefail + exec node --import tsx "${GITHUB_WORKSPACE}/scripts/e2e/telegram-user-crabbox-proof.ts" "$@" + EOF + chmod 0755 "${RUNNER_TEMP}/openclaw-telegram-user-crabbox-proof" + sudo install -m 0755 
"${RUNNER_TEMP}/openclaw-telegram-user-crabbox-proof" /usr/local/bin/openclaw-telegram-user-crabbox-proof + /usr/local/bin/openclaw-telegram-user-crabbox-proof --help >/dev/null media_tools="${RUNNER_TEMP}/mantis-media-tools" install -d "$media_tools" curl --fail --location --retry 3 --retry-delay 2 \ @@ -370,7 +377,7 @@ jobs: printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"' printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"' printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"' - printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT"' + printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD"' } | sudo tee /etc/sudoers.d/mantis-codex-env >/dev/null sudo chmod 0440 /etc/sudoers.d/mantis-codex-env codex_home="/tmp/mantis-codex-home-${GITHUB_RUN_ID}" @@ -409,6 +416,7 @@ jobs: OPENCLAW_TELEGRAM_USER_CRABBOX_BIN: /usr/local/bin/crabbox OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }} OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT: ${{ github.workspace }}/scripts/e2e/telegram-user-driver.py + OPENCLAW_TELEGRAM_USER_PROOF_CMD: /usr/local/bin/openclaw-telegram-user-crabbox-proof with: openai-api-key: ${{ secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }} prompt-file: .github/codex/prompts/mantis-telegram-desktop-proof.md diff --git a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts 
b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts index 1708009ab11..b2741619288 100644 --- a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts +++ b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts @@ -7,6 +7,7 @@ const USER_DRIVER = "scripts/e2e/telegram-user-driver.py"; const PACKAGE_JSON = "package.json"; const WORKFLOW = ".github/workflows/mantis-telegram-desktop-proof.yml"; const LIVE_WORKFLOW = ".github/workflows/mantis-telegram-live.yml"; +const PROMPT = ".github/codex/prompts/mantis-telegram-desktop-proof.md"; type WorkflowStep = { env?: Record<string, string>; @@ -57,6 +58,15 @@ describe("Mantis Telegram Desktop proof workflow", () => { expect(liveWorkflow.env?.PNPM_VERSION?.split(".", 1)[0]).toBe(pnpmMajor); }); + it("uses the OpenClaw Mantis mention as the comment trigger", () => { + const workflow = readFileSync(WORKFLOW, "utf8"); + expect(workflow).toContain("@openclaw-mantis"); + expect(workflow).toContain("/openclaw-mantis"); + expect(workflow).not.toContain("@Mantis"); + expect(workflow).not.toContain("@mantis"); + expect(workflow).not.toContain('"/mantis"'); + }); + it("uses the repo-owned Telegram user driver by default", () => { expect(existsSync(USER_DRIVER)).toBe(true); expect(readFileSync(PROOF_SCRIPT, "utf8")).toContain( @@ -68,6 +78,10 @@ describe("Mantis Telegram Desktop proof workflow", () => { it("installs local proof tools before the Codex agent runs", () => { const install = workflowStep("Install local proof tools"); expect(install.run).toContain("test -f scripts/e2e/telegram-user-driver.py"); + expect(install.run).toContain("/usr/local/bin/openclaw-telegram-user-crabbox-proof"); + expect(install.run).toContain( + 'exec node --import tsx "${GITHUB_WORKSPACE}/scripts/e2e/telegram-user-crabbox-proof.ts" "$@"', + ); expect(install.run).toContain("BtbN/FFmpeg-Builds"); expect(install.run).toContain("ffmpeg-master-latest-linux64-gpl.tar.xz"); expect(install.run).toContain("/usr/local/bin/ffmpeg"); @@ -78,6 +92,9 @@ 
describe("Mantis Telegram Desktop proof workflow", () => { expect(agent.env?.OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT).toBe( "${{ github.workspace }}/scripts/e2e/telegram-user-driver.py", ); + expect(agent.env?.OPENCLAW_TELEGRAM_USER_PROOF_CMD).toBe( + "/usr/local/bin/openclaw-telegram-user-crabbox-proof", + ); expect(agent.env?.OPENCLAW_TELEGRAM_USER_CRABBOX_BIN).toBe("/usr/local/bin/crabbox"); expect(agent.env?.CRABBOX_COORDINATOR).toContain( "secrets.CRABBOX_COORDINATOR || secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR", @@ -88,8 +105,12 @@ describe("Mantis Telegram Desktop proof workflow", () => { const prepare = workflowStep("Prepare Codex user"); expect(prepare.run).toContain( - "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT", + "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD", ); + + const prompt = readFileSync(PROMPT, "utf8"); + expect(prompt).toContain("$OPENCLAW_TELEGRAM_USER_PROOF_CMD"); + expect(prompt).toContain("do not run\n `pnpm qa:telegram-user:crabbox` directly"); }); it("checks the Telegram user driver before leasing credentials", () => {