From d4c751998910b1fb3abd46924f3c819ca4d76d9d Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Mon, 11 May 2026 17:27:32 +0530 Subject: [PATCH] ci(mantis): allow fork telegram proof --- .../prompts/mantis-telegram-desktop-proof.md | 7 ++++ .../mantis-telegram-desktop-proof.yml | 40 +++++++++++++++++-- scripts/e2e/telegram-user-crabbox-proof.ts | 35 +++++++++++++++- ...is-telegram-desktop-proof-workflow.test.ts | 27 +++++++++++++ 4 files changed, 104 insertions(+), 5 deletions(-) diff --git a/.github/codex/prompts/mantis-telegram-desktop-proof.md b/.github/codex/prompts/mantis-telegram-desktop-proof.md index a17073b1e43..e3c4ec321be 100644 --- a/.github/codex/prompts/mantis-telegram-desktop-proof.md +++ b/.github/codex/prompts/mantis-telegram-desktop-proof.md @@ -24,6 +24,7 @@ Inputs are provided as environment variables: - `BASELINE_SHA` - `CANDIDATE_REF` - `CANDIDATE_SHA` +- `MANTIS_CANDIDATE_TRUST` - `MANTIS_OUTPUT_DIR` - `MANTIS_INSTRUCTIONS` - `CRABBOX_PROVIDER` @@ -44,6 +45,12 @@ Required workflow: `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then install and build each worktree with the repo's normal `pnpm` commands. + If `MANTIS_CANDIDATE_TRUST` is `maintainer-approved-fork-pr-head`, treat the + candidate worktree as untrusted fork code: do not pass GitHub, OpenAI, + Crabbox, Convex, or other workflow secrets into candidate install, build, or + runtime commands. The candidate SUT may receive only the proof runner's + short-lived Telegram bot token, generated local config/state paths, and mock + model key needed for this isolated proof. 5. In each worktree, run the real-user Telegram Crabbox proof flow from the skill with `$OPENCLAW_TELEGRAM_USER_PROOF_CMD`; do not run `pnpm qa:telegram-user:crabbox` directly. The proof command comes from the diff --git a/.github/workflows/mantis-telegram-desktop-proof.yml b/.github/workflows/mantis-telegram-desktop-proof.yml index ecd24f02408..eb26cf7d62e 100644 --- a/.github/workflows/mantis-telegram-desktop-proof.yml +++ b/.github/workflows/mantis-telegram-desktop-proof.yml @@ -35,6 +35,11 @@ on: description: Optional existing Crabbox desktop lease id or slug to reuse required: false type: string + allow_fork_candidate: + description: Allow a fork PR head candidate when pr_number points at that PR + required: false + default: false + type: boolean permissions: contents: write @@ -95,6 +100,7 @@ jobs: needs: authorize_actor runs-on: ubuntu-24.04 outputs: + allow_fork_candidate: ${{ steps.resolve.outputs.allow_fork_candidate }} baseline_ref: ${{ steps.resolve.outputs.baseline_ref }} candidate_ref: ${{ steps.resolve.outputs.candidate_ref }} crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }} @@ -119,6 +125,10 @@ jobs: if (eventName === "workflow_dispatch") { const inputs = context.payload.inputs ?? {}; setOutput("should_run", "true"); + setOutput( + "allow_fork_candidate", + String(inputs.allow_fork_candidate) === "true" ? "true" : "false", + ); setOutput("baseline_ref", inputs.baseline_ref || "main"); setOutput("candidate_ref", inputs.candidate_ref || "main"); setOutput("pr_number", inputs.pr_number || ""); @@ -150,6 +160,7 @@ jobs: if (!requested) { core.notice("Comment mentioned Mantis but did not request Telegram desktop proof."); setOutput("should_run", "false"); + setOutput("allow_fork_candidate", "false"); setOutput("baseline_ref", ""); setOutput("candidate_ref", ""); setOutput("pr_number", ""); @@ -192,8 +203,10 @@ jobs: rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase()) ? rawCandidate : mergedCandidate || pr.head.sha; + const allowForkCandidate = /\bfork[-_]ok\b/i.test(body); setOutput("should_run", "true"); + setOutput("allow_fork_candidate", allowForkCandidate ? "true" : "false"); setOutput("baseline_ref", baselineMatch?.[1] || mergedBaseline || "main"); setOutput("candidate_ref", candidate); setOutput("pr_number", String(issue.number)); @@ -217,6 +230,7 @@ jobs: outputs: baseline_revision: ${{ steps.validate.outputs.baseline_revision }} candidate_revision: ${{ steps.validate.outputs.candidate_revision }} + candidate_trust: ${{ steps.validate.outputs.candidate_trust }} steps: - name: Checkout harness ref uses: actions/checkout@v6 @@ -227,6 +241,7 @@ jobs: - name: Validate refs are trusted id: validate env: + ALLOW_FORK_CANDIDATE: ${{ needs.resolve_request.outputs.allow_fork_candidate }} BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }} CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }} GH_TOKEN: ${{ github.token }} @@ -264,25 +279,43 @@ jobs: )" if [[ "$pr_head_count" != "0" ]]; then reason="open-pr-head" + elif [[ "$label" == "candidate" && "${ALLOW_FORK_CANDIDATE:-false}" == "true" && -n "${PR_NUMBER:-}" ]]; then + local fork_pr_head_count + fork_pr_head_count="$( + gh api \ + -H "Accept: application/vnd.github+json" \ + "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \ + --jq 'if .state == "open" and .head.repo.full_name != "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'" then 1 else 0 end' + )" + if [[ "$fork_pr_head_count" == "1" ]]; then + reason="maintainer-approved-fork-pr-head" + fi fi fi if [[ -z "$reason" ]]; then - echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run." >&2 + echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run. Add fork-ok only for a maintainer-approved fork PR head." >&2 exit 1 fi - printf '%s\n' "$revision" + printf '%s\t%s\n' "$revision" "$reason" } baseline_revision="$(validate_ref baseline "$BASELINE_REF")" + baseline_trust="${baseline_revision#*$'\t'}" + baseline_revision="${baseline_revision%%$'\t'*}" candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")" + candidate_trust="${candidate_revision#*$'\t'}" + candidate_revision="${candidate_revision%%$'\t'*}" echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT" echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT" + echo "candidate_trust=${candidate_trust}" >> "$GITHUB_OUTPUT" { echo "baseline: \`${BASELINE_REF}\`" echo "baseline SHA: \`${baseline_revision}\`" + echo "baseline trust: \`${baseline_trust}\`" echo "candidate: \`${CANDIDATE_REF}\`" echo "candidate SHA: \`${candidate_revision}\`" + echo "candidate trust: \`${candidate_trust}\`" } >> "$GITHUB_STEP_SUMMARY" run_telegram_desktop_proof: @@ -375,7 +408,7 @@ jobs: printf '%s\n' 'Defaults env_keep += "CODEX_HOME CODEX_INTERNAL_ORIGINATOR_OVERRIDE"' printf '%s\n' 'Defaults env_keep += "BASELINE_REF BASELINE_SHA CANDIDATE_REF CANDIDATE_SHA"' printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"' - printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"' + printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_CANDIDATE_TRUST MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"' printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"' printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD"' } | sudo tee /etc/sudoers.d/mantis-codex-env >/dev/null @@ -406,6 +439,7 @@ jobs: CRABBOX_LEASE_ID: ${{ needs.resolve_request.outputs.lease_id }} CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }} GH_TOKEN: ${{ github.token }} + MANTIS_CANDIDATE_TRUST: ${{ needs.validate_refs.outputs.candidate_trust }} MANTIS_INSTRUCTIONS: ${{ needs.resolve_request.outputs.instructions }} MANTIS_OUTPUT_DIR: ${{ env.MANTIS_OUTPUT_DIR }} MANTIS_PR_NUMBER: ${{ needs.resolve_request.outputs.pr_number }} diff --git a/scripts/e2e/telegram-user-crabbox-proof.ts b/scripts/e2e/telegram-user-crabbox-proof.ts index 0f6194bd605..22a4db5990a 100644 --- a/scripts/e2e/telegram-user-crabbox-proof.ts +++ b/scripts/e2e/telegram-user-crabbox-proof.ts @@ -417,9 +417,40 @@ function optionalString(source: JsonObject, key: string) { return typeof value === "string" && value.trim() ? value.trim() : undefined; } +function childProcessBaseEnv() { + const keys = [ + "CI", + "COREPACK_HOME", + "FORCE_COLOR", + "HOME", + "LANG", + "LC_ALL", + "NODE_OPTIONS", + "OPENCLAW_BUILD_PRIVATE_QA", + "OPENCLAW_ENABLE_PRIVATE_QA_CLI", + "PATH", + "PNPM_HOME", + "SHELL", + "TEMP", + "TMP", + "TMPDIR", + "USER", + "XDG_CACHE_HOME", + "XDG_CONFIG_HOME", + ]; + const env: NodeJS.ProcessEnv = {}; + for (const key of keys) { + const value = process.env[key]; + if (value) { + env[key] = value; + } + } + return env; +} + function mockServerEnv(params: { mockPort: number; mockResponseText: string; requestLog: string }) { return { - ...process.env, + ...childProcessBaseEnv(), MOCK_PORT: String(params.mockPort), MOCK_REQUEST_LOG: params.requestLog, SUCCESS_MARKER: params.mockResponseText, @@ -428,7 +459,7 @@ function mockServerEnv(params: { mockPort: number; mockResponseText: string; req function gatewayEnv(params: { configPath: string; stateDir: string; sutToken: string }) { return { - ...process.env, + ...childProcessBaseEnv(), OPENAI_API_KEY: "sk-openclaw-e2e-mock", OPENCLAW_CONFIG_PATH: params.configPath, OPENCLAW_STATE_DIR: params.stateDir, diff --git a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts index b2741619288..56b74551acf 100644 --- a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts +++ b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts @@ -107,12 +107,31 @@ describe("Mantis Telegram Desktop proof workflow", () => { expect(prepare.run).toContain( "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD", ); + expect(prepare.run).toContain("MANTIS_CANDIDATE_TRUST"); const prompt = readFileSync(PROMPT, "utf8"); expect(prompt).toContain("$OPENCLAW_TELEGRAM_USER_PROOF_CMD"); expect(prompt).toContain("do not run\n `pnpm qa:telegram-user:crabbox` directly"); }); + it("requires explicit maintainer fork approval before accepting fork PR heads", () => { + const workflowText = readFileSync(WORKFLOW, "utf8"); + expect(workflowText).toContain("@openclaw-mantis"); + expect(workflowText).toContain("fork[-_]ok"); + expect(workflowText).toContain("ALLOW_FORK_CANDIDATE"); + expect(workflowText).toContain("maintainer-approved-fork-pr-head"); + expect(workflowText).toContain(".head.repo.full_name !="); + + const agent = workflowStep("Run Codex Mantis Telegram agent"); + expect(agent.env?.MANTIS_CANDIDATE_TRUST).toBe( + "${{ needs.validate_refs.outputs.candidate_trust }}", + ); + + const prompt = readFileSync(PROMPT, "utf8"); + expect(prompt).toContain("MANTIS_CANDIDATE_TRUST"); + expect(prompt).toContain("untrusted fork code"); + }); + it("checks the Telegram user driver before leasing credentials", () => { const proofScript = readFileSync(PROOF_SCRIPT, "utf8"); const startSession = proofScript.slice( @@ -132,4 +151,12 @@ describe("Mantis Telegram Desktop proof workflow", () => { defaultProof.indexOf("leaseCredential({ localRoot, opts, root })"), ); }); + + it("does not pass the full workflow environment into the local Telegram SUT", () => { + const proofScript = readFileSync(PROOF_SCRIPT, "utf8"); + expect(proofScript).toContain("function childProcessBaseEnv()"); + expect(proofScript).toContain("...childProcessBaseEnv()"); + expect(proofScript).not.toContain("...process.env,\n OPENAI_API_KEY"); + expect(proofScript).not.toContain("...process.env,\n MOCK_PORT"); + }); });