From d4c751998910b1fb3abd46924f3c819ca4d76d9d Mon Sep 17 00:00:00 2001
From: Ayaan Zaidi <hi@obviy.us>
Date: Mon, 11 May 2026 17:27:32 +0530
Subject: [PATCH] ci(mantis): allow fork telegram proof

---
 .../prompts/mantis-telegram-desktop-proof.md  |  7 ++++
 .../mantis-telegram-desktop-proof.yml         | 40 +++++++++++++++++--
 scripts/e2e/telegram-user-crabbox-proof.ts    | 35 +++++++++++++++-
 ...is-telegram-desktop-proof-workflow.test.ts | 27 +++++++++++++
 4 files changed, 104 insertions(+), 5 deletions(-)

diff --git a/.github/codex/prompts/mantis-telegram-desktop-proof.md b/.github/codex/prompts/mantis-telegram-desktop-proof.md
index a17073b1e43..e3c4ec321be 100644
--- a/.github/codex/prompts/mantis-telegram-desktop-proof.md
+++ b/.github/codex/prompts/mantis-telegram-desktop-proof.md
@@ -24,6 +24,7 @@ Inputs are provided as environment variables:
 - `BASELINE_SHA`
 - `CANDIDATE_REF`
 - `CANDIDATE_SHA`
+- `MANTIS_CANDIDATE_TRUST`
 - `MANTIS_OUTPUT_DIR`
 - `MANTIS_INSTRUCTIONS`
 - `CRABBOX_PROVIDER`
@@ -44,6 +45,12 @@ Required workflow:
    `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and
    `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
    install and build each worktree with the repo's normal `pnpm` commands.
+   If `MANTIS_CANDIDATE_TRUST` is `maintainer-approved-fork-pr-head`, treat the
+   candidate worktree as untrusted fork code: do not pass GitHub, OpenAI,
+   Crabbox, Convex, or other workflow secrets into candidate install, build, or
+   runtime commands. The candidate system under test (SUT) may receive only the
+   proof runner's short-lived Telegram bot token, the generated local
+   config/state paths, and the mock model key needed for this isolated proof.
 5. In each worktree, run the real-user Telegram Crabbox proof flow from the
    skill with `$OPENCLAW_TELEGRAM_USER_PROOF_CMD`; do not run
    `pnpm qa:telegram-user:crabbox` directly. The proof command comes from the
diff --git a/.github/workflows/mantis-telegram-desktop-proof.yml b/.github/workflows/mantis-telegram-desktop-proof.yml
index ecd24f02408..eb26cf7d62e 100644
--- a/.github/workflows/mantis-telegram-desktop-proof.yml
+++ b/.github/workflows/mantis-telegram-desktop-proof.yml
@@ -35,6 +35,11 @@ on:
         description: Optional existing Crabbox desktop lease id or slug to reuse
         required: false
         type: string
+      allow_fork_candidate:
+        description: Allow a fork PR head candidate when pr_number points at that PR
+        required: false
+        default: false
+        type: boolean
 
 permissions:
   contents: write
@@ -95,6 +100,7 @@ jobs:
     needs: authorize_actor
     runs-on: ubuntu-24.04
     outputs:
+      allow_fork_candidate: ${{ steps.resolve.outputs.allow_fork_candidate }}
       baseline_ref: ${{ steps.resolve.outputs.baseline_ref }}
       candidate_ref: ${{ steps.resolve.outputs.candidate_ref }}
       crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }}
@@ -119,6 +125,10 @@ jobs:
             if (eventName === "workflow_dispatch") {
               const inputs = context.payload.inputs ?? {};
               setOutput("should_run", "true");
+              setOutput(
+                "allow_fork_candidate",
+                String(inputs.allow_fork_candidate) === "true" ? "true" : "false",
+              );
               setOutput("baseline_ref", inputs.baseline_ref || "main");
               setOutput("candidate_ref", inputs.candidate_ref || "main");
               setOutput("pr_number", inputs.pr_number || "");
@@ -150,6 +160,7 @@ jobs:
             if (!requested) {
               core.notice("Comment mentioned Mantis but did not request Telegram desktop proof.");
               setOutput("should_run", "false");
+              setOutput("allow_fork_candidate", "false");
               setOutput("baseline_ref", "");
               setOutput("candidate_ref", "");
               setOutput("pr_number", "");
@@ -192,8 +203,10 @@ jobs:
               rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase())
                 ? rawCandidate
                 : mergedCandidate || pr.head.sha;
+            const allowForkCandidate = /\bfork[-_]ok\b/i.test(body);
 
             setOutput("should_run", "true");
+            setOutput("allow_fork_candidate", allowForkCandidate ? "true" : "false");
             setOutput("baseline_ref", baselineMatch?.[1] || mergedBaseline || "main");
             setOutput("candidate_ref", candidate);
             setOutput("pr_number", String(issue.number));
@@ -217,6 +230,7 @@ jobs:
     outputs:
       baseline_revision: ${{ steps.validate.outputs.baseline_revision }}
       candidate_revision: ${{ steps.validate.outputs.candidate_revision }}
+      candidate_trust: ${{ steps.validate.outputs.candidate_trust }}
     steps:
       - name: Checkout harness ref
         uses: actions/checkout@v6
@@ -227,6 +241,7 @@ jobs:
       - name: Validate refs are trusted
         id: validate
         env:
+          ALLOW_FORK_CANDIDATE: ${{ needs.resolve_request.outputs.allow_fork_candidate }}
           BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }}
           CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }}
           GH_TOKEN: ${{ github.token }}
@@ -264,25 +279,43 @@ jobs:
               )"
               if [[ "$pr_head_count" != "0" ]]; then
                 reason="open-pr-head"
+              elif [[ "$label" == "candidate" && "${ALLOW_FORK_CANDIDATE:-false}" == "true" && -n "${PR_NUMBER:-}" ]]; then
+                local fork_pr_head_count
+                fork_pr_head_count="$(
+                  gh api \
+                    -H "Accept: application/vnd.github+json" \
+                    "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
+                    --jq 'if .state == "open" and .head.repo.full_name != "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'" then 1 else 0 end'
+                )"
+                if [[ "$fork_pr_head_count" == "1" ]]; then
+                  reason="maintainer-approved-fork-pr-head"
+                fi
               fi
             fi
 
             if [[ -z "$reason" ]]; then
-              echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run." >&2
+              echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run. Add fork-ok only for a maintainer-approved fork PR head." >&2
               exit 1
             fi
-            printf '%s\n' "$revision"
+            printf '%s\t%s\n' "$revision" "$reason"
           }
 
           baseline_revision="$(validate_ref baseline "$BASELINE_REF")"
+          baseline_trust="${baseline_revision#*$'\t'}"
+          baseline_revision="${baseline_revision%%$'\t'*}"
           candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")"
+          candidate_trust="${candidate_revision#*$'\t'}"
+          candidate_revision="${candidate_revision%%$'\t'*}"
           echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT"
           echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT"
+          echo "candidate_trust=${candidate_trust}" >> "$GITHUB_OUTPUT"
           {
             echo "baseline: \`${BASELINE_REF}\`"
             echo "baseline SHA: \`${baseline_revision}\`"
+            echo "baseline trust: \`${baseline_trust}\`"
             echo "candidate: \`${CANDIDATE_REF}\`"
             echo "candidate SHA: \`${candidate_revision}\`"
+            echo "candidate trust: \`${candidate_trust}\`"
           } >> "$GITHUB_STEP_SUMMARY"
 
   run_telegram_desktop_proof:
@@ -375,7 +408,7 @@ jobs:
             printf '%s\n' 'Defaults env_keep += "CODEX_HOME CODEX_INTERNAL_ORIGINATOR_OVERRIDE"'
             printf '%s\n' 'Defaults env_keep += "BASELINE_REF BASELINE_SHA CANDIDATE_REF CANDIDATE_SHA"'
             printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"'
-            printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
+            printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_CANDIDATE_TRUST MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
             printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"'
             printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD"'
           } | sudo tee /etc/sudoers.d/mantis-codex-env >/dev/null
@@ -406,6 +439,7 @@ jobs:
           CRABBOX_LEASE_ID: ${{ needs.resolve_request.outputs.lease_id }}
           CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
           GH_TOKEN: ${{ github.token }}
+          MANTIS_CANDIDATE_TRUST: ${{ needs.validate_refs.outputs.candidate_trust }}
           MANTIS_INSTRUCTIONS: ${{ needs.resolve_request.outputs.instructions }}
           MANTIS_OUTPUT_DIR: ${{ env.MANTIS_OUTPUT_DIR }}
           MANTIS_PR_NUMBER: ${{ needs.resolve_request.outputs.pr_number }}
diff --git a/scripts/e2e/telegram-user-crabbox-proof.ts b/scripts/e2e/telegram-user-crabbox-proof.ts
index 0f6194bd605..22a4db5990a 100644
--- a/scripts/e2e/telegram-user-crabbox-proof.ts
+++ b/scripts/e2e/telegram-user-crabbox-proof.ts
@@ -417,9 +417,40 @@ function optionalString(source: JsonObject, key: string) {
   return typeof value === "string" && value.trim() ? value.trim() : undefined;
 }
 
+/** Builds a child-process env from allowlisted, non-empty process.env entries. */
+function childProcessBaseEnv() {
+  const allowlist = [
+    "CI",
+    "COREPACK_HOME",
+    "FORCE_COLOR",
+    "HOME",
+    "LANG",
+    "LC_ALL",
+    "NODE_OPTIONS",
+    "OPENCLAW_BUILD_PRIVATE_QA",
+    "OPENCLAW_ENABLE_PRIVATE_QA_CLI",
+    "PATH",
+    "PNPM_HOME",
+    "SHELL",
+    "TEMP",
+    "TMP",
+    "TMPDIR",
+    "USER",
+    "XDG_CACHE_HOME",
+    "XDG_CONFIG_HOME",
+  ];
+  return allowlist.reduce<NodeJS.ProcessEnv>((picked, name) => {
+    const current = process.env[name];
+    if (current) {
+      picked[name] = current;
+    }
+    return picked;
+  }, {});
+}
+
 function mockServerEnv(params: { mockPort: number; mockResponseText: string; requestLog: string }) {
   return {
-    ...process.env,
+    ...childProcessBaseEnv(),
     MOCK_PORT: String(params.mockPort),
     MOCK_REQUEST_LOG: params.requestLog,
     SUCCESS_MARKER: params.mockResponseText,
@@ -428,7 +459,7 @@ function mockServerEnv(params: { mockPort: number; mockResponseText: string; req
 
 function gatewayEnv(params: { configPath: string; stateDir: string; sutToken: string }) {
   return {
-    ...process.env,
+    ...childProcessBaseEnv(),
     OPENAI_API_KEY: "sk-openclaw-e2e-mock",
     OPENCLAW_CONFIG_PATH: params.configPath,
     OPENCLAW_STATE_DIR: params.stateDir,
diff --git a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts
index b2741619288..56b74551acf 100644
--- a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts
+++ b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts
@@ -107,12 +107,31 @@ describe("Mantis Telegram Desktop proof workflow", () => {
     expect(prepare.run).toContain(
       "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD",
     );
+    expect(prepare.run).toContain("MANTIS_CANDIDATE_TRUST");
 
     const prompt = readFileSync(PROMPT, "utf8");
     expect(prompt).toContain("$OPENCLAW_TELEGRAM_USER_PROOF_CMD");
     expect(prompt).toContain("do not run\n   `pnpm qa:telegram-user:crabbox` directly");
   });
 
+  it("requires explicit maintainer fork approval before accepting fork PR heads", () => {
+    const workflowText = readFileSync(WORKFLOW, "utf8");
+    expect(workflowText).toContain("@openclaw-mantis");
+    expect(workflowText).toContain("fork[-_]ok");
+    expect(workflowText).toContain("ALLOW_FORK_CANDIDATE");
+    expect(workflowText).toContain("maintainer-approved-fork-pr-head");
+    expect(workflowText).toContain(".head.repo.full_name !=");
+
+    const agent = workflowStep("Run Codex Mantis Telegram agent");
+    expect(agent.env?.MANTIS_CANDIDATE_TRUST).toBe(
+      "${{ needs.validate_refs.outputs.candidate_trust }}",
+    );
+
+    const prompt = readFileSync(PROMPT, "utf8");
+    expect(prompt).toContain("MANTIS_CANDIDATE_TRUST");
+    expect(prompt).toContain("untrusted fork code");
+  });
+
   it("checks the Telegram user driver before leasing credentials", () => {
     const proofScript = readFileSync(PROOF_SCRIPT, "utf8");
     const startSession = proofScript.slice(
@@ -132,4 +151,12 @@ describe("Mantis Telegram Desktop proof workflow", () => {
       defaultProof.indexOf("leaseCredential({ localRoot, opts, root })"),
     );
   });
+
+  it("does not pass the full workflow environment into the local Telegram SUT", () => {
+    const proofScript = readFileSync(PROOF_SCRIPT, "utf8");
+    expect(proofScript).toContain("function childProcessBaseEnv()");
+    expect(proofScript).toContain("...childProcessBaseEnv()");
+    expect(proofScript).not.toContain("...process.env,\n    OPENAI_API_KEY");
+    expect(proofScript).not.toContain("...process.env,\n    MOCK_PORT");
+  });
 });