ci(mantis): allow fork telegram proof

2026-05-13 15:47:28 +00:00 · 2026-05-11 17:27:32 +05:30
parent e6b0b37e3f
commit d4c7519989
4 changed files with 104 additions and 5 deletions
--- a/.github/codex/prompts/mantis-telegram-desktop-proof.md
+++ b/.github/codex/prompts/mantis-telegram-desktop-proof.md
@@ -24,6 +24,7 @@ Inputs are provided as environment variables:
 - `BASELINE_SHA`
 - `CANDIDATE_REF`
 - `CANDIDATE_SHA`
+- `MANTIS_CANDIDATE_TRUST`
 - `MANTIS_OUTPUT_DIR`
 - `MANTIS_INSTRUCTIONS`
 - `CRABBOX_PROVIDER`
@@ -44,6 +45,12 @@ Required workflow:
   `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and
   `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then
   install and build each worktree with the repo's normal `pnpm` commands.
+   If `MANTIS_CANDIDATE_TRUST` is `maintainer-approved-fork-pr-head`, treat the
+   candidate worktree as untrusted fork code: do not pass GitHub, OpenAI,
+   Crabbox, Convex, or other workflow secrets into candidate install, build, or
+   runtime commands. The candidate SUT may receive only the proof runner's
+   short-lived Telegram bot token, generated local config/state paths, and mock
+   model key needed for this isolated proof.
 5. In each worktree, run the real-user Telegram Crabbox proof flow from the
   skill with `$OPENCLAW_TELEGRAM_USER_PROOF_CMD`; do not run
   `pnpm qa:telegram-user:crabbox` directly. The proof command comes from the
--- a/.github/workflows/mantis-telegram-desktop-proof.yml
+++ b/.github/workflows/mantis-telegram-desktop-proof.yml
@@ -35,6 +35,11 @@ on:
        description: Optional existing Crabbox desktop lease id or slug to reuse
        required: false
        type: string
+      allow_fork_candidate:
+        description: Allow a fork PR head candidate when pr_number points at that PR
+        required: false
+        default: false
+        type: boolean

 permissions:
  contents: write
@@ -95,6 +100,7 @@ jobs:
    needs: authorize_actor
    runs-on: ubuntu-24.04
    outputs:
+      allow_fork_candidate: ${{ steps.resolve.outputs.allow_fork_candidate }}
      baseline_ref: ${{ steps.resolve.outputs.baseline_ref }}
      candidate_ref: ${{ steps.resolve.outputs.candidate_ref }}
      crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }}
@@ -119,6 +125,10 @@ jobs:
            if (eventName === "workflow_dispatch") {
              const inputs = context.payload.inputs ?? {};
              setOutput("should_run", "true");
+              setOutput(
+                "allow_fork_candidate",
+                String(inputs.allow_fork_candidate) === "true" ? "true" : "false",
+              );
              setOutput("baseline_ref", inputs.baseline_ref || "main");
              setOutput("candidate_ref", inputs.candidate_ref || "main");
              setOutput("pr_number", inputs.pr_number || "");
@@ -150,6 +160,7 @@ jobs:
            if (!requested) {
              core.notice("Comment mentioned Mantis but did not request Telegram desktop proof.");
              setOutput("should_run", "false");
+              setOutput("allow_fork_candidate", "false");
              setOutput("baseline_ref", "");
              setOutput("candidate_ref", "");
              setOutput("pr_number", "");
@@ -192,8 +203,10 @@ jobs:
              rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase())
                ? rawCandidate
                : mergedCandidate || pr.head.sha;
+            const allowForkCandidate = /\bfork[-_]ok\b/i.test(body);

            setOutput("should_run", "true");
+            setOutput("allow_fork_candidate", allowForkCandidate ? "true" : "false");
            setOutput("baseline_ref", baselineMatch?.[1] || mergedBaseline || "main");
            setOutput("candidate_ref", candidate);
            setOutput("pr_number", String(issue.number));
@@ -217,6 +230,7 @@ jobs:
    outputs:
      baseline_revision: ${{ steps.validate.outputs.baseline_revision }}
      candidate_revision: ${{ steps.validate.outputs.candidate_revision }}
+      candidate_trust: ${{ steps.validate.outputs.candidate_trust }}
    steps:
      - name: Checkout harness ref
        uses: actions/checkout@v6
@@ -227,6 +241,7 @@ jobs:
      - name: Validate refs are trusted
        id: validate
        env:
+          ALLOW_FORK_CANDIDATE: ${{ needs.resolve_request.outputs.allow_fork_candidate }}
          BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }}
          CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }}
          GH_TOKEN: ${{ github.token }}
@@ -264,25 +279,43 @@ jobs:
              )"
              if [[ "$pr_head_count" != "0" ]]; then
                reason="open-pr-head"
+              elif [[ "$label" == "candidate" && "${ALLOW_FORK_CANDIDATE:-false}" == "true" && -n "${PR_NUMBER:-}" ]]; then
+                local fork_pr_head_count
+                fork_pr_head_count="$(
+                  gh api \
+                    -H "Accept: application/vnd.github+json" \
+                    "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \
+                    --jq 'if .state == "open" and .head.repo.full_name != "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'" then 1 else 0 end'
+                )"
+                if [[ "$fork_pr_head_count" == "1" ]]; then
+                  reason="maintainer-approved-fork-pr-head"
+                fi
              fi
            fi

            if [[ -z "$reason" ]]; then
-              echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run." >&2
+              echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run. Add fork-ok only for a maintainer-approved fork PR head." >&2
              exit 1
            fi
-            printf '%s\n' "$revision"
+            printf '%s\t%s\n' "$revision" "$reason"
          }

          baseline_revision="$(validate_ref baseline "$BASELINE_REF")"
+          baseline_trust="${baseline_revision#*$'\t'}"
+          baseline_revision="${baseline_revision%%$'\t'*}"
          candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")"
+          candidate_trust="${candidate_revision#*$'\t'}"
+          candidate_revision="${candidate_revision%%$'\t'*}"
          echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT"
          echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT"
+          echo "candidate_trust=${candidate_trust}" >> "$GITHUB_OUTPUT"
          {
            echo "baseline: \`${BASELINE_REF}\`"
            echo "baseline SHA: \`${baseline_revision}\`"
+            echo "baseline trust: \`${baseline_trust}\`"
            echo "candidate: \`${CANDIDATE_REF}\`"
            echo "candidate SHA: \`${candidate_revision}\`"
+            echo "candidate trust: \`${candidate_trust}\`"
          } >> "$GITHUB_STEP_SUMMARY"

  run_telegram_desktop_proof:
@@ -375,7 +408,7 @@ jobs:
            printf '%s\n' 'Defaults env_keep += "CODEX_HOME CODEX_INTERNAL_ORIGINATOR_OVERRIDE"'
            printf '%s\n' 'Defaults env_keep += "BASELINE_REF BASELINE_SHA CANDIDATE_REF CANDIDATE_SHA"'
            printf '%s\n' 'Defaults env_keep += "CRABBOX_ACCESS_CLIENT_ID CRABBOX_ACCESS_CLIENT_SECRET CRABBOX_COORDINATOR CRABBOX_COORDINATOR_TOKEN CRABBOX_LEASE_ID CRABBOX_PROVIDER"'
-            printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
+            printf '%s\n' 'Defaults env_keep += "GH_TOKEN MANTIS_CANDIDATE_TRUST MANTIS_INSTRUCTIONS MANTIS_OUTPUT_DIR MANTIS_PR_NUMBER"'
            printf '%s\n' 'Defaults env_keep += "OPENCLAW_BUILD_PRIVATE_QA OPENCLAW_ENABLE_PRIVATE_QA_CLI OPENCLAW_QA_CONVEX_SECRET_CI OPENCLAW_QA_CONVEX_SITE_URL OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN"'
            printf '%s\n' 'Defaults env_keep += "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD"'
          } | sudo tee /etc/sudoers.d/mantis-codex-env >/dev/null
@@ -406,6 +439,7 @@ jobs:
          CRABBOX_LEASE_ID: ${{ needs.resolve_request.outputs.lease_id }}
          CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }}
          GH_TOKEN: ${{ github.token }}
+          MANTIS_CANDIDATE_TRUST: ${{ needs.validate_refs.outputs.candidate_trust }}
          MANTIS_INSTRUCTIONS: ${{ needs.resolve_request.outputs.instructions }}
          MANTIS_OUTPUT_DIR: ${{ env.MANTIS_OUTPUT_DIR }}
          MANTIS_PR_NUMBER: ${{ needs.resolve_request.outputs.pr_number }}
--- a/scripts/e2e/telegram-user-crabbox-proof.ts
+++ b/scripts/e2e/telegram-user-crabbox-proof.ts
@@ -417,9 +417,40 @@ function optionalString(source: JsonObject, key: string) {
  return typeof value === "string" && value.trim() ? value.trim() : undefined;
 }

+/**
+ * Builds the base environment for proof child processes from an allowlist.
+ * Only the variables named below are copied from `process.env`, and only when
+ * they hold a non-empty value; the returned object contains nothing else.
+ */
+function childProcessBaseEnv() {
+  const allowlist = [
+    "CI",
+    "COREPACK_HOME",
+    "FORCE_COLOR",
+    "HOME",
+    "LANG",
+    "LC_ALL",
+    "NODE_OPTIONS",
+    "OPENCLAW_BUILD_PRIVATE_QA",
+    "OPENCLAW_ENABLE_PRIVATE_QA_CLI",
+    "PATH",
+    "PNPM_HOME",
+    "SHELL",
+    "TEMP",
+    "TMP",
+    "TMPDIR",
+    "USER",
+    "XDG_CACHE_HOME",
+    "XDG_CONFIG_HOME",
+  ];
+  const present = allowlist.filter((name) => Boolean(process.env[name]));
+  const pairs = present.map((name) => [name, process.env[name]]);
+  return Object.fromEntries(pairs) as NodeJS.ProcessEnv;
+}
+
 function mockServerEnv(params: { mockPort: number; mockResponseText: string; requestLog: string }) {
  return {
-    ...process.env,
+    ...childProcessBaseEnv(),
    MOCK_PORT: String(params.mockPort),
    MOCK_REQUEST_LOG: params.requestLog,
    SUCCESS_MARKER: params.mockResponseText,
@@ -428,7 +459,7 @@ function mockServerEnv(params: { mockPort: number; mockResponseText: string; req

 function gatewayEnv(params: { configPath: string; stateDir: string; sutToken: string }) {
  return {
-    ...process.env,
+    ...childProcessBaseEnv(),
    OPENAI_API_KEY: "sk-openclaw-e2e-mock",
    OPENCLAW_CONFIG_PATH: params.configPath,
    OPENCLAW_STATE_DIR: params.stateDir,
--- a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts
+++ b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts
@@ -107,12 +107,31 @@ describe("Mantis Telegram Desktop proof workflow", () => {
    expect(prepare.run).toContain(
      "OPENCLAW_TELEGRAM_USER_CRABBOX_BIN OPENCLAW_TELEGRAM_USER_CRABBOX_PROVIDER OPENCLAW_TELEGRAM_USER_DRIVER_SCRIPT OPENCLAW_TELEGRAM_USER_PROOF_CMD",
    );
+    expect(prepare.run).toContain("MANTIS_CANDIDATE_TRUST");

    const prompt = readFileSync(PROMPT, "utf8");
    expect(prompt).toContain("$OPENCLAW_TELEGRAM_USER_PROOF_CMD");
    expect(prompt).toContain("do not run\n   `pnpm qa:telegram-user:crabbox` directly");
  });

+  it("requires explicit maintainer fork approval before accepting fork PR heads", () => {
+    const workflowText = readFileSync(WORKFLOW, "utf8");
+    expect(workflowText).toContain("@openclaw-mantis");
+    expect(workflowText).toContain("fork[-_]ok");
+    expect(workflowText).toContain("ALLOW_FORK_CANDIDATE");
+    expect(workflowText).toContain("maintainer-approved-fork-pr-head");
+    expect(workflowText).toContain(".head.repo.full_name !=");
+
+    const agent = workflowStep("Run Codex Mantis Telegram agent");
+    expect(agent.env?.MANTIS_CANDIDATE_TRUST).toBe(
+      "${{ needs.validate_refs.outputs.candidate_trust }}",
+    );
+
+    const prompt = readFileSync(PROMPT, "utf8");
+    expect(prompt).toContain("MANTIS_CANDIDATE_TRUST");
+    expect(prompt).toContain("untrusted fork code");
+  });
+
  it("checks the Telegram user driver before leasing credentials", () => {
    const proofScript = readFileSync(PROOF_SCRIPT, "utf8");
    const startSession = proofScript.slice(
@@ -132,4 +151,12 @@ describe("Mantis Telegram Desktop proof workflow", () => {
      defaultProof.indexOf("leaseCredential({ localRoot, opts, root })"),
    );
  });
+
+  it("does not pass the full workflow environment into the local Telegram SUT", () => {
+    const proofScript = readFileSync(PROOF_SCRIPT, "utf8");
+    expect(proofScript).toContain("function childProcessBaseEnv()");
+    expect(proofScript).toContain("...childProcessBaseEnv()");
+    expect(proofScript).not.toContain("...process.env,\n    OPENAI_API_KEY");
+    expect(proofScript).not.toContain("...process.env,\n    MOCK_PORT");
+  });
 });