From 663206aac45f5025564d160b15a3b0bdd17bcdee Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Mon, 11 May 2026 17:49:45 +0530 Subject: [PATCH] ci(mantis): derive telegram proof refs from pr --- .../prompts/mantis-telegram-desktop-proof.md | 6 +- .../mantis-telegram-desktop-proof.yml | 203 +++++------------- ...is-telegram-desktop-proof-workflow.test.ts | 24 ++- 3 files changed, 73 insertions(+), 160 deletions(-) diff --git a/.github/codex/prompts/mantis-telegram-desktop-proof.md b/.github/codex/prompts/mantis-telegram-desktop-proof.md index e3c4ec321be..ec1a89d49f1 100644 --- a/.github/codex/prompts/mantis-telegram-desktop-proof.md +++ b/.github/codex/prompts/mantis-telegram-desktop-proof.md @@ -35,9 +35,7 @@ Required workflow: 1. Read `.agents/skills/telegram-crabbox-e2e-proof/SKILL.md`. 2. Inspect the PR with `gh pr view "$MANTIS_PR_NUMBER"` and - `gh pr diff "$MANTIS_PR_NUMBER"` when `MANTIS_PR_NUMBER` is set. If the run - came from workflow dispatch without a PR number, inspect - `BASELINE_SHA..CANDIDATE_SHA`. + `gh pr diff "$MANTIS_PR_NUMBER"`. 3. Decide what Telegram message, mock model response, command, callback, button, media, or sequence best proves the PR. Use `MANTIS_INSTRUCTIONS` as extra maintainer guidance, not as a replacement for reading the PR. @@ -45,7 +43,7 @@ Required workflow: `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then install and build each worktree with the repo's normal `pnpm` commands. - If `MANTIS_CANDIDATE_TRUST` is `maintainer-approved-fork-pr-head`, treat the + If `MANTIS_CANDIDATE_TRUST` is `fork-pr-head`, treat the candidate worktree as untrusted fork code: do not pass GitHub, OpenAI, Crabbox, Convex, or other workflow secrets into candidate install, build, or runtime commands. The candidate SUT may receive only the proof runner's diff --git a/.github/workflows/mantis-telegram-desktop-proof.yml b/.github/workflows/mantis-telegram-desktop-proof.yml index eb26cf7d62e..da6a0a01066 100644 --- a/.github/workflows/mantis-telegram-desktop-proof.yml +++ b/.github/workflows/mantis-telegram-desktop-proof.yml @@ -5,19 +5,9 @@ on: types: [created] workflow_dispatch: inputs: - baseline_ref: - description: Ref, tag, or SHA to capture as the before GIF - required: true - default: main - type: string - candidate_ref: - description: Ref, tag, or SHA to capture as the after GIF - required: true - default: main - type: string pr_number: - description: Optional PR number to receive the QA evidence comment - required: false + description: PR number to capture + required: true type: string instructions: description: Optional freeform proof instructions for the agent @@ -35,11 +25,6 @@ on: description: Optional existing Crabbox desktop lease id or slug to reuse required: false type: string - allow_fork_candidate: - description: Allow a fork PR head candidate when pr_number points at that PR - required: false - default: false - type: boolean permissions: contents: write @@ -47,7 +32,7 @@ permissions: pull-requests: write concurrency: - group: mantis-telegram-desktop-proof-${{ github.event.issue.number || inputs.pr_number || inputs.candidate_ref || github.run_id }}-${{ github.run_attempt }} + group: mantis-telegram-desktop-proof-${{ github.event.issue.number || inputs.pr_number || github.run_id }}-${{ github.run_attempt }} cancel-in-progress: false env: @@ -68,6 +53,7 @@ jobs: ( github.event_name == 'issue_comment' && github.event.issue.pull_request && + contains(github.event.issue.labels.*.name, 'mantis: telegram-visible-proof') && ( contains(github.event.comment.body, '@openclaw-mantis') || contains(github.event.comment.body, '/openclaw-mantis') @@ -100,7 +86,6 @@ jobs: needs: authorize_actor runs-on: ubuntu-24.04 outputs: - allow_fork_candidate: ${{ steps.resolve.outputs.allow_fork_candidate }} baseline_ref: ${{ steps.resolve.outputs.baseline_ref }} candidate_ref: ${{ steps.resolve.outputs.candidate_ref }} crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }} @@ -108,7 +93,6 @@ jobs: lease_id: ${{ steps.resolve.outputs.lease_id }} pr_number: ${{ steps.resolve.outputs.pr_number }} request_source: ${{ steps.resolve.outputs.request_source }} - should_run: ${{ steps.resolve.outputs.should_run }} steps: - name: Resolve refs and target PR id: resolve @@ -122,52 +106,11 @@ jobs: core.info(`${name}=${value ?? ""}`); } - if (eventName === "workflow_dispatch") { - const inputs = context.payload.inputs ?? {}; - setOutput("should_run", "true"); - setOutput( - "allow_fork_candidate", - String(inputs.allow_fork_candidate) === "true" ? "true" : "false", - ); - setOutput("baseline_ref", inputs.baseline_ref || "main"); - setOutput("candidate_ref", inputs.candidate_ref || "main"); - setOutput("pr_number", inputs.pr_number || ""); - setOutput("instructions", inputs.instructions || ""); - setOutput("crabbox_provider", inputs.crabbox_provider || "aws"); - setOutput("lease_id", inputs.crabbox_lease_id || ""); - setOutput("request_source", "workflow_dispatch"); - return; - } - - if (eventName !== "issue_comment") { - core.setFailed(`Unsupported event: ${eventName}`); - return; - } - - const issue = context.payload.issue; - const body = context.payload.comment?.body ?? ""; - if (!issue?.pull_request) { - core.setFailed("Mantis issue_comment trigger requires a pull request comment."); - return; - } - - const normalized = body.toLowerCase(); - const requested = - (normalized.includes("@openclaw-mantis") || normalized.includes("/openclaw-mantis")) && - normalized.includes("telegram") && - (normalized.includes("desktop") || normalized.includes("native")) && - normalized.includes("proof"); - if (!requested) { - core.notice("Comment mentioned Mantis but did not request Telegram desktop proof."); - setOutput("should_run", "false"); - setOutput("allow_fork_candidate", "false"); - setOutput("baseline_ref", ""); - setOutput("candidate_ref", ""); - setOutput("pr_number", ""); - setOutput("instructions", ""); - setOutput("crabbox_provider", ""); - setOutput("lease_id", ""); - setOutput("request_source", "unsupported_issue_comment"); + const inputs = context.payload.inputs ?? {}; + const prNumber = + eventName === "workflow_dispatch" ? inputs.pr_number : String(context.payload.issue?.number ?? ""); + if (!prNumber) { + core.setFailed("Mantis Telegram desktop proof requires a pull request."); return; } @@ -175,57 +118,35 @@ jobs: const { data: pr } = await github.rest.pulls.get({ owner, repo, - pull_number: issue.number, + pull_number: Number(prNumber), }); - let mergedBaseline = ""; - let mergedCandidate = ""; - if (pr.merged) { - const { data: commits } = await github.rest.pulls.listCommits({ - owner, - repo, - pull_number: issue.number, - per_page: 100, - }); - mergedCandidate = pr.merge_commit_sha || commits.at(-1)?.sha || ""; - mergedBaseline = mergedCandidate && commits.length > 0 ? `${mergedCandidate}~${commits.length}` : ""; - } - const baselineMatch = body.match(/(?:baseline|base)[\s:=]+([^\s`]+)/i); - const candidateMatch = body.match(/(?:candidate|head)[\s:=]+([^\s`]+)/i); - const providerMatch = body.match(/(?:provider|crabbox_provider)[\s:=]+([^\s`]+)/i); - const leaseMatch = body.match(/(?:lease|lease_id|crabbox_lease_id)[\s:=]+([^\s`]+)/i); - const provider = providerMatch?.[1] || "aws"; + const body = eventName === "workflow_dispatch" ? inputs.instructions || "" : context.payload.comment?.body || ""; + const provider = inputs.crabbox_provider || "aws"; if (!["aws", "hetzner"].includes(provider)) { core.setFailed(`Unsupported Crabbox provider for Mantis Telegram desktop proof: ${provider}`); return; } - const rawCandidate = candidateMatch?.[1]; - const candidate = - rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase()) - ? rawCandidate - : mergedCandidate || pr.head.sha; - const allowForkCandidate = /\bfork[-_]ok\b/i.test(body); - setOutput("should_run", "true"); - setOutput("allow_fork_candidate", allowForkCandidate ? "true" : "false"); - setOutput("baseline_ref", baselineMatch?.[1] || mergedBaseline || "main"); - setOutput("candidate_ref", candidate); - setOutput("pr_number", String(issue.number)); + setOutput("baseline_ref", pr.base.sha); + setOutput("candidate_ref", pr.head.sha); + setOutput("pr_number", String(pr.number)); setOutput("instructions", body); setOutput("crabbox_provider", provider); - setOutput("lease_id", leaseMatch?.[1] || ""); - setOutput("request_source", "issue_comment"); + setOutput("lease_id", inputs.crabbox_lease_id || ""); + setOutput("request_source", eventName); - await github.rest.reactions.createForIssueComment({ - owner, - repo, - comment_id: context.payload.comment.id, - content: "eyes", - }).catch((error) => core.warning(`Could not add eyes reaction: ${error.message}`)); + if (eventName === "issue_comment") { + await github.rest.reactions.createForIssueComment({ + owner, + repo, + comment_id: context.payload.comment.id, + content: "eyes", + }).catch((error) => core.warning(`Could not add eyes reaction: ${error.message}`)); + } validate_refs: name: Validate selected refs needs: resolve_request - if: ${{ needs.resolve_request.outputs.should_run == 'true' }} runs-on: ubuntu-24.04 outputs: baseline_revision: ${{ steps.validate.outputs.baseline_revision }} @@ -241,7 +162,6 @@ jobs: - name: Validate refs are trusted id: validate env: - ALLOW_FORK_CANDIDATE: ${{ needs.resolve_request.outputs.allow_fork_candidate }} BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }} CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }} GH_TOKEN: ${{ github.token }} @@ -255,64 +175,48 @@ jobs: git fetch --no-tags origin "+refs/pull/${PR_NUMBER}/head:refs/remotes/origin/pr/${PR_NUMBER}" || true fi - validate_ref() { - local label="$1" + resolve_commit() { local input_ref="$2" local revision="" - local reason="" if ! revision="$(git rev-parse --verify "${input_ref}^{commit}" 2>/dev/null)"; then - echo "${label} ref '${input_ref}' is not available in the workflow checkout." >&2 + echo "$1 ref '${input_ref}' is not available in the workflow checkout." >&2 exit 1 fi - if git merge-base --is-ancestor "$revision" refs/remotes/origin/main; then - reason="main-ancestor" - elif git tag --points-at "$revision" | grep -Eq '^v'; then - reason="release-tag" - else - local pr_head_count - pr_head_count="$( - gh api \ - -H "Accept: application/vnd.github+json" \ - "repos/${GITHUB_REPOSITORY}/commits/${revision}/pulls" \ - --jq '[.[] | select(.state == "open" and .head.repo.full_name == "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'")] | length' - )" - if [[ "$pr_head_count" != "0" ]]; then - reason="open-pr-head" - elif [[ "$label" == "candidate" && "${ALLOW_FORK_CANDIDATE:-false}" == "true" && -n "${PR_NUMBER:-}" ]]; then - local fork_pr_head_count - fork_pr_head_count="$( - gh api \ - -H "Accept: application/vnd.github+json" \ - "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \ - --jq 'if .state == "open" and .head.repo.full_name != "'"${GITHUB_REPOSITORY}"'" and .head.sha == "'"${revision}"'" then 1 else 0 end' - )" - if [[ "$fork_pr_head_count" == "1" ]]; then - reason="maintainer-approved-fork-pr-head" - fi - fi - fi - - if [[ -z "$reason" ]]; then - echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run. Add fork-ok only for a maintainer-approved fork PR head." >&2 - exit 1 - fi - printf '%s\t%s\n' "$revision" "$reason" + printf '%s\n' "$revision" } - baseline_revision="$(validate_ref baseline "$BASELINE_REF")" - baseline_trust="${baseline_revision#*$'\t'}" - baseline_revision="${baseline_revision%%$'\t'*}" - candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")" - candidate_trust="${candidate_revision#*$'\t'}" - candidate_revision="${candidate_revision%%$'\t'*}" + baseline_revision="$(resolve_commit baseline "$BASELINE_REF")" + candidate_revision="$(resolve_commit candidate "$CANDIDATE_REF")" + if ! git merge-base --is-ancestor "$baseline_revision" refs/remotes/origin/main; then + echo "baseline ref '${BASELINE_REF}' resolved to ${baseline_revision}, which is not on main." >&2 + exit 1 + fi + pr_head="$( + gh api \ + -H "Accept: application/vnd.github+json" \ + "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" \ + --jq '{state, head_sha: .head.sha, head_repo: .head.repo.full_name}' + )" + pr_state="$(jq -r '.state' <<<"$pr_head")" + pr_head_sha="$(jq -r '.head_sha' <<<"$pr_head")" + pr_head_repo="$(jq -r '.head_repo' <<<"$pr_head")" + if [[ "$pr_state" != "open" || "$candidate_revision" != "$pr_head_sha" ]]; then + echo "candidate ref '${CANDIDATE_REF}' resolved to ${candidate_revision}, which is not the open PR head." >&2 + exit 1 + fi + candidate_trust="open-pr-head" + if [[ "$pr_head_repo" != "$GITHUB_REPOSITORY" ]]; then + candidate_trust="fork-pr-head" + fi + echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT" echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT" echo "candidate_trust=${candidate_trust}" >> "$GITHUB_OUTPUT" { echo "baseline: \`${BASELINE_REF}\`" echo "baseline SHA: \`${baseline_revision}\`" - echo "baseline trust: \`${baseline_trust}\`" + echo "baseline trust: \`main-ancestor\`" echo "candidate: \`${CANDIDATE_REF}\`" echo "candidate SHA: \`${candidate_revision}\`" echo "candidate trust: \`${candidate_trust}\`" @@ -321,7 +225,6 @@ jobs: run_telegram_desktop_proof: name: Run agentic native Telegram proof needs: [resolve_request, validate_refs] - if: ${{ needs.resolve_request.outputs.should_run == 'true' }} runs-on: blacksmith-16vcpu-ubuntu-2404 timeout-minutes: 360 environment: qa-live-shared diff --git a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts index 56b74551acf..0814d7d328c 100644 --- a/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts +++ b/test/scripts/mantis-telegram-desktop-proof-workflow.test.ts @@ -62,6 +62,7 @@ describe("Mantis Telegram Desktop proof workflow", () => { const workflow = readFileSync(WORKFLOW, "utf8"); expect(workflow).toContain("@openclaw-mantis"); expect(workflow).toContain("/openclaw-mantis"); + expect(workflow).toContain("mantis: telegram-visible-proof"); expect(workflow).not.toContain("@Mantis"); expect(workflow).not.toContain("@mantis"); expect(workflow).not.toContain('"/mantis"'); @@ -114,13 +115,23 @@ describe("Mantis Telegram Desktop proof workflow", () => { expect(prompt).toContain("do not run\n `pnpm qa:telegram-user:crabbox` directly"); }); - it("requires explicit maintainer fork approval before accepting fork PR heads", () => { + it("derives refs from the PR instead of parsing comment prose", () => { const workflowText = readFileSync(WORKFLOW, "utf8"); - expect(workflowText).toContain("@openclaw-mantis"); - expect(workflowText).toContain("fork[-_]ok"); - expect(workflowText).toContain("ALLOW_FORK_CANDIDATE"); - expect(workflowText).toContain("maintainer-approved-fork-pr-head"); - expect(workflowText).toContain(".head.repo.full_name !="); + expect(workflowText).toContain('setOutput("baseline_ref", pr.base.sha)'); + expect(workflowText).toContain('setOutput("candidate_ref", pr.head.sha)'); + expect(workflowText).not.toContain("body.match"); + expect(workflowText).not.toContain("baselineMatch"); + expect(workflowText).not.toContain("candidateMatch"); + expect(workflowText).not.toContain("leaseMatch"); + expect(workflowText).not.toContain("fork-ok"); + expect(workflowText).not.toContain("allow_fork_candidate"); + }); + + it("trusts the open PR head and marks fork heads for sandboxed handling", () => { + const workflowText = readFileSync(WORKFLOW, "utf8"); + expect(workflowText).toContain("repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}"); + expect(workflowText).toContain('candidate_trust="fork-pr-head"'); + expect(workflowText).toContain('pr_head_repo" != "$GITHUB_REPOSITORY"'); const agent = workflowStep("Run Codex Mantis Telegram agent"); expect(agent.env?.MANTIS_CANDIDATE_TRUST).toBe( @@ -129,6 +140,7 @@ describe("Mantis Telegram Desktop proof workflow", () => { const prompt = readFileSync(PROMPT, "utf8"); expect(prompt).toContain("MANTIS_CANDIDATE_TRUST"); + expect(prompt).toContain("fork-pr-head"); expect(prompt).toContain("untrusted fork code"); });