diff --git a/.github/codex/prompts/mantis-telegram-desktop-proof.md b/.github/codex/prompts/mantis-telegram-desktop-proof.md new file mode 100644 index 00000000000..dd5d52a9449 --- /dev/null +++ b/.github/codex/prompts/mantis-telegram-desktop-proof.md @@ -0,0 +1,85 @@ +# Mantis Telegram Desktop Proof Agent + +You are Mantis running native Telegram Desktop visual proof for an OpenClaw PR. + +Goal: inspect the pull request, decide the best Telegram-visible behavior to +prove, run before/after native Telegram Desktop sessions, iterate until the GIFs +are visually good, and leave a Mantis evidence manifest for the workflow to +publish. + +Hard limits: + +- Do not post GitHub comments or reviews. The workflow publishes the manifest. +- Do not commit, push, label, merge, or edit PR metadata. +- Do not print secrets, credential payloads, Telegram profile data, TDLib data, + or raw session archives. +- Do not use fixed `/status` proof unless it genuinely proves the PR. +- Do not finish with tiny, cropped-wrong, off-bottom, or sidebar-heavy GIFs. +- Do not invent a generic proof. The proof must match the PR behavior. + +Inputs are provided as environment variables: + +- `MANTIS_PR_NUMBER` +- `BASELINE_REF` +- `BASELINE_SHA` +- `CANDIDATE_REF` +- `CANDIDATE_SHA` +- `MANTIS_OUTPUT_DIR` +- `MANTIS_INSTRUCTIONS` +- `CRABBOX_PROVIDER` +- optional `CRABBOX_LEASE_ID` + +Required workflow: + +1. Read `.agents/skills/telegram-crabbox-e2e-proof/SKILL.md`. +2. Inspect the PR with `gh pr view "$MANTIS_PR_NUMBER"` and + `gh pr diff "$MANTIS_PR_NUMBER"` when `MANTIS_PR_NUMBER` is set. If the run + came from workflow dispatch without a PR number, inspect + `BASELINE_SHA..CANDIDATE_SHA`. +3. Decide what Telegram message, mock model response, command, callback, button, + media, or sequence best proves the PR. Use `MANTIS_INSTRUCTIONS` as extra + maintainer guidance, not as a replacement for reading the PR. +4. 
Create detached worktrees under + `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/baseline` and + `.artifacts/qa-e2e/mantis/telegram-desktop-proof-worktrees/candidate`, then + install and build each worktree with the repo's normal `pnpm` commands. +5. In each worktree, run the real-user Telegram Crabbox proof flow from the + skill. Use the same proof idea for baseline and candidate. You may iterate + and rerun if the visual result is not convincing. +6. Open Telegram Desktop directly to the newest relevant message with the + runner `view` command before finishing each recording. Keep the chat scrolled + to the bottom so new proof messages appear in-frame. +7. Finish each session with `--preview-crop telegram-window`. +8. Build `${MANTIS_OUTPUT_DIR}/mantis-evidence.json` with: + + ```bash + node scripts/mantis/build-telegram-desktop-proof-evidence.mjs \ + --output-dir "$MANTIS_OUTPUT_DIR" \ + --baseline-repo-root <baseline-worktree> \ + --baseline-output-dir <baseline-session-output-dir> \ + --baseline-ref "$BASELINE_REF" \ + --baseline-sha "$BASELINE_SHA" \ + --candidate-repo-root <candidate-worktree> \ + --candidate-output-dir <candidate-session-output-dir> \ + --candidate-ref "$CANDIDATE_REF" \ + --candidate-sha "$CANDIDATE_SHA" \ + --scenario-label telegram-desktop-proof + ``` + +Visual acceptance: + +- The GIFs show native Telegram Desktop, not transcript HTML. +- Telegram is in single-chat proof view with no left chat list or right info + pane. +- The proof behavior is visible without reading logs. +- Main and PR GIFs are comparable side by side. +- The final relevant message or button is visible near the bottom. +- If one run fails because the PR genuinely changes behavior, still finish the + session and produce the manifest if useful visual artifacts exist. + +Expected final state: + +- `${MANTIS_OUTPUT_DIR}/mantis-evidence.json` exists. +- The manifest contains paired `motionPreview` artifacts labeled `Main` and + `This PR`. +- The worktree can be dirty only under `.artifacts/`. 
diff --git a/.github/workflows/mantis-scenario.yml b/.github/workflows/mantis-scenario.yml index c053a25adb0..8486d658c0b 100644 --- a/.github/workflows/mantis-scenario.yml +++ b/.github/workflows/mantis-scenario.yml @@ -13,6 +13,7 @@ on: - discord-thread-reply-filepath-attachment - slack-desktop-smoke - telegram-live + - telegram-desktop-proof baseline_ref: description: Optional baseline ref for before/after scenarios required: false @@ -103,6 +104,23 @@ jobs: fi gh "${args[@]}" ;; + telegram-desktop-proof) + baseline_ref="$BASELINE_REF" + if [[ -z "$baseline_ref" || "$baseline_ref" == "0bf06e953fdda290799fc9fb9244a8f67fdae593" ]]; then + baseline_ref="main" + fi + args=( + workflow run mantis-telegram-desktop-proof.yml + --repo "$GITHUB_REPOSITORY" + --ref main + -f "baseline_ref=${baseline_ref}" + -f "candidate_ref=${CANDIDATE_REF}" + ) + if [[ -n "${PR_NUMBER:-}" ]]; then + args+=(-f "pr_number=${PR_NUMBER}") + fi + gh "${args[@]}" + ;; *) echo "Unsupported Mantis scenario: ${SCENARIO_ID}" >&2 exit 1 diff --git a/.github/workflows/mantis-telegram-desktop-proof.yml b/.github/workflows/mantis-telegram-desktop-proof.yml new file mode 100644 index 00000000000..1703540ba25 --- /dev/null +++ b/.github/workflows/mantis-telegram-desktop-proof.yml @@ -0,0 +1,428 @@ +name: Mantis Telegram Desktop Proof + +on: + issue_comment: + types: [created] + workflow_dispatch: + inputs: + baseline_ref: + description: Ref, tag, or SHA to capture as the before GIF + required: true + default: main + type: string + candidate_ref: + description: Ref, tag, or SHA to capture as the after GIF + required: true + default: main + type: string + pr_number: + description: Optional PR number to receive the QA evidence comment + required: false + type: string + instructions: + description: Optional freeform proof instructions for the agent + required: false + type: string + crabbox_provider: + description: Crabbox provider for the native Telegram Desktop capture + required: false + default: aws + 
type: choice + options: + - aws + - hetzner + crabbox_lease_id: + description: Optional existing Crabbox desktop lease id or slug to reuse + required: false + type: string + +permissions: + contents: write + issues: write + pull-requests: write + +concurrency: + group: mantis-telegram-desktop-proof-${{ github.event.issue.number || inputs.pr_number || inputs.candidate_ref || github.run_id }}-${{ github.run_attempt }} + cancel-in-progress: false + +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" + NODE_VERSION: "24.x" + PNPM_VERSION: "10.33.0" + OPENCLAW_BUILD_PRIVATE_QA: "1" + OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1" + CRABBOX_REF: main + MANTIS_OUTPUT_DIR: .artifacts/qa-e2e/mantis/telegram-desktop-proof + +jobs: + authorize_actor: + name: Authorize workflow actor + if: >- + ${{ + github.event_name == 'workflow_dispatch' || + ( + github.event_name == 'issue_comment' && + github.event.issue.pull_request && + ( + contains(github.event.comment.body, '@Mantis') || + contains(github.event.comment.body, '@mantis') || + contains(github.event.comment.body, '/mantis') + ) + ) + }} + runs-on: ubuntu-24.04 + steps: + - name: Require maintainer-level repository access + uses: actions/github-script@v8 + with: + script: | + const allowed = new Set(["admin", "maintain", "write"]); + const { owner, repo } = context.repo; + const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ + owner, + repo, + username: context.actor, + }); + const permission = data.permission; + core.info(`Actor ${context.actor} permission: ${permission}`); + if (!allowed.has(permission)) { + core.setFailed( + `Workflow requires write/maintain/admin access. 
Actor "${context.actor}" has "${permission}".`, + ); + } + + resolve_request: + name: Resolve Mantis request + needs: authorize_actor + runs-on: ubuntu-24.04 + outputs: + baseline_ref: ${{ steps.resolve.outputs.baseline_ref }} + candidate_ref: ${{ steps.resolve.outputs.candidate_ref }} + crabbox_provider: ${{ steps.resolve.outputs.crabbox_provider }} + instructions: ${{ steps.resolve.outputs.instructions }} + lease_id: ${{ steps.resolve.outputs.lease_id }} + pr_number: ${{ steps.resolve.outputs.pr_number }} + request_source: ${{ steps.resolve.outputs.request_source }} + should_run: ${{ steps.resolve.outputs.should_run }} + steps: + - name: Resolve refs and target PR + id: resolve + uses: actions/github-script@v8 + with: + script: | + const eventName = context.eventName; + + function setOutput(name, value) { + core.setOutput(name, value ?? ""); + core.info(`${name}=${value ?? ""}`); + } + + if (eventName === "workflow_dispatch") { + const inputs = context.payload.inputs ?? {}; + setOutput("should_run", "true"); + setOutput("baseline_ref", inputs.baseline_ref || "main"); + setOutput("candidate_ref", inputs.candidate_ref || "main"); + setOutput("pr_number", inputs.pr_number || ""); + setOutput("instructions", inputs.instructions || ""); + setOutput("crabbox_provider", inputs.crabbox_provider || "aws"); + setOutput("lease_id", inputs.crabbox_lease_id || ""); + setOutput("request_source", "workflow_dispatch"); + return; + } + + if (eventName !== "issue_comment") { + core.setFailed(`Unsupported event: ${eventName}`); + return; + } + + const issue = context.payload.issue; + const body = context.payload.comment?.body ?? 
""; + if (!issue?.pull_request) { + core.setFailed("Mantis issue_comment trigger requires a pull request comment."); + return; + } + + const normalized = body.toLowerCase(); + const requested = + (normalized.includes("@mantis") || normalized.includes("/mantis")) && + normalized.includes("telegram") && + (normalized.includes("desktop") || normalized.includes("native")) && + normalized.includes("proof"); + if (!requested) { + core.notice("Comment mentioned Mantis but did not request Telegram desktop proof."); + setOutput("should_run", "false"); + setOutput("baseline_ref", ""); + setOutput("candidate_ref", ""); + setOutput("pr_number", ""); + setOutput("instructions", ""); + setOutput("crabbox_provider", ""); + setOutput("lease_id", ""); + setOutput("request_source", "unsupported_issue_comment"); + return; + } + + const { owner, repo } = context.repo; + const { data: pr } = await github.rest.pulls.get({ + owner, + repo, + pull_number: issue.number, + }); + const baselineMatch = body.match(/\b(?:baseline|base)[\s:=]+([^\s`]+)/i); + const candidateMatch = body.match(/\b(?:candidate|head)[\s:=]+([^\s`]+)/i); + const providerMatch = body.match(/\b(?:provider|crabbox_provider)[\s:=]+([^\s`]+)/i); + const leaseMatch = body.match(/\b(?:lease|lease_id|crabbox_lease_id)[\s:=]+([^\s`]+)/i); + const provider = providerMatch?.[1] || "aws"; + if (!["aws", "hetzner"].includes(provider)) { + core.setFailed(`Unsupported Crabbox provider for Mantis Telegram desktop proof: ${provider}`); + return; + } + const rawCandidate = candidateMatch?.[1]; + const candidate = + rawCandidate && !["head", "pr", "pr-head"].includes(rawCandidate.toLowerCase()) + ? 
rawCandidate + : pr.head.sha; + + setOutput("should_run", "true"); + setOutput("baseline_ref", baselineMatch?.[1] || "main"); + setOutput("candidate_ref", candidate); + setOutput("pr_number", String(issue.number)); + setOutput("instructions", body); + setOutput("crabbox_provider", provider); + setOutput("lease_id", leaseMatch?.[1] || ""); + setOutput("request_source", "issue_comment"); + + await github.rest.reactions.createForIssueComment({ + owner, + repo, + comment_id: context.payload.comment.id, + content: "eyes", + }).catch((error) => core.warning(`Could not add eyes reaction: ${error.message}`)); + + validate_refs: + name: Validate selected refs + needs: resolve_request + if: ${{ needs.resolve_request.outputs.should_run == 'true' }} + runs-on: ubuntu-24.04 + outputs: + baseline_revision: ${{ steps.validate.outputs.baseline_revision }} + candidate_revision: ${{ steps.validate.outputs.candidate_revision }} + steps: + - name: Checkout harness ref + uses: actions/checkout@v6 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Validate refs are trusted + id: validate + env: + BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }} + CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }} + GH_TOKEN: ${{ github.token }} + shell: bash + run: | + set -euo pipefail + + git fetch --no-tags origin +refs/heads/main:refs/remotes/origin/main + + validate_ref() { + local label="$1" + local input_ref="$2" + local revision="" + local reason="" + + revision="$(git rev-parse "${input_ref}^{commit}")" + if git merge-base --is-ancestor "$revision" refs/remotes/origin/main; then + reason="main-ancestor" + elif git tag --points-at "$revision" | grep -Eq '^v'; then + reason="release-tag" + else + local pr_head_count + pr_head_count="$( + gh api \ + -H "Accept: application/vnd.github+json" \ + "repos/${GITHUB_REPOSITORY}/commits/${revision}/pulls" \ + --jq '[.[] | select(.state == "open" and .head.repo.full_name == "'"${GITHUB_REPOSITORY}"'" and 
.head.sha == "'"${revision}"'")] | length' + )" + if [[ "$pr_head_count" != "0" ]]; then + reason="open-pr-head" + fi + fi + + if [[ -z "$reason" ]]; then + echo "${label} ref '${input_ref}' resolved to ${revision}, which is not trusted for this secret-bearing Mantis run." >&2 + exit 1 + fi + printf '%s\n' "$revision" + } + + baseline_revision="$(validate_ref baseline "$BASELINE_REF")" + candidate_revision="$(validate_ref candidate "$CANDIDATE_REF")" + echo "baseline_revision=${baseline_revision}" >> "$GITHUB_OUTPUT" + echo "candidate_revision=${candidate_revision}" >> "$GITHUB_OUTPUT" + { + echo "baseline: \`${BASELINE_REF}\`" + echo "baseline SHA: \`${baseline_revision}\`" + echo "candidate: \`${CANDIDATE_REF}\`" + echo "candidate SHA: \`${candidate_revision}\`" + } >> "$GITHUB_STEP_SUMMARY" + + run_telegram_desktop_proof: + name: Run agentic native Telegram proof + needs: [resolve_request, validate_refs] + if: ${{ needs.resolve_request.outputs.should_run == 'true' }} + runs-on: ubuntu-24.04 + timeout-minutes: 360 + environment: qa-live-shared + outputs: + comparison_status: ${{ steps.inspect.outputs.comparison_status }} + output_dir: ${{ steps.inspect.outputs.output_dir }} + steps: + - name: Checkout harness ref + uses: actions/checkout@v6 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Setup Node environment + uses: ./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Setup Go for Crabbox CLI + uses: actions/setup-go@v6 + with: + go-version: "1.26.x" + cache: false + + - name: Install Crabbox CLI + shell: bash + run: | + set -euo pipefail + install_dir="${RUNNER_TEMP}/crabbox" + mkdir -p "$install_dir/src" "$HOME/.local/bin" + git init "$install_dir/src" + git -C "$install_dir/src" remote add origin https://github.com/openclaw/crabbox.git + git -C "$install_dir/src" fetch --depth 1 origin "$CRABBOX_REF" + git -C "$install_dir/src" checkout 
--detach FETCH_HEAD + go build -C "$install_dir/src" -o "$HOME/.local/bin/crabbox" ./cmd/crabbox + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + "$HOME/.local/bin/crabbox" --version + "$HOME/.local/bin/crabbox" media preview --help >/dev/null + + - name: Ensure agent key exists + env: + OPENAI_API_KEY: ${{ secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }} + run: | + set -euo pipefail + if [ -z "${OPENAI_API_KEY:-}" ]; then + echo "Missing OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY or OPENAI_API_KEY secret." >&2 + exit 1 + fi + + - name: Run Codex Mantis Telegram agent + uses: openai/codex-action@5c3f4ccdb2b8790f73d6b21751ac00e602aa0c02 + env: + BASELINE_REF: ${{ needs.resolve_request.outputs.baseline_ref }} + BASELINE_SHA: ${{ needs.validate_refs.outputs.baseline_revision }} + CANDIDATE_REF: ${{ needs.resolve_request.outputs.candidate_ref }} + CANDIDATE_SHA: ${{ needs.validate_refs.outputs.candidate_revision }} + CRABBOX_ACCESS_CLIENT_ID: ${{ secrets.CRABBOX_ACCESS_CLIENT_ID }} + CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }} + CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR }} + CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN }} + CRABBOX_LEASE_ID: ${{ needs.resolve_request.outputs.lease_id }} + CRABBOX_PROVIDER: ${{ needs.resolve_request.outputs.crabbox_provider }} + GH_TOKEN: ${{ github.token }} + MANTIS_INSTRUCTIONS: ${{ needs.resolve_request.outputs.instructions }} + MANTIS_OUTPUT_DIR: ${{ env.MANTIS_OUTPUT_DIR }} + MANTIS_PR_NUMBER: ${{ needs.resolve_request.outputs.pr_number }} + OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }} + OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }} + OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR }} + OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN: ${{ secrets.OPENCLAW_QA_MANTIS_CRABBOX_COORDINATOR_TOKEN }} + with: + openai-api-key: ${{ 
secrets.OPENCLAW_MANTIS_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }} + prompt-file: .github/codex/prompts/mantis-telegram-desktop-proof.md + model: ${{ vars.OPENCLAW_CI_OPENAI_MODEL_BARE }} + effort: high + sandbox: danger-full-access + safety-strategy: drop-sudo + codex-args: '["--full-auto"]' + + - name: Inspect Mantis evidence manifest + id: inspect + if: ${{ always() }} + shell: bash + run: | + set -euo pipefail + output_dir="$MANTIS_OUTPUT_DIR" + echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT" + manifest="$output_dir/mantis-evidence.json" + if [[ ! -f "$manifest" ]]; then + echo "Mantis agent did not produce ${manifest}." >&2 + exit 1 + fi + comparison_status="$(jq -r 'if .comparison.pass then "pass" else "fail" end' "$manifest")" + echo "comparison_status=${comparison_status}" >> "$GITHUB_OUTPUT" + + - name: Upload Mantis Telegram desktop artifacts + id: upload_artifact + if: ${{ always() && steps.inspect.outputs.output_dir != '' }} + uses: actions/upload-artifact@v4 + with: + name: mantis-telegram-desktop-proof-${{ github.run_id }}-${{ github.run_attempt }} + path: ${{ steps.inspect.outputs.output_dir }} + retention-days: 14 + if-no-files-found: warn + + - name: Create Mantis GitHub App token + id: mantis_app_token + if: ${{ always() && needs.resolve_request.outputs.pr_number != '' }} + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ secrets.MANTIS_GITHUB_APP_ID }} + private-key: ${{ secrets.MANTIS_GITHUB_APP_PRIVATE_KEY }} + owner: ${{ github.repository_owner }} + repositories: ${{ github.event.repository.name }} + permission-contents: write + permission-issues: write + permission-pull-requests: write + + - name: Comment PR with inline QA evidence + if: ${{ always() && needs.resolve_request.outputs.pr_number != '' && steps.inspect.outputs.output_dir != '' }} + env: + ARTIFACT_URL: ${{ steps.upload_artifact.outputs.artifact-url }} + GH_TOKEN: ${{ steps.mantis_app_token.outputs.token }} + REQUEST_SOURCE: ${{ 
needs.resolve_request.outputs.request_source }} + TARGET_PR: ${{ needs.resolve_request.outputs.pr_number }} + shell: bash + run: | + set -euo pipefail + root="$MANTIS_OUTPUT_DIR" + if [[ ! -f "$root/mantis-evidence.json" ]]; then + echo "No Mantis evidence manifest found; skipping PR evidence comment." + exit 0 + fi + artifact_url_args=() + if [[ -n "${ARTIFACT_URL:-}" ]]; then + artifact_url_args=(--artifact-url "$ARTIFACT_URL") + fi + node scripts/mantis/publish-pr-evidence.mjs \ + --manifest "$root/mantis-evidence.json" \ + --target-pr "$TARGET_PR" \ + --artifact-root "mantis/telegram-desktop/pr-${TARGET_PR}/run-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" \ + --marker "<!-- mantis-telegram-desktop-proof -->" \ + "${artifact_url_args[@]}" \ + --run-url "https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" \ + --request-source "$REQUEST_SOURCE" + + - name: Fail when Mantis Telegram desktop proof failed + if: ${{ always() && steps.inspect.outputs.output_dir != '' && steps.inspect.outputs.comparison_status != 'pass' }} + env: + COMPARISON_STATUS: ${{ steps.inspect.outputs.comparison_status }} + run: | + echo "Mantis Telegram desktop proof failed: comparison=${COMPARISON_STATUS:-unset}." >&2 + exit 1