From c5792ef936503450f03e1574cdd4258e077bc9f7 Mon Sep 17 00:00:00 2001
From: jiarong0907 <xingjiarong0907@gmail.com>
Date: Sat, 14 Feb 2026 01:03:42 -0600
Subject: [PATCH 1/3] Fix evaluation workflow not being triggered

---
 .github/workflows/pr-evaluation.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/pr-evaluation.yml b/.github/workflows/pr-evaluation.yml
index 7cfcb57..893ccf9 100644
--- a/.github/workflows/pr-evaluation.yml
+++ b/.github/workflows/pr-evaluation.yml
@@ -45,10 +45,16 @@ jobs:
 
       - name: Add evaluation label to PR
         uses: actions/github-script@v7
+        env:
+          HAS_ROUTER_EVAL_BOT_TOKEN: ${{ secrets.ROUTER_EVAL_BOT_TOKEN != '' }}
         with:
+          github-token: ${{ secrets.ROUTER_EVAL_BOT_TOKEN || github.token }}
           script: |
             const labelName = 'evaluate-requested';
             const issue_number = context.payload.issue.number;
+            if (process.env.HAS_ROUTER_EVAL_BOT_TOKEN !== 'true') {
+              core.warning('ROUTER_EVAL_BOT_TOKEN is not configured. Labeling may not trigger the evaluation workflow due to GITHUB_TOKEN recursion guard.');
+            }
 
             try {
               await github.rest.issues.addLabels({

From 1c30a6de270b6c9d14aab3cb099e1ee45d14a065 Mon Sep 17 00:00:00 2001
From: jiarong0907 <xingjiarong0907@gmail.com>
Date: Sat, 14 Feb 2026 01:12:43 -0600
Subject: [PATCH 2/3] Trigger evaluation via workflow_dispatch

---
 .github/workflows/pr-evaluation-run.yml | 60 +++++++++++--------------
 .github/workflows/pr-evaluation.yml     | 59 +++++++++++-------------
 2 files changed, 51 insertions(+), 68 deletions(-)

diff --git a/.github/workflows/pr-evaluation-run.yml b/.github/workflows/pr-evaluation-run.yml
index 8632fed..475a37b 100644
--- a/.github/workflows/pr-evaluation-run.yml
+++ b/.github/workflows/pr-evaluation-run.yml
@@ -1,35 +1,46 @@
 name: Router Submission Evaluation
 
 on:
-  pull_request_target:
-    types: [labeled, synchronize, reopened]
+  workflow_dispatch:  # started by pr-evaluation.yml through createWorkflowDispatch
+    inputs:
+      pr_number:
+        description: Pull request number to evaluate
+        required: true
+        type: string
+      base_ref:
+        description: Base branch ref for evaluation scripts checkout
+        required: true
+        type: string
+      base_sha:
+        description: Base commit SHA for PR diff/evaluation
+        required: true
+        type: string
 
 jobs:
   evaluate-router:
-    if: >-
-      contains(github.event.pull_request.labels.*.name, 'evaluate-requested') &&
-      (
-        github.event.action != 'labeled' ||
-        github.event.label.name == 'evaluate-requested'
-      )
     runs-on: self-hosted
     permissions:
       contents: read
       issues: write
       checks: write
       pull-requests: write
+    env:  # job-level copies of the dispatch inputs, referenced by the steps below
+      PR_NUMBER: ${{ inputs.pr_number }}
+      BASE_REF: ${{ inputs.base_ref }}
+      BASE_SHA: ${{ inputs.base_sha }}
+      PR_CHECKOUT_REF: ${{ format('refs/pull/{0}/head', inputs.pr_number) }}
     steps:
       - name: Checkout base repository (for evaluation scripts)
         uses: actions/checkout@v4
         with:
-          ref: ${{ github.event.pull_request.base.ref }}
+          ref: ${{ env.BASE_REF }}
           path: base
           fetch-depth: 0
 
       - name: Checkout PR branch (for prediction file only)
         uses: actions/checkout@v4
         with:
-          ref: ${{ github.event.pull_request.head.sha }}
+          ref: ${{ env.PR_CHECKOUT_REF }}  # resolves to refs/pull/<pr_number>/head
           path: pr
           fetch-depth: 0
 
 
@@ -39,8 +50,8 @@ jobs:
         working-directory: pr
         run: |
           set -euo pipefail
-          BASE_REF="${{ github.event.pull_request.base.ref }}"
-          BASE_SHA="${{ github.event.pull_request.base.sha }}"
+          # BASE_REF/BASE_SHA come from the job-level env block; inlining expressions into shell risks injection.
+          : "${BASE_REF:=}" "${BASE_SHA:=}"  # default to empty so `set -u` cannot pre-empt the check below
 
           if [[ -z "$BASE_SHA" ]]; then
             echo "Error: Could not determine PR base SHA" >&2
@@ -154,9 +165,9 @@ jobs:
           ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/dataset
         run: |
           set -euo pipefail; trap 'cat evaluation_output.txt' EXIT
-          BASE_SHA="${{ github.event.pull_request.base.sha }}"
+          : "${BASE_SHA:=}" "${PR_NUMBER:=}"  # supplied by the job-level env block; no inline expression expansion
           uv run python automation/process_pr_submission.py \
-            --pr "${{ github.event.pull_request.number }}" \
+            --pr "$PR_NUMBER" \
             --router "${{ steps.detect.outputs.router }}" \
             --split "${{ steps.detect.outputs.split }}" \
             --base-ref "$BASE_SHA" > evaluation_output.txt 2>&1
@@ -206,28 +217,9 @@ jobs:
             comment += '*Evaluation completed by RouterArena automated workflow*';
 
             await github.rest.issues.createComment({
-              issue_number: context.payload.pull_request.number,
+              issue_number: Number(process.env.PR_NUMBER), // job-level env; avoids inlining an expression into JS
               owner: context.repo.owner,
               repo: context.repo.repo,
               body: comment
             });
             console.log('Successfully posted evaluation results as PR comment');
-
-      - name: Remove evaluation label
-        if: ${{ always() }}
-        uses: actions/github-script@v7
-        with:
-          script: |
-            try {
-              await github.rest.issues.removeLabel({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: context.payload.pull_request.number,
-                name: 'evaluate-requested'
-              });
-            } catch (error) {
-              // Ignore if label is already removed/missing.
-              if (error.status !== 404) {
-                throw error;
-              }
-            }
diff --git a/.github/workflows/pr-evaluation.yml b/.github/workflows/pr-evaluation.yml
index 893ccf9..5635825 100644
--- a/.github/workflows/pr-evaluation.yml
+++ b/.github/workflows/pr-evaluation.yml
@@ -17,6 +17,7 @@ jobs:
       )
     runs-on: self-hosted
     permissions:
+      actions: write  # lets this job's token call createWorkflowDispatch for pr-evaluation-run.yml
       issues: write
       pull-requests: write
       checks: write
@@ -43,42 +44,36 @@ jobs:
               }
             }
 
-      - name: Add evaluation label to PR
+      - name: Fetch PR details
+        id: pr
         uses: actions/github-script@v7
-        env:
-          HAS_ROUTER_EVAL_BOT_TOKEN: ${{ secrets.ROUTER_EVAL_BOT_TOKEN != '' }}
         with:
-          github-token: ${{ secrets.ROUTER_EVAL_BOT_TOKEN || github.token }}
           script: |
-            const labelName = 'evaluate-requested';
-            const issue_number = context.payload.issue.number;
-            if (process.env.HAS_ROUTER_EVAL_BOT_TOKEN !== 'true') {
-              core.warning('ROUTER_EVAL_BOT_TOKEN is not configured. Labeling may not trigger the evaluation workflow due to GITHUB_TOKEN recursion guard.');
-            }
+            const pr = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.payload.issue.number
+            });
+            core.setOutput('number', String(pr.data.number));
+            core.setOutput('base_ref', pr.data.base.ref);
+            core.setOutput('base_sha', pr.data.base.sha);
 
-            try {
-              await github.rest.issues.addLabels({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number,
-                labels: [labelName]
-              });
-            } catch (error) {
-              // If the label does not exist yet, create it once then retry.
-              if (error.status !== 422) {
-                throw error;
+      - name: Dispatch evaluation workflow
+        uses: actions/github-script@v7
+        env:  # passed via env, not inlined, so a branch name containing a quote cannot alter the script
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          BASE_REF: ${{ steps.pr.outputs.base_ref }}
+          BASE_SHA: ${{ steps.pr.outputs.base_sha }}
+        with:
+          script: |
+            await github.rest.actions.createWorkflowDispatch({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              workflow_id: 'pr-evaluation-run.yml',
+              ref: process.env.BASE_REF, // the run workflow is dispatched on the PR's base branch
+              inputs: {
+                pr_number: process.env.PR_NUMBER,
+                base_ref: process.env.BASE_REF,
+                base_sha: process.env.BASE_SHA
               }
-              await github.rest.issues.createLabel({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                name: labelName,
-                color: '0e8a16',
-                description: 'Triggers Router Submission Evaluation workflow'
-              });
-              await github.rest.issues.addLabels({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number,
-                labels: [labelName]
-              });
-            }
+            });

From 955f8cc0c3224256feb8e52bed886ebff29443bf Mon Sep 17 00:00:00 2001
From: jiarong0907 <xingjiarong0907@gmail.com>
Date: Sat, 14 Feb 2026 01:21:03 -0600
Subject: [PATCH 3/3] Report evaluation progress as a PR check run

---
 .github/workflows/pr-evaluation-run.yml | 67 +++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/.github/workflows/pr-evaluation-run.yml b/.github/workflows/pr-evaluation-run.yml
index 475a37b..a417b8e 100644
--- a/.github/workflows/pr-evaluation-run.yml
+++ b/.github/workflows/pr-evaluation-run.yml
@@ -30,6 +30,38 @@ jobs:
       BASE_SHA: ${{ inputs.base_sha }}
       PR_CHECKOUT_REF: ${{ format('refs/pull/{0}/head', inputs.pr_number) }}
     steps:
+      - name: Fetch PR head SHA
+        id: prmeta
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const pr = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(process.env.PR_NUMBER) // job-level env; avoids inlining an expression into JS
+            });
+            core.setOutput('head_sha', pr.data.head.sha); // consumed by the check-run creation step
+
+      - name: Create in-progress PR check
+        id: checkrun
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const result = await github.rest.checks.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              name: 'Router Submission Evaluation (/evaluate)',
+              head_sha: '${{ steps.prmeta.outputs.head_sha }}',
+              status: 'in_progress',
+              started_at: new Date().toISOString(),
+              details_url: `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
+              output: {
+                title: 'Evaluation started',
+                summary: 'Router evaluation is running.'
+              }
+            });
+            core.setOutput('id', String(result.data.id)); // read by the final "Complete PR check" step
+
       - name: Checkout base repository (for evaluation scripts)
         uses: actions/checkout@v4
         with:
@@ -223,3 +255,38 @@ jobs:
               body: comment
             });
             console.log('Successfully posted evaluation results as PR comment');
+
+      - name: Complete PR check
+        if: ${{ always() && steps.checkrun.outputs.id != '' }}  # run even after failures, but only if a check run was created
+        uses: actions/github-script@v7
+        env:
+          DETECTED_ROUTER: ${{ steps.detect.outputs.router }}
+          DETECT_OUTCOME: ${{ steps.detect.outcome }}
+          EVALUATE_OUTCOME: ${{ steps.evaluate.outcome }}  # NOTE(review): assumes the evaluation step has id "evaluate" - confirm
+        with:
+          script: |
+            let conclusion = 'success'; // default verdict; overridden below when detection or evaluation did not succeed
+            let title = 'Evaluation completed';
+            let summary = 'Router evaluation finished successfully.';
+
+            if (!process.env.DETECTED_ROUTER) { // the detect step produced no router output
+              conclusion = process.env.DETECT_OUTCOME === 'success' ? 'neutral' : 'failure';
+              title = process.env.DETECT_OUTCOME === 'success' ? 'No router file detected' : 'Evaluation setup failed';
+              summary = process.env.DETECT_OUTCOME === 'success'
+                ? 'No changed prediction file was detected for this PR, so evaluation was skipped.'
+                : 'Failed while detecting prediction files for this PR.';
+            } else if (process.env.EVALUATE_OUTCOME !== 'success') { // any non-success outcome (including empty) is reported as failure
+              conclusion = 'failure';
+              title = 'Evaluation failed';
+              summary = 'The evaluation step failed. Check this workflow run logs for details.';
+            }
+
+            await github.rest.checks.update({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              check_run_id: Number('${{ steps.checkrun.outputs.id }}'), // id emitted by the "Create in-progress PR check" step
+              status: 'completed',
+              conclusion,
+              completed_at: new Date().toISOString(),
+              output: { title, summary }
+            });