[Don't Merge] Testing automatic eval #3
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Router Submission Evaluation
#
# Runs when a pull request touches router_inference/predictions/**.
# If the PR adds exactly one new top-level prediction JSON file, the file's
# base name is treated as the router name, the dataset is prepared when it is
# not already present in the workspace, and the submission is evaluated on the
# sub_10 split.
#
# Security note: the router name comes from a file name in the pull request,
# i.e. it is untrusted input. It is never interpolated into a script through a
# workflow expression; it is always handed to the shell via `env:` and expanded
# as a quoted variable.
name: Router Submission Evaluation

on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - "router_inference/predictions/**"

jobs:
  evaluate-router:
    runs-on: self-hosted
    permissions:
      contents: read
      pull-requests: read
    # Every script below relies on bash features ([[ ]], pipefail), so make
    # bash the shell for all steps instead of declaring it on only one of them.
    defaults:
      run:
        shell: bash
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history is needed so the diff against origin/main can find a
          # merge base.
          fetch-depth: 0

      # Sets the step output `router` to the new submission's name, or to an
      # empty string when the PR adds no prediction file (later steps skip).
      - name: Detect new prediction file
        id: detect
        run: |
          set -euo pipefail
          git fetch origin main

          # --diff-filter=A keeps only added files and --name-only prints the
          # bare path, so no column parsing is needed. The pathspec is quoted so
          # that git, not the shell, expands it; the glob magic keeps "*" from
          # matching across "/" (top-level prediction files only).
          new_files=$(git diff --name-only --diff-filter=A origin/main...HEAD \
            -- ':(glob)router_inference/predictions/*.json')

          if [[ -z "$new_files" ]]; then
            echo "No newly added prediction file detected; skipping evaluation."
            echo "router=" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          file_count=$(printf '%s\n' "$new_files" | wc -l)
          if (( file_count != 1 )); then
            echo "Expected exactly one new prediction file, found:" >&2
            printf '%s\n' "$new_files" >&2
            exit 1
          fi

          router_name=$(basename -- "$new_files" .json)

          # The name is attacker-controlled (it is a file name from the PR).
          # Accept only a conservative identifier so that nothing unexpected
          # reaches the step output or the evaluation command line.
          # NOTE(review): confirm this character set matches the project's
          # router naming rules.
          valid_name_re='^[A-Za-z0-9][A-Za-z0-9._-]*$'
          if [[ ! "$router_name" =~ $valid_name_re ]]; then
            echo "Invalid router name derived from prediction file:" >&2
            printf '%s\n' "$new_files" >&2
            exit 1
          fi

          printf 'router=%s\n' "$router_name" >> "$GITHUB_OUTPUT"

      - name: Show detected router
        if: ${{ steps.detect.outputs.router != '' }}
        env:
          ROUTER_NAME: ${{ steps.detect.outputs.router }}
        run: |
          set -euo pipefail
          echo "Detected router submission: ${ROUTER_NAME}"

      - name: Prepare dataset
        if: ${{ steps.detect.outputs.router != '' }}
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          set -euo pipefail
          dataset_dir="${GITHUB_WORKSPACE}/dataset"

          # Verify HF_TOKEN is set if needed for private datasets
          if [[ -z "${HF_TOKEN:-}" ]]; then
            echo "⚠ Warning: HF_TOKEN not set. This may fail if accessing private datasets." >&2
          fi

          # Prepare dataset if not already present. Testing for the data file
          # alone is enough: it cannot exist when the directory is missing.
          if [[ ! -f "${dataset_dir}/router_data_10.json" ]]; then
            echo "Preparing dataset..."
            # Ensure dataset directory exists
            mkdir -p -- "${dataset_dir}"
            uv run python scripts/process_datasets/prep_datasets.py
          else
            echo "Dataset already present, skipping download."
          fi

      - name: Evaluate submission
        if: ${{ steps.detect.outputs.router != '' }}
        env:
          ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/dataset
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Passed through the environment rather than spliced into the script
          # text, so their contents can never be parsed as shell syntax.
          PR_NUMBER: ${{ github.event.pull_request.number }}
          ROUTER_NAME: ${{ steps.detect.outputs.router }}
        run: |
          set -euo pipefail
          uv run python automation/process_pr_submission.py \
            --pr "${PR_NUMBER}" \
            --router "${ROUTER_NAME}" \
            --split sub_10