Skip to content

feat: Add llm-eval-harness kit #795

feat: Add llm-eval-harness kit

feat: Add llm-eval-harness kit #795

Workflow file for this run

name: Validate PR Contribution
# Phase 1 — structural validation.
# Runs on pull requests whose title starts with "feat:" and that touch kits/
# (or .github/workflows/validate-pr.yml). Posts a single auto-updating comment
# with errors/warnings/info, and manages the following labels:
#   - passing-checks         → zero errors and zero warnings found
#   - requested-improvements → warnings present but no hard errors
#   - failing-checks         → one or more hard errors found
#   - agentkit-challenge     → applied to every PR this workflow runs on
# A challenge-failed label is also created in the repository, but it is never
# applied by this workflow.
#
# Structure validated against CONTRIBUTING.md and CLAUDE.md:
#
#   kits/<name>/                  ← FLAT — no category subdirectory
#     lamatic.config.ts           ← REQUIRED (all types)
#     agent.md                    ← REQUIRED (all types)
#     README.md                   ← REQUIRED (all types)
#     constitutions/default.md    ← REQUIRED (all types)
#     flows/<flow-name>.ts        ← REQUIRED — one .ts file per flow
#     .env.example                ← REQUIRED (bundles + kits); a missing file is
#                                   currently reported as a warning, not an error
#     apps/package.json           ← REQUIRED (kits only)
#     apps/.env.example           ← REQUIRED (kits only)
# Trigger: pull_request_target events (open, edit, new commits, reopen) for
# PRs that change anything under kits/ or the validate-pr.yml workflow file.
on:
  pull_request_target:
    types:
      - opened
      - edited
      - synchronize
      - reopened
    paths:
      - "kits/**"
      - ".github/workflows/validate-pr.yml"
jobs:
  validate:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    # Only PRs whose title begins with "feat:" are validated.
    if: startsWith(github.event.pull_request.title, 'feat:')
    # Token scope: read repository contents; write to pull requests and
    # issues (used by the comment and label steps).
    permissions:
      contents: read
      issues: write
      pull-requests: write
    steps:
# Check out the contributor's head commit with full history so the merge
# base with the base branch can be computed later.
# NOTE(review): this places PR-supplied files in the workspace of a
# pull_request_target run. The following steps only inspect those files
# (git, grep, file tests) and never execute them — keep it that way.
# Consider `persist-credentials: false` here; TODO confirm the later
# `git fetch` still works without stored credentials.
- name: Checkout PR head
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
    ref: ${{ github.event.pull_request.head.sha }}
# Make origin/<base branch> available for the merge-base computation.
# The branch name is handed over through the environment and quoted, rather
# than interpolated into the script text, so an unusual branch name can never
# be parsed as shell syntax. This mirrors how the validation step receives
# BASE_REF.
- name: Fetch base branch
  env:
    BASE_REF: ${{ github.event.pull_request.base.ref }}
  run: git fetch origin "$BASE_REF"
# Structural validation of the contribution. The step id is referenced by
# later steps through steps.validate.outcome.
- id: validate
  name: Validate contribution structure
  env:
    BASE_REF: ${{ github.event.pull_request.base.ref }}
  run: |
    set -euo pipefail
    # Accumulators filled by the checks below:
    #   ERRORS / WARNINGS  — human-readable findings
    #   NEW_KITS           — kit directories that do not exist at the merge base
    #   EXISTING_MODIFIED  — kit directories that already exist at the merge base
    declare -a ERRORS=()
    declare -a WARNINGS=()
    declare -a NEW_KITS=()
    declare -a EXISTING_MODIFIED=()
# --- A. Compute diff ---
# List every path the PR adds, modifies or deletes relative to the merge base
# with the base branch.
#
# --no-renames: with rename detection enabled, --name-only prints only the
# NEW path of a moved file. Moving files out of an existing kit would then
# hide that kit from Check 1. Reporting a move as delete + add lists both the
# old and the new path.
MERGE_BASE=$(git merge-base "origin/$BASE_REF" HEAD)
# Best effort: a failing diff is treated the same as "no changed files".
CHANGED_FILES=$(git diff --name-only --no-renames --diff-filter=ACMRD "$MERGE_BASE"...HEAD || true)
if [ -z "$CHANGED_FILES" ]; then
  # Nothing to validate: publish a short summary and finish successfully.
  SUMMARY_FILE="/tmp/pr_validation_summary.md"
  {
    echo "## :robot_face: AgentKit Structural Validation"
    echo ""
    echo "No contribution files detected in this PR."
  } > "$SUMMARY_FILE"
  cat "$SUMMARY_FILE" >> "$GITHUB_STEP_SUMMARY"
  exit 0
fi
# --- B. Extract unique kit paths (flat — kits/<name>/) ---
# KIT_MAP is used as a set whose keys are the first two path components
# (kits/<name>) of every changed path under kits/. All other changed paths
# are collected in OTHER_FILES.
# NOTE(review): a file placed directly under kits/ (e.g. kits/README.md) is
# keyed as if it were a kit directory — confirm that is intended.
declare -A KIT_MAP
OTHER_FILES=()
while IFS= read -r changed_path; do
  [[ -n "$changed_path" ]] || continue
  case "$changed_path" in
    kits/*)
      # Strip the kits/ prefix, then keep everything up to the next slash.
      remainder=${changed_path#kits/}
      KIT_MAP["kits/${remainder%%/*}"]=1
      ;;
    *)
      OTHER_FILES+=("$changed_path")
      ;;
  esac
done <<< "$CHANGED_FILES"
# --- C. Check 1: No edits to existing kits ---
# A kit path that has at least one entry in the merge-base tree already
# exists and must not be touched; every other kit path is a new contribution.
CHECK1_PASS=true
for kit_path in "${!KIT_MAP[@]}"; do
  # A failing ls-tree is treated like "no entries".
  base_entries=$(git ls-tree --name-only "$MERGE_BASE" -- "$kit_path" 2>/dev/null || true)
  if [ -z "$base_entries" ]; then
    NEW_KITS+=("$kit_path")
    continue
  fi
  EXISTING_MODIFIED+=("$kit_path")
  ERRORS+=("Existing kit modified: $kit_path — feat: PRs should only add new contributions")
  CHECK1_PASS=false
done
# --- D. Check 2: Required root files ---
# Every new kit, whatever its type, needs these files plus a flows/ directory.
CHECK2_PASS=true
required_files=(lamatic.config.ts agent.md README.md constitutions/default.md)
for kit_path in "${NEW_KITS[@]}"; do
  for required in "${required_files[@]}"; do
    if [ -f "$kit_path/$required" ]; then
      continue
    fi
    ERRORS+=("Missing $required in $kit_path")
    CHECK2_PASS=false
  done
  # The flow files themselves are inspected by Check 3; here only the
  # directory has to exist.
  if ! [ -d "$kit_path/flows" ]; then
    ERRORS+=("Missing flows/ directory in $kit_path")
    CHECK2_PASS=false
  fi
done
# --- E. Check 3: Flow files must be .ts files ---
# Each new kit with a flows/ directory needs at least one regular *.ts file
# in it. Subdirectories of flows/ only produce a warning.
CHECK3_PASS=true
for kit_path in "${NEW_KITS[@]}"; do
  flows_dir="$kit_path/flows"
  # Kits without a flows/ directory are skipped here.
  [ -d "$flows_dir" ] || continue

  # An unmatched glob stays literal, hence the -f test on every candidate.
  has_ts_flow=false
  for candidate in "$flows_dir"/*.ts; do
    if [ -f "$candidate" ]; then
      has_ts_flow=true
      break
    fi
  done
  if [ "$has_ts_flow" = false ]; then
    ERRORS+=("No .ts flow files found in $kit_path/flows/ — each flow must be a .ts file exported from Lamatic Studio")
    CHECK3_PASS=false
  fi

  # Warn about the old json-based layout: one folder per flow.
  for flow_subdir in "$flows_dir"/*/; do
    if [ -d "$flow_subdir" ]; then
      WARNINGS+=("Old-style flow subdirectory found: $flow_subdir — flows should be .ts files, not folders. Re-export from Lamatic Studio.")
    fi
  done
done
# --- F. Check 4: lamatic.config.ts must declare type ---
# For every new kit that has a lamatic.config.ts:
#   * the file must contain one of the quoted literals kit / bundle / template
#     (errors otherwise);
#   * kits additionally need apps/package.json and apps/.env.example (errors);
#   * kits and bundles should have a root .env.example (warning);
#   * the file should mention kits/<name> (warning).

# config_declares_type FILE TYPE — succeeds when FILE contains TYPE as a
# literal wrapped in double or in single quotes.
config_declares_type() {
  grep -qF -e "\"$2\"" -e "'$2'" -- "$1"
}

CHECK4_PASS=true
for kit_path in "${NEW_KITS[@]}"; do
  config_file="$kit_path/lamatic.config.ts"
  # Kits without a config file are skipped here.
  [ -f "$config_file" ] || continue

  # First match wins, in the order kit, bundle, template.
  KIT_TYPE=""
  for candidate_type in kit bundle template; do
    if config_declares_type "$config_file" "$candidate_type"; then
      KIT_TYPE="$candidate_type"
      break
    fi
  done
  if [ -z "$KIT_TYPE" ]; then
    ERRORS+=("lamatic.config.ts in $kit_path is missing a valid type field (\"kit\", \"bundle\", or \"template\")")
    CHECK4_PASS=false
    continue
  fi

  # Kits require apps/package.json and apps/.env.example
  if [ "$KIT_TYPE" = "kit" ]; then
    if [ ! -f "$kit_path/apps/package.json" ]; then
      ERRORS+=("Kit $kit_path is missing apps/package.json — kits must include a Next.js app")
      CHECK4_PASS=false
    fi
    if [ ! -f "$kit_path/apps/.env.example" ]; then
      ERRORS+=("Kit $kit_path is missing apps/.env.example")
      CHECK4_PASS=false
    fi
  fi

  # Bundles and kits should ship a root .env.example (warning only)
  if [ "$KIT_TYPE" = "kit" ] || [ "$KIT_TYPE" = "bundle" ]; then
    if [ ! -f "$kit_path/.env.example" ]; then
      WARNINGS+=("$kit_path is missing .env.example — bundles and kits should include one")
    fi
  fi

  # Check links.github points to kits/<name>.
  # -F: the directory name comes from the PR and must be matched literally;
  # as a regex, a "." in the name would match any character.
  kit_name=$(basename "$kit_path")
  if ! grep -qF -- "kits/$kit_name" "$config_file"; then
    WARNINGS+=("lamatic.config.ts in $kit_path — links.github should point to kits/$kit_name")
  fi
done
# --- G. Check 5: Warn on changes outside kits/ ---
# Every changed path that is not under kits/ yields one warning; the flag
# drives the matching row of the results table.
CHECK5_WARN=false
if (( ${#OTHER_FILES[@]} > 0 )); then
  CHECK5_WARN=true
  for outside_path in "${OTHER_FILES[@]}"; do
    WARNINGS+=("File outside kits/ modified: $outside_path")
  done
fi
# --- H. Check 6: No committed .env files ---
# Looks for .env / .env.local at the kit root and under apps/ of each new kit.
for kit_path in "${NEW_KITS[@]}"; do
  for relative in .env .env.local apps/.env apps/.env.local; do
    env_file="$kit_path/$relative"
    [ -f "$env_file" ] || continue
    ERRORS+=("Committed env file found: $env_file — never commit .env or .env.local, only .env.example")
  done
done
# --- I. Build summary ---
# Renders the Markdown report into SUMMARY_FILE (read by the later comment and
# label steps) and appends it to the job's step summary.
SUMMARY_FILE="/tmp/pr_validation_summary.md"

# summary_has_type FILE TYPE — succeeds when FILE contains TYPE as a literal
# wrapped in double or in single quotes (same rule as Check 4).
summary_has_type() {
  grep -qF -e "\"$2\"" -e "'$2'" -- "$1"
}

# summary_row LABEL OK [NOT_OK_CELL] — print one row of the results table.
# OK is "true" for a pass; NOT_OK_CELL defaults to the failure marker.
summary_row() {
  if [ "$2" = true ]; then
    echo "| $1 | :white_check_mark: Pass |"
  else
    echo "| $1 | ${3:-:x: Fail} |"
  fi
}

{
  echo "## :robot_face: AgentKit Structural Validation"
  echo ""
  if [ ${#NEW_KITS[@]} -gt 0 ]; then
    echo "### New Contributions Detected"
    for k in "${NEW_KITS[@]}"; do
      # Accept both quote styles, exactly like Check 4, so the displayed type
      # agrees with the validated one. A config without any valid type is
      # shown as Unknown instead of being mislabelled as Template.
      ktype="Unknown"
      if [ -f "$k/lamatic.config.ts" ]; then
        if summary_has_type "$k/lamatic.config.ts" kit; then
          ktype="Kit"
        elif summary_has_type "$k/lamatic.config.ts" bundle; then
          ktype="Bundle"
        elif summary_has_type "$k/lamatic.config.ts" template; then
          ktype="Template"
        fi
      fi
      echo "- **$ktype**: \`$k\`"
    done
    echo ""
  fi
  if [ ${#EXISTING_MODIFIED[@]} -gt 0 ]; then
    echo "### Existing Kits Modified (not allowed in feat: PRs)"
    for k in "${EXISTING_MODIFIED[@]}"; do
      echo "- \`$k\`"
    done
    echo ""
  fi
  echo "### Check Results"
  echo ""
  echo "| Check | Status |"
  echo "|-------|--------|"
  summary_row "No edits to existing kits" "$CHECK1_PASS"
  summary_row "Required root files present" "$CHECK2_PASS"
  summary_row "Flow .ts files present" "$CHECK3_PASS"
  summary_row "lamatic.config.ts valid" "$CHECK4_PASS"
  # Check 5 never fails the run; it is reported as a warning instead.
  if [ "$CHECK5_WARN" = false ]; then
    summary_row "No changes outside kits/" true
  else
    summary_row "No changes outside kits/" false ":warning: Warning"
  fi
  echo ""
  if [ ${#ERRORS[@]} -gt 0 ]; then
    echo "### :x: Errors"
    echo ""
    for err in "${ERRORS[@]}"; do
      echo "- $err"
    done
    echo ""
  fi
  if [ ${#WARNINGS[@]} -gt 0 ]; then
    # The label step greps for this exact heading to detect warnings.
    echo "### :warning: Warnings"
    echo ""
    for warn in "${WARNINGS[@]}"; do
      echo "- $warn"
    done
    echo ""
  fi
  echo "---"
  if [ ${#ERRORS[@]} -eq 0 ]; then
    echo ":tada: All checks passed! This contribution follows the AgentKit structure."
  else
    echo ":stop_sign: Please fix the errors above before this PR can be merged."
    echo ""
    echo "Refer to [CONTRIBUTING.md](./CONTRIBUTING.md) and [CLAUDE.md](./CLAUDE.md) for the expected folder structure."
  fi
} > "$SUMMARY_FILE"
cat "$SUMMARY_FILE" >> "$GITHUB_STEP_SUMMARY"
# Final verdict: with no errors print the success banner; otherwise emit one
# ::error:: annotation per finding and fail the step.
if (( ${#ERRORS[@]} == 0 )); then
  echo "=== ALL CHECKS PASSED ==="
else
  for message in "${ERRORS[@]}"; do
    printf '::error::%s\n' "$message"
  done
  exit 1
fi
# Publish the validation report as a PR comment, updating the bot's previous
# report in place when one exists. Runs even when validation failed.
- name: Post validation results as PR comment
  if: always()
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    REPO: ${{ github.repository }}
  run: |
    SUMMARY_FILE="/tmp/pr_validation_summary.md"
    if [ ! -f "$SUMMARY_FILE" ]; then
      echo "No summary file found, skipping comment."
      exit 0
    fi
    # --paginate: the list-comments endpoint returns a single page per
    # request. Without it, a report that has scrolled past the first page of
    # a long thread is not found and a duplicate comment is posted on every
    # run.
    COMMENT_ID=$(gh api --paginate "repos/$REPO/issues/$PR_NUMBER/comments" \
      --jq '.[] | select(.user.type == "Bot" and (.body | startswith("## :robot_face: AgentKit Structural Validation"))) | .id' \
      | head -1)
    if [ -n "$COMMENT_ID" ]; then
      # Update the existing report in place.
      gh api "repos/$REPO/issues/comments/$COMMENT_ID" \
        --method PATCH \
        -f body="$(cat "$SUMMARY_FILE")"
    else
      # First run on this PR: create the report comment.
      gh api "repos/$REPO/issues/$PR_NUMBER/comments" \
        --method POST \
        -f body="$(cat "$SUMMARY_FILE")"
    fi
# Create any of the workflow's labels that the repository does not have yet.
# Creation is best effort: failures are ignored.
- name: Ensure all required labels exist in repo
  if: always()
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    REPO="${{ github.repository }}"
    # One entry per label, formatted as name:color:description.
    label_specs=(
      "passing-checks:0e8a16:All structural validation checks passed"
      "requested-improvements:e4a11b:Validation passed with warnings — improvements requested"
      "failing-checks:d73a4a:One or more structural validation checks failed"
      "agentkit-challenge:1d76db:Submitted as part of the AgentKit Challenge"
      "challenge-failed:b60205:Challenge deadline exceeded with unresolved failures"
    )
    for spec in "${label_specs[@]}"; do
      IFS=':' read -r LABEL COLOR DESCRIPTION <<< "$spec"
      # Nothing to do when the label lookup succeeds.
      if gh api "repos/$REPO/labels/$LABEL" --silent 2>/dev/null; then
        continue
      fi
      gh api "repos/$REPO/labels" \
        --method POST \
        -f name="$LABEL" \
        -f color="$COLOR" \
        -f description="$DESCRIPTION" 2>/dev/null || true
    done
# Translate the validation outcome into exactly one status label
# (passing-checks / requested-improvements / failing-checks). On failure,
# also request a review and record when the failure was first seen.
# Every GitHub call is best effort and never fails the step.
- name: Manage labels and reviewer
  if: always()
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    REPO: ${{ github.repository }}
    OUTCOME: ${{ steps.validate.outcome }}
  run: |
    SUMMARY_FILE="/tmp/pr_validation_summary.md"

    # set_status_label KEEP — add label KEEP and remove the other two status
    # labels.
    set_status_label() {
      local keep="$1" other
      gh pr edit "$PR_NUMBER" --add-label "$keep" --repo "$REPO" || true
      for other in passing-checks requested-improvements failing-checks; do
        if [ "$other" != "$keep" ]; then
          gh pr edit "$PR_NUMBER" --remove-label "$other" --repo "$REPO" 2>/dev/null || true
        fi
      done
    }

    # Always apply agentkit-challenge
    gh pr edit "$PR_NUMBER" --add-label "agentkit-challenge" --repo "$REPO" || true

    if [ "$OUTCOME" != "success" ]; then
      set_status_label "failing-checks"
      # Assign reviewer
      gh api "repos/$REPO/pulls/$PR_NUMBER/requested_reviewers" \
        --method POST \
        -f 'reviewers[]=amanintech' 2>/dev/null || true
      # Record failure timestamp for expiry workflow. Only the first failure
      # is recorded; an existing marker comment is left untouched.
      # --paginate: look through every page of comments so the marker is
      # found on long threads and not posted a second time.
      FAIL_TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
      EXISTING=$(gh api --paginate "repos/$REPO/issues/$PR_NUMBER/comments" \
        --jq '.[] | select(.body | contains("agentkit-challenge-failed-at")) | .id' \
        | head -1)
      if [ -z "$EXISTING" ]; then
        printf "%s\n" "<!-- agentkit-challenge-failed-at: $FAIL_TIMESTAMP -->" "" "Failure recorded at $FAIL_TIMESTAMP UTC. If this PR is not fixed within 4 weeks it will be automatically closed." > /tmp/fail_comment.md
        gh api "repos/$REPO/issues/$PR_NUMBER/comments" \
          --method POST \
          -f body="$(cat /tmp/fail_comment.md)" || true
      fi
    elif grep -q ":warning: Warnings" "$SUMMARY_FILE" 2>/dev/null; then
      # Passed, but the report contains a warnings section.
      set_status_label "requested-improvements"
    else
      set_status_label "passing-checks"
    fi
# Final gate: runs regardless of earlier results and fails unless the
# validation step's outcome is "success".
- name: Fail job if validation errored
  if: always()
  env:
    VALIDATE_OUTCOME: ${{ steps.validate.outcome }}
  run: |
    if [ "$VALIDATE_OUTCOME" = "success" ]; then
      exit 0
    fi
    echo "::error::Structural validation reported errors. See the PR comment for the full list."
    exit 1