# Page header captured with this file: "All Examples (Windows Native) #18"

# File: run-all-once-win.yml
# Code: Claude Code
# Review: Ryoichi Ando (ryoichi.ando@zozo.com)
# License: Apache v2.0
# Generated by: run-all-once-gen-win.py
name: All Examples (Windows Native)

# Manual trigger only; both inputs are fixed-choice dropdowns.
on:
  workflow_dispatch:
    inputs:
      # EC2 instance type launched by each batch job.
      instance_type:
        description: EC2 instance type
        type: choice
        required: true
        default: g6e.2xlarge
        options:
          - g6.2xlarge
          - g6e.2xlarge
      # Region used by every AWS CLI call in the jobs below.
      region:
        description: AWS Region
        type: choice
        required: true
        default: us-east-2
        options:
          - us-east-1
          - us-east-2
          - ap-northeast-1

jobs:
run-batch-1:
  name: Run Batch 1 (Windows)
  # The job runs on a hosted Linux runner and drives a remote Windows EC2
  # instance over ssh/scp.
  runs-on: ubuntu-latest
  permissions:
    contents: read
    # Needed so the job can obtain an OIDC token for the AWS role.
    id-token: write
  env:
    AWS_REGION: ${{ github.event.inputs.region }}
    INSTANCE_TYPE: ${{ github.event.inputs.instance_type }}
    BRANCH: ${{ github.ref_name }}
    EXAMPLES: "belt cards codim curtain domino"
    WORKDIR: 'C:\ppf-contact-solver'
    # NOTE(review): this also replaces the runner shell's own USER variable
    # for every step of the job.
    USER: Administrator
  steps:
- name: Show input parameters
  # Prints the run configuration. The values are read from the job-level env
  # block instead of being expanded inline into the script, so a branch name
  # containing shell metacharacters is printed verbatim rather than being
  # interpreted by bash.
  run: |
    echo "## Input Parameters - Batch 1 (Windows)"
    echo "Branch: $BRANCH"
    echo "Instance Type: $INSTANCE_TYPE"
    echo "Region: $AWS_REGION"
    echo "Examples: $EXAMPLES"
# Later steps read helper scripts from .github/workflows/scripts/win and
# archive HEAD, so the repository must be present on the runner.
- name: Checkout repository
  uses: actions/checkout@v4
# Assume the role named by the AWS_ROLE_ARN secret in the selected region.
- name: Configure AWS credentials via OIDC
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
# Fail early if the credentials cannot call STS.
- name: Verify AWS authentication
  run: |
    echo "Testing AWS authentication..."
    aws sts get-caller-identity
- name: Get GitHub Actions runner public IP
  id: runner-ip
  # Output RUNNER_IP: this runner's public IPv4 address, used later to scope
  # the security-group ingress rule to a single /32.
  run: |
    echo "Fetching GitHub Actions runner public IP..."
    # -f makes curl produce no output on an HTTP error instead of handing
    # back an error page as if it were the address.
    RUNNER_IP=$(curl -sf --max-time 10 https://checkip.amazonaws.com | tr -d '\n')
    if [ -z "$RUNNER_IP" ]; then
      echo "ERROR: Failed to get IP from checkip.amazonaws.com"
      exit 1
    fi
    # The value ends up inside an EC2 CidrIp, so reject anything that is not
    # a dotted-quad IPv4 address.
    if ! [[ "$RUNNER_IP" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
      echo "ERROR: Unexpected response from checkip.amazonaws.com"
      exit 1
    fi
    echo "::add-mask::$RUNNER_IP"
    echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_OUTPUT"
    echo "GitHub Actions Runner IP: $RUNNER_IP"
- name: Find Windows Server 2025 AMI
  id: ami
  # Output AMI_ID: the most recently created available Amazon-owned image
  # whose name matches the Windows Server 2025 base pattern.
  run: |
    AMI_ID=$(
      aws ec2 describe-images \
        --region "$AWS_REGION" \
        --owners amazon \
        --filters \
          "Name=name,Values=Windows_Server-2025-English-Full-Base-*" \
          "Name=state,Values=available" \
        --query 'sort_by(Images, &CreationDate)[-1].ImageId' \
        --output text
    )
    # An empty result shows up as an empty string or the literal "None".
    if [ -z "$AMI_ID" ] || [ "$AMI_ID" = "None" ]; then
      echo "ERROR: Windows Server 2025 AMI not found"
      exit 1
    fi
    echo "AMI_ID=$AMI_ID" >> $GITHUB_OUTPUT
- name: Get default VPC ID
  id: vpc
  # Output VPC_ID: id of the region's default VPC (used when the security
  # group has to be created).
  run: |
    VPC_ID=$(
      aws ec2 describe-vpcs \
        --region "$AWS_REGION" \
        --filters "Name=isDefault,Values=true" \
        --query 'Vpcs[0].VpcId' \
        --output text
    )
    if [ -z "$VPC_ID" ] || [ "$VPC_ID" = "None" ]; then
      echo "ERROR: Default VPC not found"
      exit 1
    fi
    echo "VPC_ID=$VPC_ID" >> $GITHUB_OUTPUT
- name: Generate unique identifiers
  id: ids
  # Outputs:
  #   TIMESTAMP - launch time (YYYYmmddHHMMSS), used in the instance Name tag
  #   SSH_PORT  - random port used for every ssh/scp connection to the instance
  run: |
    TIMESTAMP=$(date +%Y%m%d%H%M%S)
    # bash's RANDOM is 0..32767, so the resulting port range is 10001..42768.
    SSH_PORT=$((10001 + RANDOM))
    # Keep the port out of the logs.
    echo "::add-mask::$SSH_PORT"
    echo "TIMESTAMP=$TIMESTAMP" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Setup persistent security group
  id: security-group
  # Finds (or creates) the shared security group and opens the random SSH
  # port for this runner's address only.
  # Outputs: SG_ID, RUNNER_IP_CIDR, SSH_PORT (the last two are read again by
  # the ingress-cleanup step).
  env:
    # Step outputs are passed through the environment rather than being
    # expanded inline into the script text.
    VPC_ID: ${{ steps.vpc.outputs.VPC_ID }}
    RUNNER_IP: ${{ steps.runner-ip.outputs.RUNNER_IP }}
    SSH_PORT: ${{ steps.ids.outputs.SSH_PORT }}
  run: |
    SG_NAME="github-actions-windows-persistent"
    SG_ID=$(aws ec2 describe-security-groups --filters "Name=group-name,Values=$SG_NAME" \
      --query 'SecurityGroups[0].GroupId' --region "$AWS_REGION" --output text || echo "")
    if [ "$SG_ID" = "None" ] || [ -z "$SG_ID" ]; then
      SG_ID=$(aws ec2 create-security-group --group-name "$SG_NAME" \
        --description "GitHub Actions Windows builds" \
        --vpc-id "$VPC_ID" \
        --query 'GroupId' --region "$AWS_REGION" --output text)
      aws ec2 create-tags --resources "$SG_ID" --tags "Key=Name,Value=$SG_NAME" --region "$AWS_REGION"
    fi
    echo "SG_ID=$SG_ID" >> "$GITHUB_OUTPUT"
    RUNNER_IP_CIDR="$RUNNER_IP/32"
    # Only an already-existing rule is tolerated; any other failure (missing
    # permission, malformed CIDR, ...) stops the job instead of being hidden.
    if AUTH_OUTPUT=$(aws ec2 authorize-security-group-ingress --group-id "$SG_ID" \
      --ip-permissions "IpProtocol=tcp,FromPort=$SSH_PORT,ToPort=$SSH_PORT,IpRanges=[{CidrIp=$RUNNER_IP_CIDR}]" \
      --region "$AWS_REGION" 2>&1); then
      echo "$AUTH_OUTPUT"
    elif echo "$AUTH_OUTPUT" | grep -q "InvalidPermission.Duplicate"; then
      echo "Rule may exist"
    else
      echo "$AUTH_OUTPUT"
      echo "ERROR: Failed to authorize SSH ingress"
      exit 1
    fi
    echo "RUNNER_IP_CIDR=$RUNNER_IP_CIDR" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Retrieve SSH key from Parameter Store
  id: keypair
  # Output KEY_PATH: location of the decrypted private key on the runner.
  run: |
    KEY_PATH=/tmp/github-actions-ec2.pem
    # Tighten the umask before the file is created so the private key is
    # never readable by other users, not even between the write and chmod.
    umask 077
    aws ssm get-parameter --name "/github-actions/ec2/ssh-key" --with-decryption \
      --query 'Parameter.Value' --region "$AWS_REGION" --output text > "$KEY_PATH"
    # Still needed when the file already existed with wider permissions.
    chmod 600 "$KEY_PATH"
    echo "KEY_PATH=$KEY_PATH" >> "$GITHUB_OUTPUT"
- name: Create Windows user data script
  # Render the user-data template with the SSH port chosen for this run.
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    TEMPLATE=.github/workflows/scripts/win/user-data.ps1
    sed "s/SSH_PORT_PLACEHOLDER/${PORT}/g" "$TEMPLATE" > /tmp/user-data.ps1
- name: Launch EC2 instance
  id: instance
  # Output INSTANCE_ID: id of the instance started from the AMI, security
  # group and rendered user-data prepared by the earlier steps.
  run: |
    NAME_TAG="gpu-runner-win-batch-1-${{ steps.ids.outputs.TIMESTAMP }}"
    INSTANCE_ID=$(
      aws ec2 run-instances \
        --region "$AWS_REGION" \
        --image-id "${{ steps.ami.outputs.AMI_ID }}" \
        --instance-type "$INSTANCE_TYPE" \
        --key-name "${{ secrets.AWS_KEY_PAIR_NAME }}" \
        --security-group-ids "${{ steps.security-group.outputs.SG_ID }}" \
        --user-data file:///tmp/user-data.ps1 \
        --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=100,VolumeType=gp3,DeleteOnTermination=true}" \
        --instance-initiated-shutdown-behavior terminate \
        --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=${NAME_TAG}},{Key=ManagedBy,Value=GitHubActions},{Key=Batch,Value=1}]" \
        --query 'Instances[0].InstanceId' \
        --output text
    )
    echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_OUTPUT
- name: Wait for instance to be running
  # Blocks until EC2 reports the instance as running, then exports PUBLIC_IP
  # to the job environment for all later ssh/scp steps.
  env:
    INSTANCE_ID: ${{ steps.instance.outputs.INSTANCE_ID }}
  run: |
    aws ec2 wait instance-running --instance-ids "$INSTANCE_ID" --region "$AWS_REGION"
    PUBLIC_IP=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' --region "$AWS_REGION" --output text)
    # An instance without a public address yields "None"; stop here rather
    # than letting every later ssh call try to reach a host named "None".
    if [ -z "$PUBLIC_IP" ] || [ "$PUBLIC_IP" = "None" ]; then
      echo "ERROR: Instance has no public IP address"
      exit 1
    fi
    echo "::add-mask::$PUBLIC_IP"
    echo "PUBLIC_IP=$PUBLIC_IP" >> "$GITHUB_ENV"
- name: Wait for SSH
  # Phase 1 polls until an ssh login succeeds (up to 60 tries, 30 s apart).
  # Phase 2 polls until C:\ssh_ready.txt exists on the instance (up to 30
  # tries) - presumably a marker written by the user-data script; confirm.
  # A timeout in either phase fails the step instead of falling through.
  run: |
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    SSH_OPTS=(-p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY_PATH")

    SSH_UP=false
    for i in $(seq 1 60); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=5 -o BatchMode=yes \
        "Administrator@$PUBLIC_IP" "echo SSH_READY" 2>/dev/null | grep -q SSH_READY; then
        echo "SSH ready!"
        SSH_UP=true
        break
      fi
      echo "Waiting for SSH ($i/60)..."
      sleep 30
    done
    if [ "$SSH_UP" != true ]; then
      echo "ERROR: SSH did not become reachable after 60 attempts"
      exit 1
    fi

    SETUP_DONE=false
    for i in $(seq 1 30); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=10 \
        "Administrator@$PUBLIC_IP" "if (Test-Path C:\\ssh_ready.txt) { echo READY }" 2>/dev/null | grep -q READY; then
        echo "Setup complete!"
        SETUP_DONE=true
        break
      fi
      sleep 30
    done
    if [ "$SETUP_DONE" != true ]; then
      echo "ERROR: C:\\ssh_ready.txt did not appear after 30 attempts"
      exit 1
    fi
- name: Install NVIDIA driver only (no CUDA toolkit)
  # Upload the driver installer script and execute it on the instance.
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    REMOTE="Administrator@$PUBLIC_IP"
    # Connection options shared by scp and ssh.
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    scp -P "$PORT" "${COMMON_OPTS[@]}" \
      .github/workflows/scripts/win/install-nvidia-driver.ps1 "${REMOTE}:C:/install_driver.ps1"
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "$REMOTE" \
      "powershell -ExecutionPolicy Bypass -File C:/install_driver.ps1"
- name: Transfer repository to instance
  # Zip HEAD, copy it over, and unpack it into a freshly created
  # C:\ppf-contact-solver on the instance.
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    REMOTE="Administrator@$PUBLIC_IP"
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    git archive --format=zip --output=/tmp/repo.zip HEAD
    scp -P "$PORT" "${COMMON_OPTS[@]}" /tmp/repo.zip "${REMOTE}:C:/source.zip"
    # Remove any previous tree, recreate the directory, expand, delete the zip.
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "$REMOTE" \
      "powershell -Command \"if (Test-Path 'C:\\ppf-contact-solver') { Remove-Item -Recurse -Force 'C:\\ppf-contact-solver' }; New-Item -ItemType Directory -Path 'C:\\ppf-contact-solver' -Force; Expand-Archive -Path 'C:\\source.zip' -DestinationPath 'C:\\ppf-contact-solver' -Force; Remove-Item 'C:\\source.zip'\""
- name: Run warmup.bat
  # Runs build-win-native\warmup.bat on the instance through cmd.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && warmup.bat /nopause'"
- name: Run build.bat
  # Runs build-win-native\build.bat on the instance through cmd.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && build.bat /nopause'"
- name: Convert assertion notebook to Python script
  # Generates a PowerShell script locally, uploads it, and runs it on the
  # instance. The script converts examples/fail-examples/assertion.ipynb to
  # C:\ci\assertion.py with a sys.path / PYTHONPATH preamble prepended.
  run: |
    echo "Converting assertion notebook: examples/fail-examples/assertion.ipynb"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Use the same conversion pattern as main examples
    cat > /tmp/convert_assertion.ps1 << 'EOFPS1'
    $ErrorActionPreference = "Stop"
    Set-Location C:\ppf-contact-solver
    $env:PATH = "C:\ppf-contact-solver\build-win-native\python;C:\ppf-contact-solver\build-win-native\python\Scripts;" + $env:PATH
    New-Item -ItemType Directory -Path "C:\ci" -Force | Out-Null
    Write-Host "Converting assertion.ipynb to Python script..."
    & C:\ppf-contact-solver\build-win-native\python\python.exe -m jupyter nbconvert --to python "examples/fail-examples/assertion.ipynb" --output "C:\ci\assertion_base.py"
    # $ErrorActionPreference does not cover native executables, so the
    # exit code of nbconvert has to be checked explicitly.
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: nbconvert failed with exit code $LASTEXITCODE"
        exit $LASTEXITCODE
    }
    $header = "import sys`nimport os`nsys.path.insert(0, r'C:\ppf-contact-solver')`nsys.path.insert(0, r'C:\ppf-contact-solver\frontend')`nos.environ['PYTHONPATH'] = r'C:\ppf-contact-solver;C:\ppf-contact-solver\frontend;' + os.environ.get('PYTHONPATH', '')"
    $baseContent = Get-Content "C:\ci\assertion_base.py" -Raw
    $header + "`n" + $baseContent | Set-Content "C:\ci\assertion.py"
    Write-Host "Assertion script prepared at C:\ci\assertion.py"
    EOFPS1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/convert_assertion.ps1 Administrator@$PUBLIC_IP:C:/convert_assertion.ps1
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/convert_assertion.ps1"
- name: Run assertion test (expect failure)
  # Negative test: runs C:\ci\assertion.py on the instance and passes only
  # when the remote script reports a failure. ssh's own error status (255)
  # is treated as an infrastructure problem, not as the expected failure.
  run: |
    echo "Running assertion test to verify error propagation via SSH..."
    echo "This test uses the same execution pattern as main examples"
    echo "Expected result: FAILURE (AssertionError)"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Create script that runs the same way as main examples
    cat > /tmp/run_assertion.ps1 << 'EOFPS1'
    Set-Location C:\ppf-contact-solver
    "assertion" | Set-Content "frontend\.CI"
    & C:\ppf-contact-solver\build-win-native\python\python.exe C:\ci\assertion.py 2>&1 | Tee-Object -FilePath "C:\ci\assertion.log"
    exit $LASTEXITCODE
    EOFPS1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_assertion.ps1 Administrator@$PUBLIC_IP:C:/run_assertion.ps1
    # Run and expect failure; capture the status without tripping "bash -e".
    STATUS=0
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_assertion.ps1" || STATUS=$?
    if [ "$STATUS" -eq 0 ]; then
      echo "ERROR: Assertion test should have failed but succeeded"
      echo "This means errors are NOT being propagated correctly!"
      exit 1
    elif [ "$STATUS" -eq 255 ]; then
      # 255 is what ssh returns for its own errors (connection, auth, ...).
      echo "ERROR: ssh itself failed (exit 255); the assertion test did not run"
      exit 1
    else
      echo "SUCCESS: Assertion test failed as expected"
      echo "Error propagation via SSH is working correctly"
      echo "Main example tests can now proceed with confidence"
    fi
- name: Setup CI directory
  # Make sure C:\ci exists on the instance (-Force tolerates an existing one).
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -Command \"New-Item -ItemType Directory -Path 'C:\\ci' -Force\""
- name: Run belt
  # Instantiate the shared run-example template for "belt", upload it, and
  # execute it on the instance.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/belt/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_belt.ps1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_belt.ps1 Administrator@$PUBLIC_IP:C:/run_belt.ps1
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_belt.ps1"
- name: Run cards
  # Instantiate the shared run-example template for "cards", upload it, and
  # execute it on the instance.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/cards/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_cards.ps1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_cards.ps1 Administrator@$PUBLIC_IP:C:/run_cards.ps1
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_cards.ps1"
- name: Run codim
  # Instantiate the shared run-example template for "codim", upload it, and
  # execute it on the instance.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/codim/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_codim.ps1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_codim.ps1 Administrator@$PUBLIC_IP:C:/run_codim.ps1
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_codim.ps1"
- name: Run curtain
  # Instantiate the shared run-example template for "curtain", upload it,
  # and execute it on the instance.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/curtain/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_curtain.ps1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_curtain.ps1 Administrator@$PUBLIC_IP:C:/run_curtain.ps1
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_curtain.ps1"
- name: Run domino
  # Instantiate the shared run-example template for "domino", upload it, and
  # execute it on the instance.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/domino/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_domino.ps1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_domino.ps1 Administrator@$PUBLIC_IP:C:/run_domino.ps1
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_domino.ps1"
- name: Collect results
  # Runs after success or failure (not after cancellation). Every remote
  # call here is best-effort so that a missing directory never fails the job.
  if: success() || failure()
  run: |
    echo "Collecting results..."
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    REMOTE="Administrator@$PUBLIC_IP"
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    mkdir -p ci

    # Drop large binary files remotely first to save bandwidth.
    # CI output lives in the project-relative cache:
    # C:\ppf-contact-solver\cache\ppf-cts\ci
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "$REMOTE" \
      "powershell -Command \"Get-ChildItem -Path C:\\ppf-contact-solver\\cache\\ppf-cts\\ci -Recurse -Include '*.bin','*.pickle','*.ply','*.gz' -ErrorAction SilentlyContinue | Remove-Item -Force\"" || true

    # CI output from the ppf-cts cache directory.
    scp -P "$PORT" -r "${COMMON_OPTS[@]}" \
      "${REMOTE}:C:/ppf-contact-solver/cache/ppf-cts/ci/*" ./ci/ || echo "No ppf-cts CI files found"

    # Logs and generated scripts from C:\ci.
    scp -P "$PORT" -r "${COMMON_OPTS[@]}" \
      "${REMOTE}:C:/ci/*" ./ci/ || echo "No script/log files found"

    echo "## Collected Files:"
    ls -laR ci/ || echo "No files collected"
- name: Upload artifact
  # Publish the collected ci/ directory; kept for three days.
  if: success() || failure()
  uses: actions/upload-artifact@v4
  with:
    path: ci
    name: ci-win-batch-1
    retention-days: 3
- name: GPU information
  # Best-effort diagnostic: print nvidia-smi output from the instance.
  if: success() || failure()
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "Administrator@$PUBLIC_IP" "nvidia-smi" || true
- name: Re-authenticate for cleanup
  # Obtain AWS credentials again before the cleanup steps - presumably
  # because the first set may have expired during a long run; confirm.
  if: always()
  continue-on-error: true
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
- name: Cleanup - Terminate Instance
  # Always runs; termination is best-effort and never fails the job.
  if: always()
  continue-on-error: true
  run: |
    INSTANCE_ID="${{ steps.instance.outputs.INSTANCE_ID }}"
    # Nothing to terminate when the launch step never produced an id.
    if [ -z "$INSTANCE_ID" ]; then
      exit 0
    fi
    aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" --region "$AWS_REGION" || true
- name: Cleanup - Remove Ingress Rules
  # Revoke the per-run SSH rule from the persistent security group, using
  # the values recorded by the security-group step.
  if: always()
  continue-on-error: true
  run: |
    SG_ID="${{ steps.security-group.outputs.SG_ID }}"
    CIDR="${{ steps.security-group.outputs.RUNNER_IP_CIDR }}"
    PORT="${{ steps.security-group.outputs.SSH_PORT }}"
    # Skip when the rule was never recorded.
    if [ -z "$SG_ID" ] || [ -z "$CIDR" ]; then
      exit 0
    fi
    aws ec2 revoke-security-group-ingress --group-id "$SG_ID" \
      --ip-permissions "IpProtocol=tcp,FromPort=${PORT},ToPort=${PORT},IpRanges=[{CidrIp=${CIDR}}]" \
      --region "$AWS_REGION" 2>&1 || true
- name: Cleanup - Remove Local SSH Key
  # Delete the private key from the runner, whatever happened before.
  if: always()
  continue-on-error: true
  run: |
    rm -f "${{ steps.keypair.outputs.KEY_PATH }}"
- name: Summary
  # Final recap; shows "Not launched" when no instance id was produced.
  if: always()
  run: |
    echo "## Batch 1 Summary"
    echo "- Examples: belt cards codim curtain domino"
    echo "- Instance: ${{ steps.instance.outputs.INSTANCE_ID || 'Not launched' }}"
run-batch-2:
  name: Run Batch 2 (Windows)
  # The job runs on a hosted Linux runner and drives a remote Windows EC2
  # instance over ssh/scp.
  runs-on: ubuntu-latest
  permissions:
    contents: read
    # Needed so the job can obtain an OIDC token for the AWS role.
    id-token: write
  env:
    AWS_REGION: ${{ github.event.inputs.region }}
    INSTANCE_TYPE: ${{ github.event.inputs.instance_type }}
    BRANCH: ${{ github.ref_name }}
    EXAMPLES: "drape fishingknot fitting friction"
    WORKDIR: 'C:\ppf-contact-solver'
    # NOTE(review): this also replaces the runner shell's own USER variable
    # for every step of the job.
    USER: Administrator
  steps:
- name: Show input parameters
  # Prints the run configuration. The values are read from the job-level env
  # block instead of being expanded inline into the script, so a branch name
  # containing shell metacharacters is printed verbatim rather than being
  # interpreted by bash.
  run: |
    echo "## Input Parameters - Batch 2 (Windows)"
    echo "Branch: $BRANCH"
    echo "Instance Type: $INSTANCE_TYPE"
    echo "Region: $AWS_REGION"
    echo "Examples: $EXAMPLES"
# Later steps read helper scripts from .github/workflows/scripts/win and
# archive HEAD, so the repository must be present on the runner.
- name: Checkout repository
  uses: actions/checkout@v4
# Assume the role named by the AWS_ROLE_ARN secret in the selected region.
- name: Configure AWS credentials via OIDC
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
# Fail early if the credentials cannot call STS.
- name: Verify AWS authentication
  run: |
    echo "Testing AWS authentication..."
    aws sts get-caller-identity
- name: Get GitHub Actions runner public IP
  id: runner-ip
  # Output RUNNER_IP: this runner's public IPv4 address, used later to scope
  # the security-group ingress rule to a single /32.
  run: |
    echo "Fetching GitHub Actions runner public IP..."
    # -f makes curl produce no output on an HTTP error instead of handing
    # back an error page as if it were the address.
    RUNNER_IP=$(curl -sf --max-time 10 https://checkip.amazonaws.com | tr -d '\n')
    if [ -z "$RUNNER_IP" ]; then
      echo "ERROR: Failed to get IP from checkip.amazonaws.com"
      exit 1
    fi
    # The value ends up inside an EC2 CidrIp, so reject anything that is not
    # a dotted-quad IPv4 address.
    if ! [[ "$RUNNER_IP" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
      echo "ERROR: Unexpected response from checkip.amazonaws.com"
      exit 1
    fi
    echo "::add-mask::$RUNNER_IP"
    echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_OUTPUT"
    echo "GitHub Actions Runner IP: $RUNNER_IP"
- name: Find Windows Server 2025 AMI
  id: ami
  # Output AMI_ID: the most recently created available Amazon-owned image
  # whose name matches the Windows Server 2025 base pattern.
  run: |
    AMI_ID=$(
      aws ec2 describe-images \
        --region "$AWS_REGION" \
        --owners amazon \
        --filters \
          "Name=name,Values=Windows_Server-2025-English-Full-Base-*" \
          "Name=state,Values=available" \
        --query 'sort_by(Images, &CreationDate)[-1].ImageId' \
        --output text
    )
    # An empty result shows up as an empty string or the literal "None".
    if [ -z "$AMI_ID" ] || [ "$AMI_ID" = "None" ]; then
      echo "ERROR: Windows Server 2025 AMI not found"
      exit 1
    fi
    echo "AMI_ID=$AMI_ID" >> $GITHUB_OUTPUT
- name: Get default VPC ID
  id: vpc
  # Output VPC_ID: id of the region's default VPC (used when the security
  # group has to be created).
  run: |
    VPC_ID=$(
      aws ec2 describe-vpcs \
        --region "$AWS_REGION" \
        --filters "Name=isDefault,Values=true" \
        --query 'Vpcs[0].VpcId' \
        --output text
    )
    if [ -z "$VPC_ID" ] || [ "$VPC_ID" = "None" ]; then
      echo "ERROR: Default VPC not found"
      exit 1
    fi
    echo "VPC_ID=$VPC_ID" >> $GITHUB_OUTPUT
- name: Generate unique identifiers
  id: ids
  # Outputs:
  #   TIMESTAMP - launch time (YYYYmmddHHMMSS), used in the instance Name tag
  #   SSH_PORT  - random port used for every ssh/scp connection to the instance
  run: |
    TIMESTAMP=$(date +%Y%m%d%H%M%S)
    # bash's RANDOM is 0..32767, so the resulting port range is 10001..42768.
    SSH_PORT=$((10001 + RANDOM))
    # Keep the port out of the logs.
    echo "::add-mask::$SSH_PORT"
    echo "TIMESTAMP=$TIMESTAMP" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Setup persistent security group
  id: security-group
  # Finds (or creates) the shared security group and opens the random SSH
  # port for this runner's address only.
  # Outputs: SG_ID, RUNNER_IP_CIDR, SSH_PORT.
  env:
    # Step outputs are passed through the environment rather than being
    # expanded inline into the script text.
    VPC_ID: ${{ steps.vpc.outputs.VPC_ID }}
    RUNNER_IP: ${{ steps.runner-ip.outputs.RUNNER_IP }}
    SSH_PORT: ${{ steps.ids.outputs.SSH_PORT }}
  run: |
    SG_NAME="github-actions-windows-persistent"
    SG_ID=$(aws ec2 describe-security-groups --filters "Name=group-name,Values=$SG_NAME" \
      --query 'SecurityGroups[0].GroupId' --region "$AWS_REGION" --output text || echo "")
    if [ "$SG_ID" = "None" ] || [ -z "$SG_ID" ]; then
      SG_ID=$(aws ec2 create-security-group --group-name "$SG_NAME" \
        --description "GitHub Actions Windows builds" \
        --vpc-id "$VPC_ID" \
        --query 'GroupId' --region "$AWS_REGION" --output text)
      aws ec2 create-tags --resources "$SG_ID" --tags "Key=Name,Value=$SG_NAME" --region "$AWS_REGION"
    fi
    echo "SG_ID=$SG_ID" >> "$GITHUB_OUTPUT"
    RUNNER_IP_CIDR="$RUNNER_IP/32"
    # Only an already-existing rule is tolerated; any other failure (missing
    # permission, malformed CIDR, ...) stops the job instead of being hidden.
    if AUTH_OUTPUT=$(aws ec2 authorize-security-group-ingress --group-id "$SG_ID" \
      --ip-permissions "IpProtocol=tcp,FromPort=$SSH_PORT,ToPort=$SSH_PORT,IpRanges=[{CidrIp=$RUNNER_IP_CIDR}]" \
      --region "$AWS_REGION" 2>&1); then
      echo "$AUTH_OUTPUT"
    elif echo "$AUTH_OUTPUT" | grep -q "InvalidPermission.Duplicate"; then
      echo "Rule may exist"
    else
      echo "$AUTH_OUTPUT"
      echo "ERROR: Failed to authorize SSH ingress"
      exit 1
    fi
    echo "RUNNER_IP_CIDR=$RUNNER_IP_CIDR" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Retrieve SSH key from Parameter Store
  id: keypair
  # Output KEY_PATH: location of the decrypted private key on the runner.
  run: |
    KEY_PATH=/tmp/github-actions-ec2.pem
    # Tighten the umask before the file is created so the private key is
    # never readable by other users, not even between the write and chmod.
    umask 077
    aws ssm get-parameter --name "/github-actions/ec2/ssh-key" --with-decryption \
      --query 'Parameter.Value' --region "$AWS_REGION" --output text > "$KEY_PATH"
    # Still needed when the file already existed with wider permissions.
    chmod 600 "$KEY_PATH"
    echo "KEY_PATH=$KEY_PATH" >> "$GITHUB_OUTPUT"
- name: Create Windows user data script
  # Render the user-data template with the SSH port chosen for this run.
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    TEMPLATE=.github/workflows/scripts/win/user-data.ps1
    sed "s/SSH_PORT_PLACEHOLDER/${PORT}/g" "$TEMPLATE" > /tmp/user-data.ps1
- name: Launch EC2 instance
  id: instance
  # Output INSTANCE_ID: id of the instance started from the AMI, security
  # group and rendered user-data prepared by the earlier steps.
  run: |
    NAME_TAG="gpu-runner-win-batch-2-${{ steps.ids.outputs.TIMESTAMP }}"
    INSTANCE_ID=$(
      aws ec2 run-instances \
        --region "$AWS_REGION" \
        --image-id "${{ steps.ami.outputs.AMI_ID }}" \
        --instance-type "$INSTANCE_TYPE" \
        --key-name "${{ secrets.AWS_KEY_PAIR_NAME }}" \
        --security-group-ids "${{ steps.security-group.outputs.SG_ID }}" \
        --user-data file:///tmp/user-data.ps1 \
        --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=100,VolumeType=gp3,DeleteOnTermination=true}" \
        --instance-initiated-shutdown-behavior terminate \
        --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=${NAME_TAG}},{Key=ManagedBy,Value=GitHubActions},{Key=Batch,Value=2}]" \
        --query 'Instances[0].InstanceId' \
        --output text
    )
    echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_OUTPUT
- name: Wait for instance to be running
  # Blocks until EC2 reports the instance as running, then exports PUBLIC_IP
  # to the job environment for all later ssh/scp steps.
  env:
    INSTANCE_ID: ${{ steps.instance.outputs.INSTANCE_ID }}
  run: |
    aws ec2 wait instance-running --instance-ids "$INSTANCE_ID" --region "$AWS_REGION"
    PUBLIC_IP=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' --region "$AWS_REGION" --output text)
    # An instance without a public address yields "None"; stop here rather
    # than letting every later ssh call try to reach a host named "None".
    if [ -z "$PUBLIC_IP" ] || [ "$PUBLIC_IP" = "None" ]; then
      echo "ERROR: Instance has no public IP address"
      exit 1
    fi
    echo "::add-mask::$PUBLIC_IP"
    echo "PUBLIC_IP=$PUBLIC_IP" >> "$GITHUB_ENV"
- name: Wait for SSH
  # Phase 1 polls until an ssh login succeeds (up to 60 tries, 30 s apart).
  # Phase 2 polls until C:\ssh_ready.txt exists on the instance (up to 30
  # tries) - presumably a marker written by the user-data script; confirm.
  # A timeout in either phase fails the step instead of falling through.
  run: |
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    SSH_OPTS=(-p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY_PATH")

    SSH_UP=false
    for i in $(seq 1 60); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=5 -o BatchMode=yes \
        "Administrator@$PUBLIC_IP" "echo SSH_READY" 2>/dev/null | grep -q SSH_READY; then
        echo "SSH ready!"
        SSH_UP=true
        break
      fi
      echo "Waiting for SSH ($i/60)..."
      sleep 30
    done
    if [ "$SSH_UP" != true ]; then
      echo "ERROR: SSH did not become reachable after 60 attempts"
      exit 1
    fi

    SETUP_DONE=false
    for i in $(seq 1 30); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=10 \
        "Administrator@$PUBLIC_IP" "if (Test-Path C:\\ssh_ready.txt) { echo READY }" 2>/dev/null | grep -q READY; then
        echo "Setup complete!"
        SETUP_DONE=true
        break
      fi
      sleep 30
    done
    if [ "$SETUP_DONE" != true ]; then
      echo "ERROR: C:\\ssh_ready.txt did not appear after 30 attempts"
      exit 1
    fi
- name: Install NVIDIA driver only (no CUDA toolkit)
  # Upload the driver installer script and execute it on the instance.
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    REMOTE="Administrator@$PUBLIC_IP"
    # Connection options shared by scp and ssh.
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    scp -P "$PORT" "${COMMON_OPTS[@]}" \
      .github/workflows/scripts/win/install-nvidia-driver.ps1 "${REMOTE}:C:/install_driver.ps1"
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "$REMOTE" \
      "powershell -ExecutionPolicy Bypass -File C:/install_driver.ps1"
- name: Transfer repository to instance
  # Zip HEAD, copy it over, and unpack it into a freshly created
  # C:\ppf-contact-solver on the instance.
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    REMOTE="Administrator@$PUBLIC_IP"
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    git archive --format=zip --output=/tmp/repo.zip HEAD
    scp -P "$PORT" "${COMMON_OPTS[@]}" /tmp/repo.zip "${REMOTE}:C:/source.zip"
    # Remove any previous tree, recreate the directory, expand, delete the zip.
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "$REMOTE" \
      "powershell -Command \"if (Test-Path 'C:\\ppf-contact-solver') { Remove-Item -Recurse -Force 'C:\\ppf-contact-solver' }; New-Item -ItemType Directory -Path 'C:\\ppf-contact-solver' -Force; Expand-Archive -Path 'C:\\source.zip' -DestinationPath 'C:\\ppf-contact-solver' -Force; Remove-Item 'C:\\source.zip'\""
- name: Run warmup.bat
  # Runs build-win-native\warmup.bat on the instance through cmd.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && warmup.bat /nopause'"
- name: Run build.bat
  # Runs build-win-native\build.bat on the instance through cmd.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && build.bat /nopause'"
- name: Convert assertion notebook to Python script
  # Generates a PowerShell script locally, uploads it, and runs it on the
  # instance. The script converts examples/fail-examples/assertion.ipynb to
  # C:\ci\assertion.py with a sys.path / PYTHONPATH preamble prepended.
  run: |
    echo "Converting assertion notebook: examples/fail-examples/assertion.ipynb"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Use the same conversion pattern as main examples
    cat > /tmp/convert_assertion.ps1 << 'EOFPS1'
    $ErrorActionPreference = "Stop"
    Set-Location C:\ppf-contact-solver
    $env:PATH = "C:\ppf-contact-solver\build-win-native\python;C:\ppf-contact-solver\build-win-native\python\Scripts;" + $env:PATH
    New-Item -ItemType Directory -Path "C:\ci" -Force | Out-Null
    Write-Host "Converting assertion.ipynb to Python script..."
    & C:\ppf-contact-solver\build-win-native\python\python.exe -m jupyter nbconvert --to python "examples/fail-examples/assertion.ipynb" --output "C:\ci\assertion_base.py"
    # $ErrorActionPreference does not cover native executables, so the
    # exit code of nbconvert has to be checked explicitly.
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: nbconvert failed with exit code $LASTEXITCODE"
        exit $LASTEXITCODE
    }
    $header = "import sys`nimport os`nsys.path.insert(0, r'C:\ppf-contact-solver')`nsys.path.insert(0, r'C:\ppf-contact-solver\frontend')`nos.environ['PYTHONPATH'] = r'C:\ppf-contact-solver;C:\ppf-contact-solver\frontend;' + os.environ.get('PYTHONPATH', '')"
    $baseContent = Get-Content "C:\ci\assertion_base.py" -Raw
    $header + "`n" + $baseContent | Set-Content "C:\ci\assertion.py"
    Write-Host "Assertion script prepared at C:\ci\assertion.py"
    EOFPS1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/convert_assertion.ps1 Administrator@$PUBLIC_IP:C:/convert_assertion.ps1
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/convert_assertion.ps1"
- name: Run assertion test (expect failure)
  # Negative test: runs C:\ci\assertion.py on the instance and passes only
  # when the remote script reports a failure. ssh's own error status (255)
  # is treated as an infrastructure problem, not as the expected failure.
  run: |
    echo "Running assertion test to verify error propagation via SSH..."
    echo "This test uses the same execution pattern as main examples"
    echo "Expected result: FAILURE (AssertionError)"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Create script that runs the same way as main examples
    cat > /tmp/run_assertion.ps1 << 'EOFPS1'
    Set-Location C:\ppf-contact-solver
    "assertion" | Set-Content "frontend\.CI"
    & C:\ppf-contact-solver\build-win-native\python\python.exe C:\ci\assertion.py 2>&1 | Tee-Object -FilePath "C:\ci\assertion.log"
    exit $LASTEXITCODE
    EOFPS1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_assertion.ps1 Administrator@$PUBLIC_IP:C:/run_assertion.ps1
    # Run and expect failure; capture the status without tripping "bash -e".
    STATUS=0
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_assertion.ps1" || STATUS=$?
    if [ "$STATUS" -eq 0 ]; then
      echo "ERROR: Assertion test should have failed but succeeded"
      echo "This means errors are NOT being propagated correctly!"
      exit 1
    elif [ "$STATUS" -eq 255 ]; then
      # 255 is what ssh returns for its own errors (connection, auth, ...).
      echo "ERROR: ssh itself failed (exit 255); the assertion test did not run"
      exit 1
    else
      echo "SUCCESS: Assertion test failed as expected"
      echo "Error propagation via SSH is working correctly"
      echo "Main example tests can now proceed with confidence"
    fi
- name: Setup CI directory
  # Make sure C:\ci exists on the instance (-Force tolerates an existing one).
  run: |
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY="${{ steps.keypair.outputs.KEY_PATH }}"
    COMMON_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -i "$KEY")
    ssh -p "$PORT" "${COMMON_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -Command \"New-Item -ItemType Directory -Path 'C:\\ci' -Force\""
- name: Run drape
  # Instantiate the shared run-example template for "drape", upload it, and
  # execute it on the instance.
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/drape/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_drape.ps1
    scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_drape.ps1 Administrator@$PUBLIC_IP:C:/run_drape.ps1
    # Each keep-alive option is passed exactly once.
    ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" Administrator@$PUBLIC_IP \
      "powershell -ExecutionPolicy Bypass -File C:/run_drape.ps1"
- name: Run fishingknot
  run: |
    # Render the shared example runner for "fishingknot", copy it to the instance and execute it.
    # ssh returns the remote script's exit status, so a failing example fails this step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Options shared by scp and ssh; only the port flag differs (-P for scp, -p for ssh).
    # ServerAliveInterval is listed once (it used to be passed twice to ssh).
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    sed "s/EXAMPLE_PLACEHOLDER/fishingknot/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_fishingknot.ps1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_fishingknot.ps1 "Administrator@$PUBLIC_IP:C:/run_fishingknot.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_fishingknot.ps1"
- name: Run fitting
  run: |
    # Render the shared example runner for "fitting", copy it to the instance and execute it.
    # ssh returns the remote script's exit status, so a failing example fails this step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Options shared by scp and ssh; only the port flag differs (-P for scp, -p for ssh).
    # ServerAliveInterval is listed once (it used to be passed twice to ssh).
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    sed "s/EXAMPLE_PLACEHOLDER/fitting/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_fitting.ps1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_fitting.ps1 "Administrator@$PUBLIC_IP:C:/run_fitting.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_fitting.ps1"
- name: Run friction
  run: |
    # Render the shared example runner for "friction", copy it to the instance and execute it.
    # ssh returns the remote script's exit status, so a failing example fails this step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Options shared by scp and ssh; only the port flag differs (-P for scp, -p for ssh).
    # ServerAliveInterval is listed once (it used to be passed twice to ssh).
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    sed "s/EXAMPLE_PLACEHOLDER/friction/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_friction.ps1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_friction.ps1 "Administrator@$PUBLIC_IP:C:/run_friction.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_friction.ps1"
- name: Collect results
  if: success() || failure()
  run: |
    echo "Collecting results..."
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    REMOTE="Administrator@$PUBLIC_IP"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    mkdir -p ci
    # Best effort: drop bulky outputs (*.bin, *.pickle, *.ply, *.gz) from the remote
    # project cache (C:\ppf-contact-solver\cache\ppf-cts\ci) so the download stays small.
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "$REMOTE" \
      "powershell -Command \"Get-ChildItem -Path C:\\ppf-contact-solver\\cache\\ppf-cts\\ci -Recurse -Include '*.bin','*.pickle','*.ply','*.gz' -ErrorAction SilentlyContinue | Remove-Item -Force\"" || true
    # Download what is left of the ppf-cts CI output; a missing directory is not an error.
    scp -P "$SSH_PORT" -r "${CONN_OPTS[@]}" \
      "${REMOTE}:C:/ppf-contact-solver/cache/ppf-cts/ci/*" ./ci/ || echo "No ppf-cts CI files found"
    # Download the logs and scripts kept under C:\ci into the same local folder.
    scp -P "$SSH_PORT" -r "${CONN_OPTS[@]}" \
      "${REMOTE}:C:/ci/*" ./ci/ || echo "No script/log files found"
    echo "## Collected Files:"
    ls -laR ci/ || echo "No files collected"
# Publish the runner-local ci/ directory as the "ci-win-batch-2" artifact, retained for 3 days.
# The condition runs this step after both successful and failed steps (it is skipped on cancellation).
- name: Upload artifact
if: success() || failure()
uses: actions/upload-artifact@v4
with:
name: ci-win-batch-2
path: ci
retention-days: 3
- name: GPU information
  if: success() || failure()
  run: |
    # Print nvidia-smi output from the instance for diagnostics; never fails the step.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    ssh "${SSH_OPTS[@]}" "Administrator@$PUBLIC_IP" "nvidia-smi" || true
# Assume the AWS role again right before the cleanup steps.
# always() runs this even after failures or cancellation; continue-on-error keeps a failed
# login from blocking the remaining cleanup steps.
# NOTE(review): presumably needed because the credentials obtained at the start of the job
# can expire during long runs - confirm against the role's session duration.
- name: Re-authenticate for cleanup
if: always()
continue-on-error: true
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
aws-region: ${{ env.AWS_REGION }}
- name: Cleanup - Terminate Instance
  if: always()
  continue-on-error: true
  run: |
    # Terminate the instance recorded by the launch step; a missing ID means there is nothing to clean up.
    INSTANCE_ID="${{ steps.instance.outputs.INSTANCE_ID }}"
    if [ -z "$INSTANCE_ID" ]; then
      exit 0
    fi
    aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" --region "$AWS_REGION" || true
- name: Cleanup - Remove Ingress Rules
  if: always()
  continue-on-error: true
  run: |
    # Revoke the TCP ingress rule for the recorded CIDR and SSH port. Skipped when the
    # security-group step did not publish both the group ID and the CIDR.
    SG_ID="${{ steps.security-group.outputs.SG_ID }}"
    CIDR="${{ steps.security-group.outputs.RUNNER_IP_CIDR }}"
    PORT="${{ steps.security-group.outputs.SSH_PORT }}"
    if [ -z "$SG_ID" ] || [ -z "$CIDR" ]; then
      exit 0
    fi
    aws ec2 revoke-security-group-ingress --group-id "$SG_ID" \
      --ip-permissions "IpProtocol=tcp,FromPort=$PORT,ToPort=$PORT,IpRanges=[{CidrIp=$CIDR}]" \
      --region "$AWS_REGION" 2>&1 || true
# Delete the private key file from the runner. rm -f succeeds even if the file was never
# written, and continue-on-error keeps this from affecting the job result.
- name: Cleanup - Remove Local SSH Key
if: always()
continue-on-error: true
run: rm -f "${{ steps.keypair.outputs.KEY_PATH }}"
- name: Summary
  if: always()
  run: |
    # Always print a short recap; the instance line falls back to "Not launched" when no ID was recorded.
    printf '%s\n' \
      "## Batch 2 Summary" \
      "- Examples: drape fishingknot fitting friction" \
      "- Instance: ${{ steps.instance.outputs.INSTANCE_ID || 'Not launched' }}"
# Batch 3 job: runs on a GitHub-hosted Ubuntu runner and drives a Windows EC2 instance over SSH
# for the examples listed in EXAMPLES.
run-batch-3:
name: Run Batch 3 (Windows)
runs-on: ubuntu-latest
# id-token: write lets the job request the OIDC token used by the
# "Configure AWS credentials via OIDC" step; contents: read is enough for checkout.
permissions:
id-token: write
contents: read
# Job-level environment, visible to every step's shell on the runner.
env:
AWS_REGION: ${{ github.event.inputs.region }}
INSTANCE_TYPE: ${{ github.event.inputs.instance_type }}
BRANCH: ${{ github.ref_name }}
EXAMPLES: "hang needle noodle ribbon"
WORKDIR: C:\ppf-contact-solver
# NOTE(review): this overrides the runner shell's own USER variable for every step - confirm that is intended.
USER: Administrator
steps:
- name: Show input parameters
  run: |
    # Print the dispatch parameters for this batch.
    # Values are read from the job-level env (BRANCH, INSTANCE_TYPE, AWS_REGION, EXAMPLES)
    # instead of being inlined as workflow expressions, so text such as a branch name is
    # only ever treated as data by the shell.
    echo "## Input Parameters - Batch 3 (Windows)"
    echo "Branch: $BRANCH"
    echo "Instance Type: $INSTANCE_TYPE"
    echo "Region: $AWS_REGION"
    echo "Examples: $EXAMPLES"
# Check out the repository on the runner; later steps read the helper scripts under
# .github/workflows/scripts/win and archive HEAD for transfer to the instance.
- name: Checkout repository
uses: actions/checkout@v4
# Assume the IAM role stored in the AWS_ROLE_ARN secret for the region selected at dispatch.
# Relies on the job's id-token: write permission.
- name: Configure AWS credentials via OIDC
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
aws-region: ${{ env.AWS_REGION }}
- name: Verify AWS authentication
  run: |
    # Fail early if the assumed role cannot call STS.
    printf '%s\n' "Testing AWS authentication..."
    aws sts get-caller-identity
- name: Get GitHub Actions runner public IP
  id: runner-ip
  run: |
    # Look up this runner's public IPv4 address and publish it as the RUNNER_IP output.
    # The value is later embedded in a /32 security-group rule, so it is validated here.
    echo "Fetching GitHub Actions runner public IP..."
    # -f makes curl fail on HTTP errors instead of handing back an error page as the "IP".
    RUNNER_IP=$(curl -fsS --max-time 10 https://checkip.amazonaws.com | tr -d '[:space:]')
    if [ -z "$RUNNER_IP" ]; then
      echo "ERROR: Failed to get IP from checkip.amazonaws.com"
      exit 1
    fi
    if ! [[ "$RUNNER_IP" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
      echo "ERROR: checkip.amazonaws.com returned an unexpected value (not an IPv4 address)"
      exit 1
    fi
    # Mask first so the address is redacted in every later log line, including the echo below.
    echo "::add-mask::$RUNNER_IP"
    echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_OUTPUT"
    echo "GitHub Actions Runner IP: $RUNNER_IP"
- name: Find Windows Server 2025 AMI
  id: ami
  run: |
    # Pick the most recently created, available, Amazon-owned image matching the name pattern.
    NAME_PATTERN="Windows_Server-2025-English-Full-Base-*"
    AMI_ID=$(aws ec2 describe-images \
      --owners amazon \
      --filters "Name=name,Values=$NAME_PATTERN" "Name=state,Values=available" \
      --query 'sort_by(Images, &CreationDate)[-1].ImageId' \
      --region "$AWS_REGION" --output text)
    # With --output text an empty query result is printed as the literal "None".
    case "$AMI_ID" in
      ""|None)
        echo "ERROR: Windows Server 2025 AMI not found"
        exit 1
        ;;
    esac
    echo "AMI_ID=$AMI_ID" >> $GITHUB_OUTPUT
- name: Get default VPC ID
  id: vpc
  run: |
    # Resolve the region's default VPC and publish it as the VPC_ID output.
    VPC_ID=$(aws ec2 describe-vpcs \
      --filters "Name=isDefault,Values=true" \
      --query 'Vpcs[0].VpcId' \
      --region "$AWS_REGION" --output text)
    # With --output text an empty query result is printed as the literal "None".
    case "$VPC_ID" in
      ""|None)
        echo "ERROR: Default VPC not found"
        exit 1
        ;;
    esac
    echo "VPC_ID=$VPC_ID" >> $GITHUB_OUTPUT
- name: Generate unique identifiers
  id: ids
  run: |
    # Publish a run timestamp and a randomised SSH port as step outputs.
    # (The previously generated RANDOM_SUFFIX was never used and has been dropped.)
    TIMESTAMP=$(date +%Y%m%d%H%M%S)
    # bash's RANDOM is at most 32767, so the modulo never takes effect and the port
    # always lands in 10001-42768.
    SSH_PORT=$((10001 + RANDOM % 55535))
    # Mask the port so it is redacted in later log output.
    echo "::add-mask::$SSH_PORT"
    echo "TIMESTAMP=$TIMESTAMP" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Setup persistent security group
  id: security-group
  run: |
    # Find (or create) the shared security group and open this run's SSH port for the
    # runner's IP only. Outputs: SG_ID, RUNNER_IP_CIDR, SSH_PORT (consumed by the cleanup step).
    SG_NAME="github-actions-windows-persistent"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    RUNNER_IP_CIDR="${{ steps.runner-ip.outputs.RUNNER_IP }}/32"
    SG_ID=$(aws ec2 describe-security-groups --filters "Name=group-name,Values=$SG_NAME" \
      --query 'SecurityGroups[0].GroupId' --region "$AWS_REGION" --output text || echo "")
    if [ "$SG_ID" = "None" ] || [ -z "$SG_ID" ]; then
      SG_ID=$(aws ec2 create-security-group --group-name "$SG_NAME" \
        --description "GitHub Actions Windows builds" \
        --vpc-id "${{ steps.vpc.outputs.VPC_ID }}" \
        --query 'GroupId' --region "$AWS_REGION" --output text)
      aws ec2 create-tags --resources "$SG_ID" --tags "Key=Name,Value=$SG_NAME" --region "$AWS_REGION"
    fi
    echo "SG_ID=$SG_ID" >> "$GITHUB_OUTPUT"
    # Only an already-existing rule is tolerated. Any other failure (permissions, rule
    # quota, ...) would leave the port closed, so fail here rather than after the long
    # SSH wait with a GPU instance already running.
    if AUTH_OUT=$(aws ec2 authorize-security-group-ingress --group-id "$SG_ID" \
        --ip-permissions "IpProtocol=tcp,FromPort=$SSH_PORT,ToPort=$SSH_PORT,IpRanges=[{CidrIp=$RUNNER_IP_CIDR}]" \
        --region "$AWS_REGION" 2>&1); then
      echo "$AUTH_OUT"
    elif echo "$AUTH_OUT" | grep -q "InvalidPermission.Duplicate"; then
      echo "$AUTH_OUT"
      echo "Rule may exist"
    else
      echo "$AUTH_OUT"
      echo "ERROR: Failed to authorize SSH ingress on security group $SG_ID"
      exit 1
    fi
    echo "RUNNER_IP_CIDR=$RUNNER_IP_CIDR" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Retrieve SSH key from Parameter Store
  id: keypair
  run: |
    # Fetch the decrypted private key from SSM Parameter Store and publish its path as KEY_PATH.
    KEY_PATH=/tmp/github-actions-ec2.pem
    # Tighten the umask first so a newly created key file is never readable by other users,
    # not even briefly before chmod runs.
    umask 077
    aws ssm get-parameter --name "/github-actions/ec2/ssh-key" --with-decryption \
      --query 'Parameter.Value' --region "$AWS_REGION" --output text > "$KEY_PATH"
    chmod 600 "$KEY_PATH"
    # An empty key would only surface much later as an SSH timeout; catch it here.
    if [ ! -s "$KEY_PATH" ]; then
      echo "ERROR: SSH key retrieved from Parameter Store is empty"
      exit 1
    fi
    echo "KEY_PATH=$KEY_PATH" >> "$GITHUB_OUTPUT"
- name: Create Windows user data script
  run: |
    # Fill this run's SSH port into the user-data template and write the result to /tmp.
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    sed -e "s/SSH_PORT_PLACEHOLDER/${PORT}/g" \
      .github/workflows/scripts/win/user-data.ps1 > /tmp/user-data.ps1
- name: Launch EC2 instance
  id: instance
  run: |
    # Launch one instance from the selected AMI with a 100 GB gp3 root volume and the
    # rendered user-data script, then publish its ID as the INSTANCE_ID output.
    LAUNCH_ARGS=(
      --image-id "${{ steps.ami.outputs.AMI_ID }}"
      --instance-type "$INSTANCE_TYPE"
      --key-name "${{ secrets.AWS_KEY_PAIR_NAME }}"
      --security-group-ids "${{ steps.security-group.outputs.SG_ID }}"
      --user-data file:///tmp/user-data.ps1
      --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=100,VolumeType=gp3,DeleteOnTermination=true}"
      --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=gpu-runner-win-batch-3-${{ steps.ids.outputs.TIMESTAMP }}},{Key=ManagedBy,Value=GitHubActions},{Key=Batch,Value=3}]"
      # An OS-level shutdown terminates the instance instead of merely stopping it.
      --instance-initiated-shutdown-behavior terminate
      --query 'Instances[0].InstanceId'
      --region "$AWS_REGION"
      --output text
    )
    INSTANCE_ID=$(aws ec2 run-instances "${LAUNCH_ARGS[@]}")
    echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_OUTPUT
- name: Wait for instance to be running
  run: |
    # Block until EC2 reports the instance as running, then export its public IP as
    # PUBLIC_IP for all later steps (via GITHUB_ENV).
    INSTANCE_ID="${{ steps.instance.outputs.INSTANCE_ID }}"
    aws ec2 wait instance-running --instance-ids "$INSTANCE_ID" --region "$AWS_REGION"
    PUBLIC_IP=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' --region "$AWS_REGION" --output text)
    # With --output text a missing address is printed as "None"; every later ssh/scp
    # would then target a bogus host, so stop here with a clear message.
    if [ -z "$PUBLIC_IP" ] || [ "$PUBLIC_IP" = "None" ]; then
      echo "ERROR: Instance $INSTANCE_ID has no public IP address"
      exit 1
    fi
    echo "::add-mask::$PUBLIC_IP"
    echo "PUBLIC_IP=$PUBLIC_IP" >> "$GITHUB_ENV"
- name: Wait for SSH
  run: |
    # Two-phase readiness check, polling every 30 seconds:
    #   1. until sshd accepts the key and runs a command (up to 60 attempts);
    #   2. until C:\ssh_ready.txt exists on the instance (up to 30 attempts).
    # Each phase now fails the step when its attempts are used up; previously the loops
    # simply ended and later steps ran against a host that was not ready.
    # NOTE(review): C:\ssh_ready.txt is presumably written by the user-data script - confirm.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    SSH_UP=false
    for i in $(seq 1 60); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=5 -o BatchMode=yes \
          "Administrator@$PUBLIC_IP" "echo SSH_READY" 2>/dev/null | grep -q SSH_READY; then
        echo "SSH ready!"
        SSH_UP=true
        break
      fi
      echo "Waiting for SSH ($i/60)..."
      sleep 30
    done
    if [ "$SSH_UP" != true ]; then
      echo "ERROR: SSH did not become reachable after 60 attempts"
      exit 1
    fi
    SETUP_DONE=false
    for i in $(seq 1 30); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=10 \
          "Administrator@$PUBLIC_IP" "if (Test-Path C:\\ssh_ready.txt) { echo READY }" 2>/dev/null | grep -q READY; then
        echo "Setup complete!"
        SETUP_DONE=true
        break
      fi
      echo "Waiting for instance setup ($i/30)..."
      sleep 30
    done
    if [ "$SETUP_DONE" != true ]; then
      echo "ERROR: Instance setup did not complete after 30 attempts"
      exit 1
    fi
- name: Install NVIDIA driver only (no CUDA toolkit)
  run: |
    # Upload the driver install script from the repository and run it on the instance.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    TARGET="Administrator@$PUBLIC_IP"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" \
      .github/workflows/scripts/win/install-nvidia-driver.ps1 "${TARGET}:C:/install_driver.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "$TARGET" \
      "powershell -ExecutionPolicy Bypass -File C:/install_driver.ps1"
- name: Transfer repository to instance
  run: |
    # Zip the checked-out HEAD, upload it, and unpack it into a fresh C:\ppf-contact-solver.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    TARGET="Administrator@$PUBLIC_IP"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    # Remote side: remove any previous tree, recreate the directory, expand the archive, delete the zip.
    UNPACK_CMD="powershell -Command \"if (Test-Path 'C:\\ppf-contact-solver') { Remove-Item -Recurse -Force 'C:\\ppf-contact-solver' }; New-Item -ItemType Directory -Path 'C:\\ppf-contact-solver' -Force; Expand-Archive -Path 'C:\\source.zip' -DestinationPath 'C:\\ppf-contact-solver' -Force; Remove-Item 'C:\\source.zip'\""
    git archive --format=zip --output=/tmp/repo.zip HEAD
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/repo.zip "${TARGET}:C:/source.zip"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "$TARGET" "$UNPACK_CMD"
- name: Run warmup.bat
  run: |
    # Run build-win-native\warmup.bat on the instance through cmd, passing /nopause.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # ServerAliveInterval is listed once (it used to be passed twice).
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    ssh "${SSH_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && warmup.bat /nopause'"
- name: Run build.bat
  run: |
    # Run build-win-native\build.bat on the instance through cmd, passing /nopause.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # ServerAliveInterval is listed once (it used to be passed twice).
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    ssh "${SSH_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && build.bat /nopause'"
- name: Convert assertion notebook to Python script
  run: |
    # Convert examples/fail-examples/assertion.ipynb to C:\ci\assertion.py on the instance,
    # prefixed with a header that puts the project and its frontend on sys.path.
    echo "Converting assertion notebook: examples/fail-examples/assertion.ipynb"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    # Use the same conversion pattern as main examples
    cat > /tmp/convert_assertion.ps1 << 'EOFPS1'
    $ErrorActionPreference = "Stop"
    Set-Location C:\ppf-contact-solver
    $env:PATH = "C:\ppf-contact-solver\build-win-native\python;C:\ppf-contact-solver\build-win-native\python\Scripts;" + $env:PATH
    New-Item -ItemType Directory -Path "C:\ci" -Force | Out-Null
    Write-Host "Converting assertion.ipynb to Python script..."
    & C:\ppf-contact-solver\build-win-native\python\python.exe -m jupyter nbconvert --to python "examples/fail-examples/assertion.ipynb" --output "C:\ci\assertion_base.py"
    # $ErrorActionPreference does not apply to native executables, so check the exit code explicitly.
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: nbconvert failed with exit code $LASTEXITCODE"
        exit $LASTEXITCODE
    }
    $header = "import sys`nimport os`nsys.path.insert(0, r'C:\ppf-contact-solver')`nsys.path.insert(0, r'C:\ppf-contact-solver\frontend')`nos.environ['PYTHONPATH'] = r'C:\ppf-contact-solver;C:\ppf-contact-solver\frontend;' + os.environ.get('PYTHONPATH', '')"
    $baseContent = Get-Content "C:\ci\assertion_base.py" -Raw
    $header + "`n" + $baseContent | Set-Content "C:\ci\assertion.py"
    Write-Host "Assertion script prepared at C:\ci\assertion.py"
    EOFPS1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/convert_assertion.ps1 "Administrator@$PUBLIC_IP:C:/convert_assertion.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/convert_assertion.ps1"
- name: Run assertion test (expect failure)
  run: |
    # Canary for error propagation: run a script that is expected to fail on the instance
    # and require a non-zero exit status to come back through ssh.
    echo "Running assertion test to verify error propagation via SSH..."
    echo "This test uses the same execution pattern as main examples"
    echo "Expected result: FAILURE (AssertionError)"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    # Create script that runs the same way as main examples
    cat > /tmp/run_assertion.ps1 << 'EOFPS1'
    Set-Location C:\ppf-contact-solver
    "assertion" | Set-Content "frontend\.CI"
    & C:\ppf-contact-solver\build-win-native\python\python.exe C:\ci\assertion.py 2>&1 | Tee-Object -FilePath "C:\ci\assertion.log"
    exit $LASTEXITCODE
    EOFPS1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_assertion.ps1 "Administrator@$PUBLIC_IP:C:/run_assertion.ps1"
    # Run and expect failure. The exit status is captured rather than tested directly
    # because ssh itself exits with 255 on connection or authentication errors; that must
    # not be mistaken for the expected script failure.
    STATUS=0
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_assertion.ps1" || STATUS=$?
    if [ "$STATUS" -eq 0 ]; then
      echo "ERROR: Assertion test should have failed but succeeded"
      echo "This means errors are NOT being propagated correctly!"
      exit 1
    elif [ "$STATUS" -eq 255 ]; then
      echo "ERROR: SSH itself failed (exit 255); error propagation could not be verified"
      exit 1
    else
      echo "SUCCESS: Assertion test failed as expected"
      echo "Error propagation via SSH is working correctly"
      echo "Main example tests can now proceed with confidence"
    fi
- name: Setup CI directory
  run: |
    # Create C:\ci on the instance; -Force keeps New-Item from failing if it already exists.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    ssh "${SSH_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -Command \"New-Item -ItemType Directory -Path 'C:\\ci' -Force\""
- name: Run hang
  run: |
    # Render the shared example runner for "hang", copy it to the instance and execute it.
    # ssh returns the remote script's exit status, so a failing example fails this step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Options shared by scp and ssh; only the port flag differs (-P for scp, -p for ssh).
    # ServerAliveInterval is listed once (it used to be passed twice to ssh).
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    sed "s/EXAMPLE_PLACEHOLDER/hang/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_hang.ps1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_hang.ps1 "Administrator@$PUBLIC_IP:C:/run_hang.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_hang.ps1"
- name: Run needle
  run: |
    # Render the shared example runner for "needle", copy it to the instance and execute it.
    # ssh returns the remote script's exit status, so a failing example fails this step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Options shared by scp and ssh; only the port flag differs (-P for scp, -p for ssh).
    # ServerAliveInterval is listed once (it used to be passed twice to ssh).
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    sed "s/EXAMPLE_PLACEHOLDER/needle/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_needle.ps1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_needle.ps1 "Administrator@$PUBLIC_IP:C:/run_needle.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_needle.ps1"
- name: Run noodle
  run: |
    # Render the shared example runner for "noodle", copy it to the instance and execute it.
    # ssh returns the remote script's exit status, so a failing example fails this step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Options shared by scp and ssh; only the port flag differs (-P for scp, -p for ssh).
    # ServerAliveInterval is listed once (it used to be passed twice to ssh).
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    sed "s/EXAMPLE_PLACEHOLDER/noodle/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_noodle.ps1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_noodle.ps1 "Administrator@$PUBLIC_IP:C:/run_noodle.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_noodle.ps1"
- name: Run ribbon
  run: |
    # Render the shared example runner for "ribbon", copy it to the instance and execute it.
    # ssh returns the remote script's exit status, so a failing example fails this step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Options shared by scp and ssh; only the port flag differs (-P for scp, -p for ssh).
    # ServerAliveInterval is listed once (it used to be passed twice to ssh).
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    sed "s/EXAMPLE_PLACEHOLDER/ribbon/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_ribbon.ps1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/run_ribbon.ps1 "Administrator@$PUBLIC_IP:C:/run_ribbon.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_ribbon.ps1"
- name: Collect results
  if: success() || failure()
  run: |
    echo "Collecting results..."
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    REMOTE="Administrator@$PUBLIC_IP"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    mkdir -p ci
    # Best effort: drop bulky outputs (*.bin, *.pickle, *.ply, *.gz) from the remote
    # project cache (C:\ppf-contact-solver\cache\ppf-cts\ci) so the download stays small.
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "$REMOTE" \
      "powershell -Command \"Get-ChildItem -Path C:\\ppf-contact-solver\\cache\\ppf-cts\\ci -Recurse -Include '*.bin','*.pickle','*.ply','*.gz' -ErrorAction SilentlyContinue | Remove-Item -Force\"" || true
    # Download what is left of the ppf-cts CI output; a missing directory is not an error.
    scp -P "$SSH_PORT" -r "${CONN_OPTS[@]}" \
      "${REMOTE}:C:/ppf-contact-solver/cache/ppf-cts/ci/*" ./ci/ || echo "No ppf-cts CI files found"
    # Download the logs and scripts kept under C:\ci into the same local folder.
    scp -P "$SSH_PORT" -r "${CONN_OPTS[@]}" \
      "${REMOTE}:C:/ci/*" ./ci/ || echo "No script/log files found"
    echo "## Collected Files:"
    ls -laR ci/ || echo "No files collected"
# Publish the runner-local ci/ directory as the "ci-win-batch-3" artifact, retained for 3 days.
# The condition runs this step after both successful and failed steps (it is skipped on cancellation).
- name: Upload artifact
if: success() || failure()
uses: actions/upload-artifact@v4
with:
name: ci-win-batch-3
path: ci
retention-days: 3
- name: GPU information
  if: success() || failure()
  run: |
    # Print nvidia-smi output from the instance for diagnostics; never fails the step.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    ssh "${SSH_OPTS[@]}" "Administrator@$PUBLIC_IP" "nvidia-smi" || true
# Assume the AWS role again right before the cleanup steps.
# always() runs this even after failures or cancellation; continue-on-error keeps a failed
# login from blocking the remaining cleanup steps.
# NOTE(review): presumably needed because the credentials obtained at the start of the job
# can expire during long runs - confirm against the role's session duration.
- name: Re-authenticate for cleanup
if: always()
continue-on-error: true
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
aws-region: ${{ env.AWS_REGION }}
- name: Cleanup - Terminate Instance
  if: always()
  continue-on-error: true
  run: |
    # Terminate the instance recorded by the launch step; a missing ID means there is nothing to clean up.
    INSTANCE_ID="${{ steps.instance.outputs.INSTANCE_ID }}"
    if [ -z "$INSTANCE_ID" ]; then
      exit 0
    fi
    aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" --region "$AWS_REGION" || true
- name: Cleanup - Remove Ingress Rules
  if: always()
  continue-on-error: true
  run: |
    # Revoke the TCP ingress rule for the recorded CIDR and SSH port. Skipped when the
    # security-group step did not publish both the group ID and the CIDR.
    SG_ID="${{ steps.security-group.outputs.SG_ID }}"
    CIDR="${{ steps.security-group.outputs.RUNNER_IP_CIDR }}"
    PORT="${{ steps.security-group.outputs.SSH_PORT }}"
    if [ -z "$SG_ID" ] || [ -z "$CIDR" ]; then
      exit 0
    fi
    aws ec2 revoke-security-group-ingress --group-id "$SG_ID" \
      --ip-permissions "IpProtocol=tcp,FromPort=$PORT,ToPort=$PORT,IpRanges=[{CidrIp=$CIDR}]" \
      --region "$AWS_REGION" 2>&1 || true
# Delete the private key file from the runner. rm -f succeeds even if the file was never
# written, and continue-on-error keeps this from affecting the job result.
- name: Cleanup - Remove Local SSH Key
if: always()
continue-on-error: true
run: rm -f "${{ steps.keypair.outputs.KEY_PATH }}"
- name: Summary
  if: always()
  run: |
    # Always print a short recap; the instance line falls back to "Not launched" when no ID was recorded.
    printf '%s\n' \
      "## Batch 3 Summary" \
      "- Examples: hang needle noodle ribbon" \
      "- Instance: ${{ steps.instance.outputs.INSTANCE_ID || 'Not launched' }}"
# Batch 4 job: runs on a GitHub-hosted Ubuntu runner and drives a Windows EC2 instance over SSH
# for the examples listed in EXAMPLES.
run-batch-4:
name: Run Batch 4 (Windows)
runs-on: ubuntu-latest
# id-token: write lets the job request the OIDC token used by the
# "Configure AWS credentials via OIDC" step; contents: read is enough for checkout.
permissions:
id-token: write
contents: read
# Job-level environment, visible to every step's shell on the runner.
env:
AWS_REGION: ${{ github.event.inputs.region }}
INSTANCE_TYPE: ${{ github.event.inputs.instance_type }}
BRANCH: ${{ github.ref_name }}
EXAMPLES: "roller stack trampoline trapped"
WORKDIR: C:\ppf-contact-solver
# NOTE(review): this overrides the runner shell's own USER variable for every step - confirm that is intended.
USER: Administrator
steps:
- name: Show input parameters
  run: |
    # Print the dispatch parameters for this batch.
    # Values are read from the job-level env (BRANCH, INSTANCE_TYPE, AWS_REGION, EXAMPLES)
    # instead of being inlined as workflow expressions, so text such as a branch name is
    # only ever treated as data by the shell.
    echo "## Input Parameters - Batch 4 (Windows)"
    echo "Branch: $BRANCH"
    echo "Instance Type: $INSTANCE_TYPE"
    echo "Region: $AWS_REGION"
    echo "Examples: $EXAMPLES"
# Check out the repository on the runner; later steps read the helper scripts under
# .github/workflows/scripts/win and archive HEAD for transfer to the instance.
- name: Checkout repository
uses: actions/checkout@v4
# Assume the IAM role stored in the AWS_ROLE_ARN secret for the region selected at dispatch.
# Relies on the job's id-token: write permission.
- name: Configure AWS credentials via OIDC
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
aws-region: ${{ env.AWS_REGION }}
- name: Verify AWS authentication
  run: |
    # Fail early if the assumed role cannot call STS.
    printf '%s\n' "Testing AWS authentication..."
    aws sts get-caller-identity
- name: Get GitHub Actions runner public IP
  id: runner-ip
  run: |
    # Look up this runner's public IPv4 address and publish it as the RUNNER_IP output.
    # The value is later embedded in a /32 security-group rule, so it is validated here.
    echo "Fetching GitHub Actions runner public IP..."
    # -f makes curl fail on HTTP errors instead of handing back an error page as the "IP".
    RUNNER_IP=$(curl -fsS --max-time 10 https://checkip.amazonaws.com | tr -d '[:space:]')
    if [ -z "$RUNNER_IP" ]; then
      echo "ERROR: Failed to get IP from checkip.amazonaws.com"
      exit 1
    fi
    if ! [[ "$RUNNER_IP" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
      echo "ERROR: checkip.amazonaws.com returned an unexpected value (not an IPv4 address)"
      exit 1
    fi
    # Mask first so the address is redacted in every later log line, including the echo below.
    echo "::add-mask::$RUNNER_IP"
    echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_OUTPUT"
    echo "GitHub Actions Runner IP: $RUNNER_IP"
- name: Find Windows Server 2025 AMI
  id: ami
  run: |
    # Pick the most recently created, available, Amazon-owned image matching the name pattern.
    NAME_PATTERN="Windows_Server-2025-English-Full-Base-*"
    AMI_ID=$(aws ec2 describe-images \
      --owners amazon \
      --filters "Name=name,Values=$NAME_PATTERN" "Name=state,Values=available" \
      --query 'sort_by(Images, &CreationDate)[-1].ImageId' \
      --region "$AWS_REGION" --output text)
    # With --output text an empty query result is printed as the literal "None".
    case "$AMI_ID" in
      ""|None)
        echo "ERROR: Windows Server 2025 AMI not found"
        exit 1
        ;;
    esac
    echo "AMI_ID=$AMI_ID" >> $GITHUB_OUTPUT
- name: Get default VPC ID
  id: vpc
  run: |
    # Resolve the region's default VPC and publish it as the VPC_ID output.
    VPC_ID=$(aws ec2 describe-vpcs \
      --filters "Name=isDefault,Values=true" \
      --query 'Vpcs[0].VpcId' \
      --region "$AWS_REGION" --output text)
    # With --output text an empty query result is printed as the literal "None".
    case "$VPC_ID" in
      ""|None)
        echo "ERROR: Default VPC not found"
        exit 1
        ;;
    esac
    echo "VPC_ID=$VPC_ID" >> $GITHUB_OUTPUT
- name: Generate unique identifiers
  id: ids
  run: |
    # Publish a run timestamp and a randomised SSH port as step outputs.
    # (The previously generated RANDOM_SUFFIX was never used and has been dropped.)
    TIMESTAMP=$(date +%Y%m%d%H%M%S)
    # bash's RANDOM is at most 32767, so the modulo never takes effect and the port
    # always lands in 10001-42768.
    SSH_PORT=$((10001 + RANDOM % 55535))
    # Mask the port so it is redacted in later log output.
    echo "::add-mask::$SSH_PORT"
    echo "TIMESTAMP=$TIMESTAMP" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Setup persistent security group
  id: security-group
  run: |
    # Find (or create) the shared security group and open this run's SSH port for the
    # runner's IP only. Outputs: SG_ID, RUNNER_IP_CIDR, SSH_PORT (consumed by the cleanup step).
    SG_NAME="github-actions-windows-persistent"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    RUNNER_IP_CIDR="${{ steps.runner-ip.outputs.RUNNER_IP }}/32"
    SG_ID=$(aws ec2 describe-security-groups --filters "Name=group-name,Values=$SG_NAME" \
      --query 'SecurityGroups[0].GroupId' --region "$AWS_REGION" --output text || echo "")
    if [ "$SG_ID" = "None" ] || [ -z "$SG_ID" ]; then
      SG_ID=$(aws ec2 create-security-group --group-name "$SG_NAME" \
        --description "GitHub Actions Windows builds" \
        --vpc-id "${{ steps.vpc.outputs.VPC_ID }}" \
        --query 'GroupId' --region "$AWS_REGION" --output text)
      aws ec2 create-tags --resources "$SG_ID" --tags "Key=Name,Value=$SG_NAME" --region "$AWS_REGION"
    fi
    echo "SG_ID=$SG_ID" >> "$GITHUB_OUTPUT"
    # Only an already-existing rule is tolerated. Any other failure (permissions, rule
    # quota, ...) would leave the port closed, so fail here rather than after the long
    # SSH wait with a GPU instance already running.
    if AUTH_OUT=$(aws ec2 authorize-security-group-ingress --group-id "$SG_ID" \
        --ip-permissions "IpProtocol=tcp,FromPort=$SSH_PORT,ToPort=$SSH_PORT,IpRanges=[{CidrIp=$RUNNER_IP_CIDR}]" \
        --region "$AWS_REGION" 2>&1); then
      echo "$AUTH_OUT"
    elif echo "$AUTH_OUT" | grep -q "InvalidPermission.Duplicate"; then
      echo "$AUTH_OUT"
      echo "Rule may exist"
    else
      echo "$AUTH_OUT"
      echo "ERROR: Failed to authorize SSH ingress on security group $SG_ID"
      exit 1
    fi
    echo "RUNNER_IP_CIDR=$RUNNER_IP_CIDR" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Retrieve SSH key from Parameter Store
  id: keypair
  run: |
    # Fetch the decrypted private key from SSM Parameter Store and publish its path as KEY_PATH.
    KEY_PATH=/tmp/github-actions-ec2.pem
    # Tighten the umask first so a newly created key file is never readable by other users,
    # not even briefly before chmod runs.
    umask 077
    aws ssm get-parameter --name "/github-actions/ec2/ssh-key" --with-decryption \
      --query 'Parameter.Value' --region "$AWS_REGION" --output text > "$KEY_PATH"
    chmod 600 "$KEY_PATH"
    # An empty key would only surface much later as an SSH timeout; catch it here.
    if [ ! -s "$KEY_PATH" ]; then
      echo "ERROR: SSH key retrieved from Parameter Store is empty"
      exit 1
    fi
    echo "KEY_PATH=$KEY_PATH" >> "$GITHUB_OUTPUT"
- name: Create Windows user data script
  run: |
    # Fill this run's SSH port into the user-data template and write the result to /tmp.
    PORT="${{ steps.ids.outputs.SSH_PORT }}"
    sed -e "s/SSH_PORT_PLACEHOLDER/${PORT}/g" \
      .github/workflows/scripts/win/user-data.ps1 > /tmp/user-data.ps1
- name: Launch EC2 instance
  id: instance
  run: |
    # Launch one instance from the selected AMI with a 100 GB gp3 root volume and the
    # rendered user-data script, then publish its ID as the INSTANCE_ID output.
    LAUNCH_ARGS=(
      --image-id "${{ steps.ami.outputs.AMI_ID }}"
      --instance-type "$INSTANCE_TYPE"
      --key-name "${{ secrets.AWS_KEY_PAIR_NAME }}"
      --security-group-ids "${{ steps.security-group.outputs.SG_ID }}"
      --user-data file:///tmp/user-data.ps1
      --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=100,VolumeType=gp3,DeleteOnTermination=true}"
      --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=gpu-runner-win-batch-4-${{ steps.ids.outputs.TIMESTAMP }}},{Key=ManagedBy,Value=GitHubActions},{Key=Batch,Value=4}]"
      # An OS-level shutdown terminates the instance instead of merely stopping it.
      --instance-initiated-shutdown-behavior terminate
      --query 'Instances[0].InstanceId'
      --region "$AWS_REGION"
      --output text
    )
    INSTANCE_ID=$(aws ec2 run-instances "${LAUNCH_ARGS[@]}")
    echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_OUTPUT
- name: Wait for instance to be running
  run: |
    # Block until EC2 reports the instance as running, then export its public IP as
    # PUBLIC_IP for all later steps (via GITHUB_ENV).
    INSTANCE_ID="${{ steps.instance.outputs.INSTANCE_ID }}"
    aws ec2 wait instance-running --instance-ids "$INSTANCE_ID" --region "$AWS_REGION"
    PUBLIC_IP=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' --region "$AWS_REGION" --output text)
    # With --output text a missing address is printed as "None"; every later ssh/scp
    # would then target a bogus host, so stop here with a clear message.
    if [ -z "$PUBLIC_IP" ] || [ "$PUBLIC_IP" = "None" ]; then
      echo "ERROR: Instance $INSTANCE_ID has no public IP address"
      exit 1
    fi
    echo "::add-mask::$PUBLIC_IP"
    echo "PUBLIC_IP=$PUBLIC_IP" >> "$GITHUB_ENV"
- name: Wait for SSH
  run: |
    # Two-phase readiness check, polling every 30 seconds:
    #   1. until sshd accepts the key and runs a command (up to 60 attempts);
    #   2. until C:\ssh_ready.txt exists on the instance (up to 30 attempts).
    # Each phase now fails the step when its attempts are used up; previously the loops
    # simply ended and later steps ran against a host that was not ready.
    # NOTE(review): C:\ssh_ready.txt is presumably written by the user-data script - confirm.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    SSH_UP=false
    for i in $(seq 1 60); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=5 -o BatchMode=yes \
          "Administrator@$PUBLIC_IP" "echo SSH_READY" 2>/dev/null | grep -q SSH_READY; then
        echo "SSH ready!"
        SSH_UP=true
        break
      fi
      echo "Waiting for SSH ($i/60)..."
      sleep 30
    done
    if [ "$SSH_UP" != true ]; then
      echo "ERROR: SSH did not become reachable after 60 attempts"
      exit 1
    fi
    SETUP_DONE=false
    for i in $(seq 1 30); do
      if ssh "${SSH_OPTS[@]}" -o ConnectTimeout=10 \
          "Administrator@$PUBLIC_IP" "if (Test-Path C:\\ssh_ready.txt) { echo READY }" 2>/dev/null | grep -q READY; then
        echo "Setup complete!"
        SETUP_DONE=true
        break
      fi
      echo "Waiting for instance setup ($i/30)..."
      sleep 30
    done
    if [ "$SETUP_DONE" != true ]; then
      echo "ERROR: Instance setup did not complete after 30 attempts"
      exit 1
    fi
- name: Install NVIDIA driver only (no CUDA toolkit)
  run: |
    # Upload the driver install script from the repository and run it on the instance.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    TARGET="Administrator@$PUBLIC_IP"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" \
      .github/workflows/scripts/win/install-nvidia-driver.ps1 "${TARGET}:C:/install_driver.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "$TARGET" \
      "powershell -ExecutionPolicy Bypass -File C:/install_driver.ps1"
- name: Transfer repository to instance
  run: |
    # Zip the checked-out HEAD, upload it, and unpack it into a fresh C:\ppf-contact-solver.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    TARGET="Administrator@$PUBLIC_IP"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    # Remote side: remove any previous tree, recreate the directory, expand the archive, delete the zip.
    UNPACK_CMD="powershell -Command \"if (Test-Path 'C:\\ppf-contact-solver') { Remove-Item -Recurse -Force 'C:\\ppf-contact-solver' }; New-Item -ItemType Directory -Path 'C:\\ppf-contact-solver' -Force; Expand-Archive -Path 'C:\\source.zip' -DestinationPath 'C:\\ppf-contact-solver' -Force; Remove-Item 'C:\\source.zip'\""
    git archive --format=zip --output=/tmp/repo.zip HEAD
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/repo.zip "${TARGET}:C:/source.zip"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "$TARGET" "$UNPACK_CMD"
- name: Run warmup.bat
  run: |
    # Run build-win-native\warmup.bat on the instance through cmd, passing /nopause.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # ServerAliveInterval is listed once (it used to be passed twice).
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    ssh "${SSH_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && warmup.bat /nopause'"
- name: Run build.bat
  run: |
    # Run build-win-native\build.bat on the instance through cmd, passing /nopause.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # ServerAliveInterval is listed once (it used to be passed twice).
    SSH_OPTS=(
      -p "$SSH_PORT"
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    ssh "${SSH_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && build.bat /nopause'"
- name: Convert assertion notebook to Python script
  run: |
    # Convert examples/fail-examples/assertion.ipynb to C:\ci\assertion.py on the instance,
    # prefixed with a header that puts the project and its frontend on sys.path.
    echo "Converting assertion notebook: examples/fail-examples/assertion.ipynb"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    CONN_OPTS=(
      -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10
      -i "$KEY_PATH"
    )
    # Use the same conversion pattern as main examples
    cat > /tmp/convert_assertion.ps1 << 'EOFPS1'
    $ErrorActionPreference = "Stop"
    Set-Location C:\ppf-contact-solver
    $env:PATH = "C:\ppf-contact-solver\build-win-native\python;C:\ppf-contact-solver\build-win-native\python\Scripts;" + $env:PATH
    New-Item -ItemType Directory -Path "C:\ci" -Force | Out-Null
    Write-Host "Converting assertion.ipynb to Python script..."
    & C:\ppf-contact-solver\build-win-native\python\python.exe -m jupyter nbconvert --to python "examples/fail-examples/assertion.ipynb" --output "C:\ci\assertion_base.py"
    # $ErrorActionPreference does not apply to native executables, so check the exit code explicitly.
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ERROR: nbconvert failed with exit code $LASTEXITCODE"
        exit $LASTEXITCODE
    }
    $header = "import sys`nimport os`nsys.path.insert(0, r'C:\ppf-contact-solver')`nsys.path.insert(0, r'C:\ppf-contact-solver\frontend')`nos.environ['PYTHONPATH'] = r'C:\ppf-contact-solver;C:\ppf-contact-solver\frontend;' + os.environ.get('PYTHONPATH', '')"
    $baseContent = Get-Content "C:\ci\assertion_base.py" -Raw
    $header + "`n" + $baseContent | Set-Content "C:\ci\assertion.py"
    Write-Host "Assertion script prepared at C:\ci\assertion.py"
    EOFPS1
    scp -P "$SSH_PORT" "${CONN_OPTS[@]}" /tmp/convert_assertion.ps1 "Administrator@$PUBLIC_IP:C:/convert_assertion.ps1"
    ssh -p "$SSH_PORT" "${CONN_OPTS[@]}" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/convert_assertion.ps1"
- name: Run assertion test (expect failure)
  run: |
    # Sanity check for error propagation: C:\ci\assertion.py is expected to
    # fail, and that failure must surface as a non-zero ssh exit status.
    # The step passes only when the remote script itself reports a failure:
    #   0   -> the script succeeded, errors are not propagated -> step fails
    #   255 -> ssh could not run the command at all (ssh reserves 255 for its
    #          own errors), so nothing was verified -> step fails
    #   any other non-zero value -> the expected failure -> step passes
    echo "Running assertion test to verify error propagation via SSH..."
    echo "This test uses the same execution pattern as main examples"
    echo "Expected result: FAILURE (AssertionError)"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Create script that runs the same way as main examples
    cat > /tmp/run_assertion.ps1 << 'EOFPS1'
    Set-Location C:\ppf-contact-solver
    "assertion" | Set-Content "frontend\.CI"
    & C:\ppf-contact-solver\build-win-native\python\python.exe C:\ci\assertion.py 2>&1 | Tee-Object -FilePath "C:\ci\assertion.log"
    exit $LASTEXITCODE
    EOFPS1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_assertion.ps1 "Administrator@$PUBLIC_IP:C:/run_assertion.ps1"
    # Capture the exit status without tripping the shell's errexit mode.
    status=0
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_assertion.ps1" || status=$?
    if [ "$status" -eq 0 ]; then
      echo "ERROR: Assertion test should have failed but succeeded"
      echo "This means errors are NOT being propagated correctly!"
      exit 1
    elif [ "$status" -eq 255 ]; then
      echo "ERROR: ssh itself failed (exit 255); the assertion test did not run"
      exit 1
    else
      echo "SUCCESS: Assertion test failed as expected"
      echo "Error propagation via SSH is working correctly"
      echo "Main example tests can now proceed with confidence"
    fi
- name: Setup CI directory
  run: |
    # Create C:\ci on the instance with a single remote PowerShell call.
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    mkdir_cmd="powershell -Command \"New-Item -ItemType Directory -Path 'C:\\ci' -Force\""
    ssh -p "$port" \
      -o StrictHostKeyChecking=no \
      -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 \
      -o ServerAliveCountMax=10 \
      -i "$key" "Administrator@$PUBLIC_IP" "$mkdir_cmd"
# Example steps for this batch. Each one instantiates the shared
# run-example.ps1 template by replacing EXAMPLE_PLACEHOLDER with the example
# name, uploads the result to C:/ on the instance and runs it over SSH.
# Keep-alive options are listed once per command.
- name: Run roller
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/roller/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_roller.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_roller.ps1 "Administrator@$PUBLIC_IP:C:/run_roller.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_roller.ps1"
- name: Run stack
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/stack/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_stack.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_stack.ps1 "Administrator@$PUBLIC_IP:C:/run_stack.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_stack.ps1"
- name: Run trampoline
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/trampoline/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_trampoline.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_trampoline.ps1 "Administrator@$PUBLIC_IP:C:/run_trampoline.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_trampoline.ps1"
- name: Run trapped
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/trapped/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_trapped.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_trapped.ps1 "Administrator@$PUBLIC_IP:C:/run_trapped.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_trapped.ps1"
- name: Collect results
  if: ${{ success() || failure() }}
  run: |
    echo "Collecting results..."
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    host="Administrator@$PUBLIC_IP"
    conn_opts=(
      -o StrictHostKeyChecking=no
      -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60
      -o ServerAliveCountMax=10
      -i "$key"
    )
    mkdir -p ci

    # CI output sits in the project-relative cache:
    # C:\ppf-contact-solver\cache\ppf-cts\ci. Remove the large binary files
    # there before downloading to save bandwidth; failures are ignored.
    prune_cmd="powershell -Command \"Get-ChildItem -Path C:\\ppf-contact-solver\\cache\\ppf-cts\\ci -Recurse -Include '*.bin','*.pickle','*.ply','*.gz' -ErrorAction SilentlyContinue | Remove-Item -Force\""
    ssh -p "$port" "${conn_opts[@]}" "$host" "$prune_cmd" || true

    # Download the cache contents first, then the logs and scripts in C:\ci.
    scp -P "$port" -r "${conn_opts[@]}" "${host}:C:/ppf-contact-solver/cache/ppf-cts/ci/*" ./ci/ || echo "No ppf-cts CI files found"
    scp -P "$port" -r "${conn_opts[@]}" "${host}:C:/ci/*" ./ci/ || echo "No script/log files found"

    echo "## Collected Files:"
    ls -laR ci/ || echo "No files collected"
# Publish the local ci/ directory as this batch's artifact, kept for 3 days.
- name: Upload artifact
  if: ${{ success() || failure() }}
  uses: actions/upload-artifact@v4
  with:
    path: ci
    name: ci-win-batch-4
    retention-days: 3
- name: GPU information
  if: ${{ success() || failure() }}
  run: |
    # Print nvidia-smi output from the instance; a failure is ignored.
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    conn_opts=(
      -o StrictHostKeyChecking=no
      -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60
      -o ServerAliveCountMax=10
      -i "$key"
    )
    ssh -p "$port" "${conn_opts[@]}" "Administrator@$PUBLIC_IP" "nvidia-smi" || true
# Cleanup steps: each runs under always() and is marked continue-on-error.
- name: Re-authenticate for cleanup
  if: always()
  continue-on-error: true
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
- name: Cleanup - Terminate Instance
  if: always()
  continue-on-error: true
  env:
    TARGET_INSTANCE: ${{ steps.instance.outputs.INSTANCE_ID }}
  run: |
    # Only act when the launch step recorded an instance id.
    if [ -n "$TARGET_INSTANCE" ]; then
      aws ec2 terminate-instances --instance-ids "$TARGET_INSTANCE" --region "$AWS_REGION" || true
    fi
- name: Cleanup - Remove Ingress Rules
  if: always()
  continue-on-error: true
  env:
    RULE_GROUP: ${{ steps.security-group.outputs.SG_ID }}
    RULE_PORT: ${{ steps.security-group.outputs.SSH_PORT }}
    RULE_CIDR: ${{ steps.security-group.outputs.RUNNER_IP_CIDR }}
  run: |
    # Revoke the per-run SSH rule; skipped unless both group id and CIDR exist.
    if [ -n "$RULE_GROUP" ] && [ -n "$RULE_CIDR" ]; then
      aws ec2 revoke-security-group-ingress --group-id "$RULE_GROUP" \
        --ip-permissions "IpProtocol=tcp,FromPort=$RULE_PORT,ToPort=$RULE_PORT,IpRanges=[{CidrIp=$RULE_CIDR}]" \
        --region "$AWS_REGION" 2>&1 || true
    fi
- name: Cleanup - Remove Local SSH Key
  if: always()
  continue-on-error: true
  env:
    LOCAL_KEY: ${{ steps.keypair.outputs.KEY_PATH }}
  run: |
    # Delete the private key file from the runner.
    rm -f "$LOCAL_KEY"
- name: Summary
  if: always()
  env:
    INSTANCE_LABEL: ${{ steps.instance.outputs.INSTANCE_ID || 'Not launched' }}
  run: |
    # Three-line recap; the instance line falls back to 'Not launched'.
    printf '%s\n' \
      "## Batch 4 Summary" \
      "- Examples: roller stack trampoline trapped" \
      "- Instance: $INSTANCE_LABEL"
# Job: batch 5 of the Windows-native example runs (twist, five-twist, woven,
# yarn). The job itself executes on an ubuntu-latest runner; its steps launch
# an EC2 instance and drive it over SSH.
run-batch-5:
name: Run Batch 5 (Windows)
runs-on: ubuntu-latest
permissions:
# id-token: write allows the job to request an OIDC token, which the
# "Configure AWS credentials via OIDC" step relies on.
id-token: write
contents: read
env:
# Region and instance type come straight from the workflow_dispatch inputs.
AWS_REGION: ${{ github.event.inputs.region }}
INSTANCE_TYPE: ${{ github.event.inputs.instance_type }}
# NOTE(review): BRANCH, EXAMPLES, WORKDIR and USER are not referenced by the
# steps of this job; the scripts hard-code the example names,
# C:\ppf-contact-solver and Administrator instead - confirm before relying on
# these values.
BRANCH: ${{ github.ref_name }}
EXAMPLES: "twist five-twist woven yarn"
WORKDIR: C:\ppf-contact-solver
USER: Administrator
steps:
- name: Show input parameters
  run: |
    # Echo the dispatch inputs for this batch, one per line.
    printf '%s\n' \
      "## Input Parameters - Batch 5 (Windows)" \
      "Branch: ${{ github.ref_name }}" \
      "Instance Type: ${{ github.event.inputs.instance_type }}" \
      "Region: ${{ github.event.inputs.region }}" \
      "Examples: twist five-twist woven yarn"
# Fetch the sources, assume the AWS role through OIDC, then confirm the
# credentials work by asking STS who we are.
- name: Checkout repository
  uses: actions/checkout@v4
- name: Configure AWS credentials via OIDC
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
- name: Verify AWS authentication
  run: |
    printf '%s\n' "Testing AWS authentication..."
    aws sts get-caller-identity
- name: Get GitHub Actions runner public IP
  id: runner-ip
  run: |
    # Determine the runner's public IPv4 address and publish it as the
    # RUNNER_IP step output. The value later becomes a /32 CIDR in a security
    # group rule, so it is validated before being exported.
    echo "Fetching GitHub Actions runner public IP..."
    # -f makes curl return nothing on an HTTP error instead of the error body.
    RUNNER_IP=$(curl -fsS --max-time 10 https://checkip.amazonaws.com | tr -d '\n')
    if [ -z "$RUNNER_IP" ]; then
      echo "ERROR: Failed to get IP from checkip.amazonaws.com"
      exit 1
    fi
    # Reject anything that is not a dotted-quad address (e.g. an HTML page).
    if ! [[ "$RUNNER_IP" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
      echo "ERROR: Unexpected response from checkip.amazonaws.com (not an IPv4 address)"
      exit 1
    fi
    # Mask the address in the log before anything prints it.
    echo "::add-mask::$RUNNER_IP"
    echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_OUTPUT"
    echo "GitHub Actions Runner IP: $RUNNER_IP"
- name: Find Windows Server 2025 AMI
  id: ami
  run: |
    # Pick the most recently created, available Amazon-owned image whose name
    # matches the Windows Server 2025 pattern.
    latest_ami=$(aws ec2 describe-images \
      --region "$AWS_REGION" \
      --owners amazon \
      --filters "Name=name,Values=Windows_Server-2025-English-Full-Base-*" "Name=state,Values=available" \
      --query 'sort_by(Images, &CreationDate)[-1].ImageId' \
      --output text)
    # An empty result or the literal "None" both mean nothing was found.
    case "$latest_ami" in
      ""|None)
        echo "ERROR: Windows Server 2025 AMI not found"
        exit 1
        ;;
    esac
    echo "AMI_ID=$latest_ami" >> $GITHUB_OUTPUT
- name: Get default VPC ID
  id: vpc
  run: |
    # Look up the region's default VPC and publish its id as VPC_ID.
    default_vpc=$(aws ec2 describe-vpcs \
      --region "$AWS_REGION" \
      --filters "Name=isDefault,Values=true" \
      --query 'Vpcs[0].VpcId' \
      --output text)
    # An empty result or the literal "None" both mean there is no default VPC.
    case "$default_vpc" in
      ""|None)
        echo "ERROR: Default VPC not found"
        exit 1
        ;;
    esac
    echo "VPC_ID=$default_vpc" >> $GITHUB_OUTPUT
- name: Generate unique identifiers
  id: ids
  run: |
    # Publishes two step outputs: TIMESTAMP (used in the instance Name tag)
    # and SSH_PORT (the randomly chosen sshd port for this run).
    TIMESTAMP=$(date +%Y%m%d%H%M%S)
    # Bash's $RANDOM yields 0..32767, so the port lands in 10001..42768.
    # The former "% 55535" never changed the result and has been dropped.
    SSH_PORT=$((10001 + RANDOM))
    # Keep the port number out of the logs.
    echo "::add-mask::$SSH_PORT"
    echo "TIMESTAMP=$TIMESTAMP" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Setup persistent security group
  id: security-group
  run: |
    # Reuse the shared security group (creating it on first use), then open
    # this run's SSH port to the runner's IP only.
    # Outputs: SG_ID, RUNNER_IP_CIDR and SSH_PORT (read by the cleanup step).
    SG_NAME="github-actions-windows-persistent"
    VPC_ID="${{ steps.vpc.outputs.VPC_ID }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    RUNNER_CIDR="${{ steps.runner-ip.outputs.RUNNER_IP }}/32"
    # Group names are only unique within a VPC, so the lookup is scoped to the
    # same VPC the group is created in; otherwise a same-named group from
    # another VPC could be picked up.
    SG_ID=$(aws ec2 describe-security-groups \
      --filters "Name=group-name,Values=$SG_NAME" "Name=vpc-id,Values=$VPC_ID" \
      --query 'SecurityGroups[0].GroupId' --region "$AWS_REGION" --output text || echo "")
    if [ "$SG_ID" = "None" ] || [ -z "$SG_ID" ]; then
      SG_ID=$(aws ec2 create-security-group --group-name "$SG_NAME" \
        --description "GitHub Actions Windows builds" \
        --vpc-id "$VPC_ID" \
        --query 'GroupId' --region "$AWS_REGION" --output text)
      aws ec2 create-tags --resources "$SG_ID" --tags "Key=Name,Value=$SG_NAME" --region "$AWS_REGION"
    fi
    echo "SG_ID=$SG_ID" >> "$GITHUB_OUTPUT"
    # Best effort: a failure here is reported but does not fail the step.
    aws ec2 authorize-security-group-ingress --group-id "$SG_ID" \
      --ip-permissions "IpProtocol=tcp,FromPort=$SSH_PORT,ToPort=$SSH_PORT,IpRanges=[{CidrIp=$RUNNER_CIDR}]" \
      --region "$AWS_REGION" 2>&1 || echo "Rule may exist"
    echo "RUNNER_IP_CIDR=$RUNNER_CIDR" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
- name: Retrieve SSH key from Parameter Store
  id: keypair
  run: |
    # Fetch the private key from SSM Parameter Store into a local file and
    # publish its location as the KEY_PATH step output.
    KEY_FILE=/tmp/github-actions-ec2.pem
    # Publish the path first so the always-run cleanup step has it even when
    # retrieval fails part-way.
    echo "KEY_PATH=$KEY_FILE" >> "$GITHUB_OUTPUT"
    # Create the file owner-only from the start rather than tightening its
    # permissions after the secret has been written.
    umask 077
    aws ssm get-parameter --name "/github-actions/ec2/ssh-key" --with-decryption \
      --query 'Parameter.Value' --region "$AWS_REGION" --output text > "$KEY_FILE"
    chmod 600 "$KEY_FILE"
    # An empty file would only surface later as an obscure ssh failure.
    if [ ! -s "$KEY_FILE" ]; then
      echo "ERROR: SSH key retrieved from Parameter Store is empty"
      exit 1
    fi
- name: Create Windows user data script
  run: |
    # Render the user-data template: every SSH_PORT_PLACEHOLDER becomes the
    # port chosen for this run. The result goes to /tmp/user-data.ps1.
    chosen_port="${{ steps.ids.outputs.SSH_PORT }}"
    template=.github/workflows/scripts/win/user-data.ps1
    sed "s/SSH_PORT_PLACEHOLDER/${chosen_port}/g" "$template" > /tmp/user-data.ps1
- name: Launch EC2 instance
  id: instance
  run: |
    # Assemble the run-instances arguments, launch, and publish the new
    # instance id as the INSTANCE_ID step output.
    launch_args=(
      --image-id "${{ steps.ami.outputs.AMI_ID }}"
      --instance-type "$INSTANCE_TYPE"
      --key-name "${{ secrets.AWS_KEY_PAIR_NAME }}"
      --security-group-ids "${{ steps.security-group.outputs.SG_ID }}"
      --user-data file:///tmp/user-data.ps1
      --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=100,VolumeType=gp3,DeleteOnTermination=true}"
      --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=gpu-runner-win-batch-5-${{ steps.ids.outputs.TIMESTAMP }}},{Key=ManagedBy,Value=GitHubActions},{Key=Batch,Value=5}]"
      --instance-initiated-shutdown-behavior terminate
      --query 'Instances[0].InstanceId'
      --region "$AWS_REGION"
      --output text
    )
    new_instance=$(aws ec2 run-instances "${launch_args[@]}")
    echo "INSTANCE_ID=$new_instance" >> $GITHUB_OUTPUT
- name: Wait for instance to be running
  run: |
    # Block until EC2 reports the instance as running, then export its public
    # address to later steps as the PUBLIC_IP environment variable.
    INSTANCE_ID="${{ steps.instance.outputs.INSTANCE_ID }}"
    aws ec2 wait instance-running --instance-ids "$INSTANCE_ID" --region "$AWS_REGION"
    PUBLIC_IP=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' --region "$AWS_REGION" --output text)
    # Same empty/"None" check as the AMI and VPC lookups: every later ssh and
    # scp call depends on this address.
    if [ "$PUBLIC_IP" = "None" ] || [ -z "$PUBLIC_IP" ]; then
      echo "ERROR: Instance has no public IP address"
      exit 1
    fi
    echo "::add-mask::$PUBLIC_IP"
    echo "PUBLIC_IP=$PUBLIC_IP" >> "$GITHUB_ENV"
- name: Wait for SSH
  run: |
    # Two polling phases, each retrying every 30 seconds:
    #   1. up to 60 attempts until sshd answers a trivial command;
    #   2. up to 30 attempts until C:\ssh_ready.txt exists on the instance.
    # Phase 1 is fatal on timeout because every later step needs SSH. Phase 2
    # only warns, so a missing marker does not abort the run.
    # NOTE(review): the marker is presumably written by user-data.ps1 when
    # instance setup finishes - confirm.
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    ssh_up=false
    for i in $(seq 1 60); do
      if ssh -p "$SSH_PORT" -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
        -o BatchMode=yes -i "$KEY_PATH" "Administrator@$PUBLIC_IP" "echo SSH_READY" 2>/dev/null | grep -q SSH_READY; then
        echo "SSH ready!"
        ssh_up=true
        break
      fi
      echo "Waiting for SSH ($i/60)..."
      sleep 30
    done
    if [ "$ssh_up" != "true" ]; then
      echo "ERROR: SSH did not become ready after 60 attempts"
      exit 1
    fi
    setup_done=false
    for i in $(seq 1 30); do
      if ssh -p "$SSH_PORT" -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
        -i "$KEY_PATH" "Administrator@$PUBLIC_IP" "if (Test-Path C:\\ssh_ready.txt) { echo READY }" 2>/dev/null | grep -q READY; then
        echo "Setup complete!"
        setup_done=true
        break
      fi
      sleep 30
    done
    if [ "$setup_done" != "true" ]; then
      echo "::warning::Setup marker C:/ssh_ready.txt was not seen after 30 attempts; continuing anyway"
    fi
- name: Install NVIDIA driver only (no CUDA toolkit)
  run: |
    # Copy the driver installer script to the instance and run it there.
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    remote="Administrator@$PUBLIC_IP"
    conn_opts=(
      -o StrictHostKeyChecking=no
      -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60
      -o ServerAliveCountMax=10
      -i "$key"
    )
    scp -P "$port" "${conn_opts[@]}" \
      .github/workflows/scripts/win/install-nvidia-driver.ps1 "${remote}:C:/install_driver.ps1"
    ssh -p "$port" "${conn_opts[@]}" "$remote" \
      "powershell -ExecutionPolicy Bypass -File C:/install_driver.ps1"
- name: Transfer repository to instance
  run: |
    # Connection settings shared by the upload and the remote unpack command.
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    remote="Administrator@$PUBLIC_IP"
    conn_opts=(
      -o StrictHostKeyChecking=no
      -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60
      -o ServerAliveCountMax=10
      -i "$key"
    )

    # Snapshot the checked-out commit as a zip archive and upload it.
    git archive --format=zip --output=/tmp/repo.zip HEAD
    scp -P "$port" "${conn_opts[@]}" /tmp/repo.zip "${remote}:C:/source.zip"

    # Recreate C:\ppf-contact-solver from the archive, then delete the archive.
    unpack_cmd="powershell -Command \"if (Test-Path 'C:\\ppf-contact-solver') { Remove-Item -Recurse -Force 'C:\\ppf-contact-solver' }; New-Item -ItemType Directory -Path 'C:\\ppf-contact-solver' -Force; Expand-Archive -Path 'C:\\source.zip' -DestinationPath 'C:\\ppf-contact-solver' -Force; Remove-Item 'C:\\source.zip'\""
    ssh -p "$port" "${conn_opts[@]}" "$remote" "$unpack_cmd"
- name: Run warmup.bat
  run: |
    # Runs build-win-native\warmup.bat on the instance over SSH.
    # NOTE(review): /nopause is handed to the batch file; presumably it
    # suppresses an interactive pause - confirm against warmup.bat.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Each keep-alive option is listed exactly once.
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && warmup.bat /nopause'"
- name: Run build.bat
  run: |
    # Runs build-win-native\build.bat on the instance over SSH, using the same
    # connection options as the warmup step.
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && build.bat /nopause'"
# Step: turn examples/fail-examples/assertion.ipynb into C:\ci\assertion.py on
# the instance. A PowerShell script is written locally through a quoted heredoc
# (so the runner's shell expands nothing inside it), copied over with scp and
# executed over ssh. The script runs nbconvert with the bundled python.exe and
# then prepends a header that adds C:\ppf-contact-solver and its frontend
# directory to sys.path and to PYTHONPATH.
- name: Convert assertion notebook to Python script
run: |
echo "Converting assertion notebook: examples/fail-examples/assertion.ipynb"
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
# Use the same conversion pattern as main examples
cat > /tmp/convert_assertion.ps1 << 'EOFPS1'
$ErrorActionPreference = "Stop"
Set-Location C:\ppf-contact-solver
$env:PATH = "C:\ppf-contact-solver\build-win-native\python;C:\ppf-contact-solver\build-win-native\python\Scripts;" + $env:PATH
New-Item -ItemType Directory -Path "C:\ci" -Force | Out-Null
Write-Host "Converting assertion.ipynb to Python script..."
& C:\ppf-contact-solver\build-win-native\python\python.exe -m jupyter nbconvert --to python "examples/fail-examples/assertion.ipynb" --output "C:\ci\assertion_base.py"
$header = "import sys`nimport os`nsys.path.insert(0, r'C:\ppf-contact-solver')`nsys.path.insert(0, r'C:\ppf-contact-solver\frontend')`nos.environ['PYTHONPATH'] = r'C:\ppf-contact-solver;C:\ppf-contact-solver\frontend;' + os.environ.get('PYTHONPATH', '')"
$baseContent = Get-Content "C:\ci\assertion_base.py" -Raw
$header + "`n" + $baseContent | Set-Content "C:\ci\assertion.py"
Write-Host "Assertion script prepared at C:\ci\assertion.py"
EOFPS1
# Upload the generated script to the instance...
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
-i "$KEY_PATH" /tmp/convert_assertion.ps1 Administrator@$PUBLIC_IP:C:/convert_assertion.ps1
# ...and execute it there; a non-zero remote exit status fails this step.
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
-i "$KEY_PATH" Administrator@$PUBLIC_IP \
"powershell -ExecutionPolicy Bypass -File C:/convert_assertion.ps1"
- name: Run assertion test (expect failure)
  run: |
    # Sanity check for error propagation: C:\ci\assertion.py is expected to
    # fail, and that failure must surface as a non-zero ssh exit status.
    # The step passes only when the remote script itself reports a failure:
    #   0   -> the script succeeded, errors are not propagated -> step fails
    #   255 -> ssh could not run the command at all (ssh reserves 255 for its
    #          own errors), so nothing was verified -> step fails
    #   any other non-zero value -> the expected failure -> step passes
    echo "Running assertion test to verify error propagation via SSH..."
    echo "This test uses the same execution pattern as main examples"
    echo "Expected result: FAILURE (AssertionError)"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    # Create script that runs the same way as main examples
    cat > /tmp/run_assertion.ps1 << 'EOFPS1'
    Set-Location C:\ppf-contact-solver
    "assertion" | Set-Content "frontend\.CI"
    & C:\ppf-contact-solver\build-win-native\python\python.exe C:\ci\assertion.py 2>&1 | Tee-Object -FilePath "C:\ci\assertion.log"
    exit $LASTEXITCODE
    EOFPS1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_assertion.ps1 "Administrator@$PUBLIC_IP:C:/run_assertion.ps1"
    # Capture the exit status without tripping the shell's errexit mode.
    status=0
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_assertion.ps1" || status=$?
    if [ "$status" -eq 0 ]; then
      echo "ERROR: Assertion test should have failed but succeeded"
      echo "This means errors are NOT being propagated correctly!"
      exit 1
    elif [ "$status" -eq 255 ]; then
      echo "ERROR: ssh itself failed (exit 255); the assertion test did not run"
      exit 1
    else
      echo "SUCCESS: Assertion test failed as expected"
      echo "Error propagation via SSH is working correctly"
      echo "Main example tests can now proceed with confidence"
    fi
- name: Setup CI directory
  run: |
    # Create C:\ci on the instance with a single remote PowerShell call.
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    mkdir_cmd="powershell -Command \"New-Item -ItemType Directory -Path 'C:\\ci' -Force\""
    ssh -p "$port" \
      -o StrictHostKeyChecking=no \
      -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 \
      -o ServerAliveCountMax=10 \
      -i "$key" "Administrator@$PUBLIC_IP" "$mkdir_cmd"
# Example steps for this batch. Each one instantiates the shared
# run-example.ps1 template by replacing EXAMPLE_PLACEHOLDER with the example
# name, uploads the result to C:/ on the instance and runs it over SSH.
# Keep-alive options are listed once per command.
- name: Run twist
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/twist/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_twist.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_twist.ps1 "Administrator@$PUBLIC_IP:C:/run_twist.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_twist.ps1"
- name: Run five-twist
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/five-twist/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_five-twist.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_five-twist.ps1 "Administrator@$PUBLIC_IP:C:/run_five-twist.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_five-twist.ps1"
- name: Run woven
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/woven/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_woven.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_woven.ps1 "Administrator@$PUBLIC_IP:C:/run_woven.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_woven.ps1"
- name: Run yarn
  run: |
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    sed "s/EXAMPLE_PLACEHOLDER/yarn/g" .github/workflows/scripts/win/run-example.ps1 > /tmp/run_yarn.ps1
    scp -P "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" /tmp/run_yarn.ps1 "Administrator@$PUBLIC_IP:C:/run_yarn.ps1"
    ssh -p "$SSH_PORT" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o ServerAliveInterval=60 -o ServerAliveCountMax=10 \
      -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
      "powershell -ExecutionPolicy Bypass -File C:/run_yarn.ps1"
- name: Collect results
  if: ${{ success() || failure() }}
  run: |
    echo "Collecting results..."
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    host="Administrator@$PUBLIC_IP"
    conn_opts=(
      -o StrictHostKeyChecking=no
      -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60
      -o ServerAliveCountMax=10
      -i "$key"
    )
    mkdir -p ci

    # CI output sits in the project-relative cache:
    # C:\ppf-contact-solver\cache\ppf-cts\ci. Remove the large binary files
    # there before downloading to save bandwidth; failures are ignored.
    prune_cmd="powershell -Command \"Get-ChildItem -Path C:\\ppf-contact-solver\\cache\\ppf-cts\\ci -Recurse -Include '*.bin','*.pickle','*.ply','*.gz' -ErrorAction SilentlyContinue | Remove-Item -Force\""
    ssh -p "$port" "${conn_opts[@]}" "$host" "$prune_cmd" || true

    # Download the cache contents first, then the logs and scripts in C:\ci.
    scp -P "$port" -r "${conn_opts[@]}" "${host}:C:/ppf-contact-solver/cache/ppf-cts/ci/*" ./ci/ || echo "No ppf-cts CI files found"
    scp -P "$port" -r "${conn_opts[@]}" "${host}:C:/ci/*" ./ci/ || echo "No script/log files found"

    echo "## Collected Files:"
    ls -laR ci/ || echo "No files collected"
# Publish the local ci/ directory as this batch's artifact, kept for 3 days.
- name: Upload artifact
  if: ${{ success() || failure() }}
  uses: actions/upload-artifact@v4
  with:
    path: ci
    name: ci-win-batch-5
    retention-days: 3
- name: GPU information
  if: ${{ success() || failure() }}
  run: |
    # Print nvidia-smi output from the instance; a failure is ignored.
    port="${{ steps.ids.outputs.SSH_PORT }}"
    key="${{ steps.keypair.outputs.KEY_PATH }}"
    conn_opts=(
      -o StrictHostKeyChecking=no
      -o UserKnownHostsFile=/dev/null
      -o ServerAliveInterval=60
      -o ServerAliveCountMax=10
      -i "$key"
    )
    ssh -p "$port" "${conn_opts[@]}" "Administrator@$PUBLIC_IP" "nvidia-smi" || true
# Cleanup steps: each runs under always() and is marked continue-on-error.
- name: Re-authenticate for cleanup
  if: always()
  continue-on-error: true
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
- name: Cleanup - Terminate Instance
  if: always()
  continue-on-error: true
  env:
    TARGET_INSTANCE: ${{ steps.instance.outputs.INSTANCE_ID }}
  run: |
    # Only act when the launch step recorded an instance id.
    if [ -n "$TARGET_INSTANCE" ]; then
      aws ec2 terminate-instances --instance-ids "$TARGET_INSTANCE" --region "$AWS_REGION" || true
    fi
- name: Cleanup - Remove Ingress Rules
  if: always()
  continue-on-error: true
  env:
    RULE_GROUP: ${{ steps.security-group.outputs.SG_ID }}
    RULE_PORT: ${{ steps.security-group.outputs.SSH_PORT }}
    RULE_CIDR: ${{ steps.security-group.outputs.RUNNER_IP_CIDR }}
  run: |
    # Revoke the per-run SSH rule; skipped unless both group id and CIDR exist.
    if [ -n "$RULE_GROUP" ] && [ -n "$RULE_CIDR" ]; then
      aws ec2 revoke-security-group-ingress --group-id "$RULE_GROUP" \
        --ip-permissions "IpProtocol=tcp,FromPort=$RULE_PORT,ToPort=$RULE_PORT,IpRanges=[{CidrIp=$RULE_CIDR}]" \
        --region "$AWS_REGION" 2>&1 || true
    fi
- name: Cleanup - Remove Local SSH Key
  if: always()
  continue-on-error: true
  env:
    LOCAL_KEY: ${{ steps.keypair.outputs.KEY_PATH }}
  run: |
    # Delete the private key file from the runner.
    rm -f "$LOCAL_KEY"
- name: Summary
  if: always()
  env:
    INSTANCE_LABEL: ${{ steps.instance.outputs.INSTANCE_ID || 'Not launched' }}
  run: |
    # Three-line recap; the instance line falls back to 'Not launched'.
    printf '%s\n' \
      "## Batch 5 Summary" \
      "- Examples: twist five-twist woven yarn" \
      "- Instance: $INSTANCE_LABEL"