# Captured from the GitHub Actions run page: "Build Windows", run #15 (workflow file for this run).

# File: release-win.yml
# Code: Claude Code
# Review: Ryoichi Ando ([email protected])
# License: Apache v2.0
# Manually dispatched workflow. The single boolean input selects between a
# build-only dry run (the default) and a full release.
name: Build Windows
on:
  workflow_dispatch:
    inputs:
      dry_run:
        type: boolean
        required: true
        default: true
        description: 'Dry run (build only, no release)'
jobs:
  release-windows:
    name: Build and Release Windows Bundle
    runs-on: ubuntu-latest
    permissions:
      # Required by the OIDC-based aws-actions/configure-aws-credentials steps.
      id-token: write
      # Required to push the version tag and create the GitHub release.
      contents: write
    env:
      AWS_REGION: us-east-2
      INSTANCE_TYPE: g6e.2xlarge
      # NOTE(review): WORKDIR and USER are not referenced by the steps visible
      # in this file (they hard-code the path and "Administrator") - confirm
      # whether the helper scripts read them before removing.
      WORKDIR: 'C:\ppf-contact-solver'
      USER: Administrator
    steps:
# Derives the version string from the current Asia/Tokyo time (minute
# resolution) and re-exports the dry_run input as a step output that the
# later `if:` conditions read.
- name: Get version and mode
  id: version
  run: |
    dry_run_flag="${{ github.event.inputs.dry_run }}"
    release_version="$(TZ='Asia/Tokyo' date '+%Y-%m-%d-%H-%M')"
    {
      echo "VERSION=$release_version"
      echo "DRY_RUN=$dry_run_flag"
    } >> $GITHUB_OUTPUT
    echo "Version: $release_version"
    echo "Dry run: $dry_run_flag"
# Later steps read .github/workflows/scripts/win/*.ps1 and run `git archive`,
# so the repository must be present on the runner.
- name: Checkout repository
  uses: actions/checkout@v4
# Assumes the IAM role named by the AWS_ROLE_ARN secret in the job's region.
- name: Configure AWS credentials via OIDC
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
# Fails fast if the assumed role cannot call STS, then logs the run settings.
- name: Verify AWS authentication
  run: |
    echo "Testing AWS authentication..."
    aws sts get-caller-identity
    printf '%s\n' \
      "AWS Region: $AWS_REGION" \
      "Instance Type: $INSTANCE_TYPE" \
      "Version: ${{ steps.version.outputs.VERSION }}"
# Discovers the runner's public IPv4 address; later steps use it as the /32
# source CIDR of the SSH ingress rules. The address is masked in the logs.
- name: Get GitHub Actions runner public IP
  id: runner-ip
  run: |
    echo "Fetching GitHub Actions runner public IP..."
    # -f: treat HTTP errors as failures instead of accepting the error body
    # as the "IP". `|| true` defers the failure to the explicit checks below.
    RUNNER_IP=$(curl -fsS --max-time 10 https://checkip.amazonaws.com | tr -d '[:space:]' || true)
    if [ -z "$RUNNER_IP" ]; then
      echo "ERROR: Failed to get IP from checkip.amazonaws.com"
      exit 1
    fi
    # The value is interpolated into a security-group CIDR, so reject
    # anything that is not a dotted quad.
    if ! printf '%s' "$RUNNER_IP" | grep -Eq '^([0-9]{1,3}\.){3}[0-9]{1,3}$'; then
      echo "ERROR: Unexpected response from checkip.amazonaws.com"
      exit 1
    fi
    echo "::add-mask::$RUNNER_IP"
    echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_OUTPUT"
    echo "GitHub Actions Runner IP: $RUNNER_IP"
# Picks the most recently created Amazon-owned Windows Server 2025
# "English-Full-Base" image that is in the available state.
- name: Find Windows Server 2025 AMI
  id: ami
  run: |
    echo "Finding latest Windows Server 2025 AMI..."
    name_filter="Name=name,Values=Windows_Server-2025-English-Full-Base-*"
    state_filter="Name=state,Values=available"
    latest_ami=$(aws ec2 describe-images \
      --region "$AWS_REGION" \
      --owners amazon \
      --filters "$name_filter" "$state_filter" \
      --query 'sort_by(Images, &CreationDate)[-1].ImageId' \
      --output text)
    # The query is checked for both an empty result and the literal "None".
    case "$latest_ami" in
      ""|None)
        echo "ERROR: Windows Server 2025 AMI not found in region $AWS_REGION"
        exit 1
        ;;
    esac
    echo "AMI_ID=$latest_ami" >> $GITHUB_OUTPUT
    echo "Found AMI: $latest_ami"
# Resolves the region's default VPC; the security group is created in it.
- name: Get default VPC ID
  id: vpc
  run: |
    echo "Getting default VPC ID..."
    default_vpc=$(aws ec2 describe-vpcs \
      --region "$AWS_REGION" \
      --filters "Name=isDefault,Values=true" \
      --query 'Vpcs[0].VpcId' \
      --output text)
    if [ -z "$default_vpc" ] || [ "$default_vpc" = "None" ]; then
      echo "ERROR: Default VPC not found in region $AWS_REGION"
      exit 1
    fi
    echo "VPC_ID=$default_vpc" >> $GITHUB_OUTPUT
    echo "Default VPC: $default_vpc"
# Produces the per-run timestamp, a temporary instance label and a random
# SSH port for the build instance. The port is masked in the logs.
- name: Generate unique identifiers
  id: ids
  run: |
    stamp=$(date +%Y%m%d%H%M%S)
    suffix=$(head /dev/urandom | tr -dc a-z0-9 | head -c 6)
    label="temp-${stamp}-${suffix}"
    # bash's $RANDOM is 0..32767, so the modulus never reduces it and the
    # port actually falls in 10001..42768.
    port=$((10001 + RANDOM % 55535))
    echo "::add-mask::$port"
    {
      echo "TIMESTAMP=$stamp"
      echo "TEMP_INSTANCE_ID=$label"
      echo "SSH_PORT=$port"
    } >> $GITHUB_OUTPUT
    echo "Temporary Instance ID: $label"
    echo "SSH Port: $port"
# Finds (or creates and tags) the long-lived security group, then opens the
# run's random SSH port to the runner's /32 only.
# Outputs: SG_ID, RUNNER_IP_CIDR, SSH_PORT (the latter two are what the
# revoke/cleanup steps use to remove the rule again).
- name: Setup persistent security group
  id: security-group
  run: |
    echo "Setting up persistent security group 'github-actions-windows-persistent'..."
    SG_NAME="github-actions-windows-persistent"
    SG_DESCRIPTION="Persistent security group for GitHub Actions Windows builds with dynamic rules"
    VPC_ID="${{ steps.vpc.outputs.VPC_ID }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    RUNNER_IP_CIDR="${{ steps.runner-ip.outputs.RUNNER_IP }}/32"
    # Group names are only unique within a VPC, so scope the lookup to the
    # same VPC the group is created in below.
    SG_ID=$(aws ec2 describe-security-groups \
      --filters \
      "Name=group-name,Values=$SG_NAME" \
      "Name=vpc-id,Values=$VPC_ID" \
      --query 'SecurityGroups[0].GroupId' \
      --region "$AWS_REGION" \
      --output text || echo "")
    if [ "$SG_ID" = "None" ] || [ -z "$SG_ID" ]; then
      echo "Security group does not exist. Creating new one..."
      SG_ID=$(aws ec2 create-security-group \
        --group-name "$SG_NAME" \
        --description "$SG_DESCRIPTION" \
        --vpc-id "$VPC_ID" \
        --query 'GroupId' \
        --region "$AWS_REGION" \
        --output text)
      echo "Security Group created: $SG_ID"
      aws ec2 create-tags \
        --resources "$SG_ID" \
        --tags \
        "Key=Name,Value=$SG_NAME" \
        "Key=ManagedBy,Value=GitHubActions" \
        "Key=Purpose,Value=WindowsBuildPersistentDynamicRules" \
        "Key=CreatedAt,Value=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
        --region "$AWS_REGION"
      echo "Security Group tagged successfully"
    else
      echo "Using existing security group: $SG_ID"
    fi
    echo "SG_ID=$SG_ID" >> "$GITHUB_OUTPUT"
    echo "Adding ingress rule for runner IP on port $SSH_PORT"
    # Only a duplicate rule is tolerated. Any other failure (permissions,
    # rule quota, bad CIDR) would otherwise surface much later as an SSH
    # timeout with a GPU instance already running.
    if AUTH_OUTPUT=$(aws ec2 authorize-security-group-ingress \
      --group-id "$SG_ID" \
      --ip-permissions \
      "IpProtocol=tcp,FromPort=$SSH_PORT,ToPort=$SSH_PORT,IpRanges=[{CidrIp=$RUNNER_IP_CIDR,Description='GHA Run ${{ github.run_id }} Port $SSH_PORT'}]" \
      --region "$AWS_REGION" 2>&1); then
      echo "SSH ingress rule added successfully"
    elif printf '%s' "$AUTH_OUTPUT" | grep -q 'InvalidPermission.Duplicate'; then
      echo "Note: Rule already exists"
    else
      printf '%s\n' "$AUTH_OUTPUT"
      echo "ERROR: Failed to add SSH ingress rule"
      exit 1
    fi
    echo "RUNNER_IP_CIDR=$RUNNER_IP_CIDR" >> "$GITHUB_OUTPUT"
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
# Writes the decrypted SSH private key from SSM Parameter Store to a file on
# the runner and exposes its path as the KEY_PATH output.
- name: Retrieve SSH key from Parameter Store
  id: keypair
  run: |
    echo "Retrieving SSH private key from AWS Systems Manager..."
    KEY_PATH=/tmp/github-actions-ec2.pem
    # Create the file owner-only from the start; with the default umask the
    # key was briefly readable by other users until chmod ran.
    umask 077
    aws ssm get-parameter \
      --name "/github-actions/ec2/ssh-key" \
      --with-decryption \
      --query 'Parameter.Value' \
      --region "$AWS_REGION" \
      --output text > "$KEY_PATH"
    # Still needed in case the file already existed with wider permissions.
    chmod 600 "$KEY_PATH"
    if [ ! -s "$KEY_PATH" ]; then
      echo "ERROR: SSH key parameter is empty"
      rm -f "$KEY_PATH"
      exit 1
    fi
    echo "SSH key retrieved successfully"
    echo "KEY_PATH=$KEY_PATH" >> "$GITHUB_OUTPUT"
# Renders the user-data template by substituting the run's SSH port for
# every SSH_PORT_PLACEHOLDER occurrence.
- name: Create Windows user data script
  run: |
    port="${{ steps.ids.outputs.SSH_PORT }}"
    template=.github/workflows/scripts/win/user-data.ps1
    sed "s/SSH_PORT_PLACEHOLDER/$port/g" "$template" > /tmp/user-data.ps1
    echo "User data script created with SSH port $port"
# Launches the build instance from the AMI found earlier, in the persistent
# security group, with the rendered user-data script and a 100 GB gp3 root
# volume that is deleted on termination.
# The instance is tagged with RunId (the cleanup step searches on it) and is
# set to terminate, not stop, on an OS-initiated shutdown.
# Output: INSTANCE_ID.
- name: Launch EC2 instance
id: instance
run: |
echo "Launching Windows EC2 instance..."
INSTANCE_ID=$(aws ec2 run-instances \
--image-id "${{ steps.ami.outputs.AMI_ID }}" \
--instance-type "$INSTANCE_TYPE" \
--key-name "${{ secrets.AWS_KEY_PAIR_NAME }}" \
--security-group-ids "${{ steps.security-group.outputs.SG_ID }}" \
--user-data file:///tmp/user-data.ps1 \
--block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=100,VolumeType=gp3,DeleteOnTermination=true}" \
--tag-specifications \
"ResourceType=instance,Tags=[{Key=Name,Value=gpu-runner-win-release-${{ steps.version.outputs.VERSION }}-${{ steps.ids.outputs.TIMESTAMP }}},{Key=ManagedBy,Value=GitHubActions},{Key=Purpose,Value=WindowsReleaseBuild},{Key=Workflow,Value=${{ github.workflow }}},{Key=RunId,Value=${{ github.run_id }}},{Key=Version,Value=${{ steps.version.outputs.VERSION }}},{Key=SSHPort,Value=${{ steps.ids.outputs.SSH_PORT }}}]" \
"ResourceType=volume,Tags=[{Key=Name,Value=gpu-runner-win-release-${{ steps.version.outputs.VERSION }}-${{ steps.ids.outputs.TIMESTAMP }}-volume},{Key=ManagedBy,Value=GitHubActions},{Key=Purpose,Value=WindowsReleaseBuild}]" \
--instance-initiated-shutdown-behavior terminate \
--query 'Instances[0].InstanceId' \
--region "$AWS_REGION" \
--output text)
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_OUTPUT
echo "Instance launched: $INSTANCE_ID"
# Blocks until the build instance is running, then exports its public IP as
# the PUBLIC_IP environment variable for all later steps (masked in logs).
- name: Wait for instance to be running
  run: |
    echo "Waiting for instance to be running..."
    aws ec2 wait instance-running \
      --instance-ids "${{ steps.instance.outputs.INSTANCE_ID }}" \
      --region "$AWS_REGION"
    PUBLIC_IP=$(aws ec2 describe-instances \
      --instance-ids "${{ steps.instance.outputs.INSTANCE_ID }}" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' \
      --region "$AWS_REGION" \
      --output text)
    # Without this check a missing address ("None") would be exported and
    # the SSH wait would burn its whole retry budget against a bogus host.
    if [ -z "$PUBLIC_IP" ] || [ "$PUBLIC_IP" = "None" ]; then
      echo "ERROR: Instance has no public IP address"
      exit 1
    fi
    echo "::add-mask::$PUBLIC_IP"
    echo "PUBLIC_IP=$PUBLIC_IP" >> "$GITHUB_ENV"
    echo "Instance is running at: $PUBLIC_IP"
# Two-stage wait on the build instance:
#   1. until an SSH login succeeds (60 attempts, 30 s apart; failure aborts);
#   2. until C:\ssh_ready.txt exists (30 attempts, 30 s apart; a timeout is
#      logged and the job continues).
# NOTE(review): stage 2 sends PowerShell syntax, so it presumably relies on
# the user-data script making PowerShell the default SSH shell - confirm.
- name: Wait for SSH
  run: |
    echo "Waiting for SSH to be ready..."
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
    echo "Waiting for SSH on port $SSH_PORT..."
    MAX_SSH_ATTEMPTS=60
    ATTEMPT=0
    while [ $ATTEMPT -lt $MAX_SSH_ATTEMPTS ]; do
      if ssh -p "$SSH_PORT" -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -o BatchMode=yes -i "$KEY_PATH" "Administrator@$PUBLIC_IP" "echo SSH_READY" 2>/dev/null | grep -q SSH_READY; then
        echo "SSH connection established!"
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      if [ $ATTEMPT -eq $MAX_SSH_ATTEMPTS ]; then
        echo "Failed to establish SSH connection"
        exit 1
      fi
      echo "SSH not ready (attempt $ATTEMPT/$MAX_SSH_ATTEMPTS), waiting 30s..."
      sleep 30
    done
    echo "Waiting for SSH setup completion..."
    MAX_SETUP_ATTEMPTS=30
    ATTEMPT=0
    while [ $ATTEMPT -lt $MAX_SETUP_ATTEMPTS ]; do
      # Match the whole line: a plain `grep -q READY` also matches
      # "NOT_READY", which made this check pass on the first successful
      # connection. CRs are stripped because the remote side is Windows.
      if ssh -p "$SSH_PORT" -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -i "$KEY_PATH" "Administrator@$PUBLIC_IP" \
        "if (Test-Path C:\\ssh_ready.txt) { echo READY } else { echo NOT_READY }" 2>/dev/null \
        | tr -d '\r' | grep -qx READY; then
        echo "SSH setup complete!"
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      if [ $ATTEMPT -eq $MAX_SETUP_ATTEMPTS ]; then
        echo "SSH setup timed out, continuing anyway..."
        break
      fi
      echo "SSH setup not complete (attempt $ATTEMPT/$MAX_SETUP_ATTEMPTS), waiting 30s..."
      sleep 30
    done
# Copies install-nvidia-driver.ps1 to the build instance as
# C:/install_driver.ps1 and runs it with PowerShell over SSH.
# ServerAliveInterval=60 keeps the session alive during the long install.
- name: Install NVIDIA driver only (no CUDA toolkit)
run: |
echo "Installing NVIDIA driver (this will take a few minutes)..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" .github/workflows/scripts/win/install-nvidia-driver.ps1 Administrator@$PUBLIC_IP:C:/install_driver.ps1
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$PUBLIC_IP \
"powershell -ExecutionPolicy Bypass -File C:/install_driver.ps1"
# Zips the tracked files at HEAD; untracked and ignored files are excluded.
- name: Create archive of repository
  run: |
    echo "Creating repository archive..."
    git archive --output=/tmp/repo.zip --format=zip HEAD
# Uploads the repository zip as C:/source.zip, then on the instance: removes
# any existing C:\ppf-contact-solver, recreates it, expands the zip into it
# and deletes the zip.
- name: Transfer repository to instance
run: |
echo "Transferring repository to instance..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" /tmp/repo.zip Administrator@$PUBLIC_IP:C:/source.zip
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" Administrator@$PUBLIC_IP \
"powershell -Command \"if (Test-Path 'C:\\ppf-contact-solver') { Remove-Item -Recurse -Force 'C:\\ppf-contact-solver' }; New-Item -ItemType Directory -Path 'C:\\ppf-contact-solver' -Force; Expand-Archive -Path 'C:\\source.zip' -DestinationPath 'C:\\ppf-contact-solver' -Force; Remove-Item 'C:\\source.zip'\""
# Runs build-win-native\warmup.bat on the build instance through cmd over SSH.
# NOTE(review): the step only fails if the .bat exits non-zero - confirm the
# script propagates errors when called with /nopause.
- name: Run warmup.bat
run: |
echo "Running warmup.bat..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$PUBLIC_IP \
"cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && warmup.bat /nopause'"
# Runs build-win-native\build.bat on the build instance through cmd over SSH.
- name: Run build.bat
run: |
echo "Running build.bat..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$PUBLIC_IP \
"cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && build.bat /nopause'"
# Runs build-win-native\bundle.bat on the build instance through cmd over SSH.
# Later steps read the result from build-win-native\dist.
- name: Run bundle.bat
run: |
echo "Running bundle.bat..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$PUBLIC_IP \
"cmd /c 'cd C:\\ppf-contact-solver\\build-win-native && bundle.bat /nopause'"
# Runs dist\headless.bat from inside the bundle directory on the build
# instance, as a first check before the clean-instance verification.
- name: Test bundle with headless.bat
run: |
echo "Testing bundle with headless.bat..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$PUBLIC_IP \
"cmd /c 'cd C:\\ppf-contact-solver\\build-win-native\\dist && headless.bat /nopause'"
# Runs dist\fast-check-all.bat on the build instance. The next step downloads
# dist/fast-check-results.log, which this script presumably writes (confirm).
- name: Run fast-check-all.bat on bundle
run: |
echo "Running fast-check-all.bat on bundle..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$PUBLIC_IP \
"cmd /c 'cd C:\\ppf-contact-solver\\build-win-native\\dist && fast-check-all.bat /nopause'"
# Best-effort: runs even after a failure (always + continue-on-error), copies
# fast-check-results.log from the build instance and prints it if present.
# scp errors are discarded, so a missing log is reported as "Log file not found".
- name: Download build test log
if: always()
continue-on-error: true
run: |
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
echo "Downloading test results log from build instance..."
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" "Administrator@$PUBLIC_IP:C:/ppf-contact-solver/build-win-native/dist/fast-check-results.log" \
./build-fast-check-results.log 2>/dev/null || echo "Log file not found"
if [ -f ./build-fast-check-results.log ]; then
echo "=== Build Instance Test Results ==="
cat ./build-fast-check-results.log
fi
# Removes what the test runs left inside dist before it is archived: the
# local, cache and export directories, every __pycache__ directory, and all
# *.pyc / *.pyo files. Each removal ignores errors (SilentlyContinue).
- name: Clean up test artifacts
run: |
echo "Cleaning up test artifacts..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" Administrator@$PUBLIC_IP \
"powershell -Command \"Remove-Item -Recurse -Force 'C:\\ppf-contact-solver\\build-win-native\\dist\\local' -ErrorAction SilentlyContinue; Remove-Item -Recurse -Force 'C:\\ppf-contact-solver\\build-win-native\\dist\\cache' -ErrorAction SilentlyContinue; Remove-Item -Recurse -Force 'C:\\ppf-contact-solver\\build-win-native\\dist\\export' -ErrorAction SilentlyContinue; Get-ChildItem -Path 'C:\\ppf-contact-solver\\build-win-native\\dist' -Recurse -Directory -Filter '__pycache__' | Remove-Item -Recurse -Force -ErrorAction SilentlyContinue; Get-ChildItem -Path 'C:\\ppf-contact-solver\\build-win-native\\dist' -Recurse -Include '*.pyc','*.pyo' | Remove-Item -Force -ErrorAction SilentlyContinue\""
# Zips the contents of dist (not the dist folder itself) on the instance into
# C:\ppf-contact-solver-<VERSION>-win64.zip, overwriting any existing file.
# ${VERSION} is expanded by the runner's bash before the command is sent.
- name: Create release archive
run: |
echo "Creating release archive..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
VERSION="${{ steps.version.outputs.VERSION }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$PUBLIC_IP \
"powershell -Command \"Compress-Archive -Path 'C:\\ppf-contact-solver\\build-win-native\\dist\\*' -DestinationPath 'C:\\ppf-contact-solver-${VERSION}-win64.zip' -Force\""
# Copies the release zip from the build instance into the runner's working
# directory; the verification, release and artifact steps all read it there.
- name: Download release archive
run: |
echo "Downloading release archive..."
SSH_PORT="${{ steps.ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
VERSION="${{ steps.version.outputs.VERSION }}"
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" "Administrator@$PUBLIC_IP:C:/ppf-contact-solver-${VERSION}-win64.zip" ./
ls -lh "ppf-contact-solver-${VERSION}-win64.zip"
# ============================================================
# PHASE 2: Clean Environment Verification
# Launch a second Windows instance with only NVIDIA driver
# to verify the bundle has no external DLL dependencies
# ============================================================
# Assumes the role again before the next batch of AWS calls.
# NOTE(review): presumably because the first session may have expired during
# the long build - confirm against the role's session duration.
- name: Re-authenticate for build instance cleanup
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
# Requests termination of the build instance once the bundle is downloaded.
# This step is skipped if an earlier step failed; the always() cleanup step
# near the end of the job covers that case.
- name: Terminate build instance
  run: |
    build_instance="${{ steps.instance.outputs.INSTANCE_ID }}"
    echo "Terminating build instance: $build_instance"
    aws ec2 terminate-instances \
      --region "$AWS_REGION" \
      --instance-ids "$build_instance"
    echo "Build instance termination requested"
# Revokes the build instance's SSH ingress rule, using the port and CIDR
# recorded as outputs of the security-group step. Any failure is logged as
# "Rule may have been removed" and does not fail the step.
- name: Remove build instance ingress rule
run: |
echo "Removing build instance ingress rule..."
aws ec2 revoke-security-group-ingress \
--group-id "${{ steps.security-group.outputs.SG_ID }}" \
--ip-permissions \
"IpProtocol=tcp,FromPort=${{ steps.security-group.outputs.SSH_PORT }},ToPort=${{ steps.security-group.outputs.SSH_PORT }},IpRanges=[{CidrIp=${{ steps.security-group.outputs.RUNNER_IP_CIDR }}}]" \
--region "$AWS_REGION" 2>&1 || echo "Rule may have been removed"
# Same scheme as the build-instance identifiers, with a fresh timestamp,
# label and random SSH port for the verification instance.
- name: Generate verification instance identifiers
  id: verify-ids
  run: |
    stamp=$(date +%Y%m%d%H%M%S)
    suffix=$(head /dev/urandom | tr -dc a-z0-9 | head -c 6)
    label="verify-${stamp}-${suffix}"
    # bash's $RANDOM is 0..32767, so the modulus never reduces it and the
    # port actually falls in 10001..42768.
    port=$((10001 + RANDOM % 55535))
    echo "::add-mask::$port"
    {
      echo "TIMESTAMP=$stamp"
      echo "VERIFY_INSTANCE_ID=$label"
      echo "SSH_PORT=$port"
    } >> $GITHUB_OUTPUT
    echo "Verification Instance ID: $label"
    echo "Verification SSH Port: $port"
# Opens the verification instance's SSH port to the runner's /32 in the
# persistent security group.
# Outputs: SSH_PORT, RUNNER_IP_CIDR (read by the revoke/cleanup steps).
- name: Add verification instance security group rule
  id: verify-security-group
  run: |
    SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
    RUNNER_IP_CIDR="${{ steps.runner-ip.outputs.RUNNER_IP }}/32"
    echo "Adding ingress rule for verification instance on port $SSH_PORT"
    # Only a duplicate rule is tolerated; any other failure would otherwise
    # show up much later as an SSH timeout with an instance already running.
    if AUTH_OUTPUT=$(aws ec2 authorize-security-group-ingress \
      --group-id "${{ steps.security-group.outputs.SG_ID }}" \
      --ip-permissions \
      "IpProtocol=tcp,FromPort=$SSH_PORT,ToPort=$SSH_PORT,IpRanges=[{CidrIp=$RUNNER_IP_CIDR,Description='GHA Run ${{ github.run_id }} Verify Port $SSH_PORT'}]" \
      --region "$AWS_REGION" 2>&1); then
      echo "Verification ingress rule added"
    elif printf '%s' "$AUTH_OUTPUT" | grep -q 'InvalidPermission.Duplicate'; then
      echo "Note: Rule already exists"
    else
      printf '%s\n' "$AUTH_OUTPUT"
      echo "ERROR: Failed to add verification ingress rule"
      exit 1
    fi
    echo "SSH_PORT=$SSH_PORT" >> "$GITHUB_OUTPUT"
    echo "RUNNER_IP_CIDR=$RUNNER_IP_CIDR" >> "$GITHUB_OUTPUT"
# Renders the same user-data template with the verification instance's port.
- name: Create verification instance user data script
  run: |
    port="${{ steps.verify-ids.outputs.SSH_PORT }}"
    template=.github/workflows/scripts/win/user-data.ps1
    sed "s/SSH_PORT_PLACEHOLDER/$port/g" "$template" > /tmp/verify-user-data.ps1
    echo "Verification user data script created with SSH port $port"
# Launches the second (verification) instance from the same AMI, instance
# type and security group as the build instance, with its own user-data file
# and tags (Purpose=WindowsBundleVerification, RunId for the cleanup search).
# Output: INSTANCE_ID.
- name: Launch verification instance
id: verify-instance
run: |
echo "Launching verification Windows instance (minimal setup - driver only)..."
INSTANCE_ID=$(aws ec2 run-instances \
--image-id "${{ steps.ami.outputs.AMI_ID }}" \
--instance-type "$INSTANCE_TYPE" \
--key-name "${{ secrets.AWS_KEY_PAIR_NAME }}" \
--security-group-ids "${{ steps.security-group.outputs.SG_ID }}" \
--user-data file:///tmp/verify-user-data.ps1 \
--block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=100,VolumeType=gp3,DeleteOnTermination=true}" \
--tag-specifications \
"ResourceType=instance,Tags=[{Key=Name,Value=gpu-runner-win-verify-${{ steps.version.outputs.VERSION }}-${{ steps.verify-ids.outputs.TIMESTAMP }}},{Key=ManagedBy,Value=GitHubActions},{Key=Purpose,Value=WindowsBundleVerification},{Key=Workflow,Value=${{ github.workflow }}},{Key=RunId,Value=${{ github.run_id }}},{Key=Version,Value=${{ steps.version.outputs.VERSION }}},{Key=SSHPort,Value=${{ steps.verify-ids.outputs.SSH_PORT }}}]" \
"ResourceType=volume,Tags=[{Key=Name,Value=gpu-runner-win-verify-${{ steps.version.outputs.VERSION }}-${{ steps.verify-ids.outputs.TIMESTAMP }}-volume},{Key=ManagedBy,Value=GitHubActions},{Key=Purpose,Value=WindowsBundleVerification}]" \
--instance-initiated-shutdown-behavior terminate \
--query 'Instances[0].InstanceId' \
--region "$AWS_REGION" \
--output text)
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_OUTPUT
echo "Verification instance launched: $INSTANCE_ID"
# Blocks until the verification instance is running, then exports its public
# IP as VERIFY_PUBLIC_IP for the later steps (masked in logs).
- name: Wait for verification instance to be running
  run: |
    echo "Waiting for verification instance to be running..."
    aws ec2 wait instance-running \
      --instance-ids "${{ steps.verify-instance.outputs.INSTANCE_ID }}" \
      --region "$AWS_REGION"
    VERIFY_PUBLIC_IP=$(aws ec2 describe-instances \
      --instance-ids "${{ steps.verify-instance.outputs.INSTANCE_ID }}" \
      --query 'Reservations[0].Instances[0].PublicIpAddress' \
      --region "$AWS_REGION" \
      --output text)
    # Without this check a missing address ("None") would be exported and
    # the SSH wait would burn its whole retry budget against a bogus host.
    if [ -z "$VERIFY_PUBLIC_IP" ] || [ "$VERIFY_PUBLIC_IP" = "None" ]; then
      echo "ERROR: Verification instance has no public IP address"
      exit 1
    fi
    echo "::add-mask::$VERIFY_PUBLIC_IP"
    echo "VERIFY_PUBLIC_IP=$VERIFY_PUBLIC_IP" >> "$GITHUB_ENV"
    echo "Verification instance is running at: $VERIFY_PUBLIC_IP"
# Two-stage wait on the verification instance:
#   1. until an SSH login succeeds (60 attempts, 30 s apart; failure aborts);
#   2. until C:\ssh_ready.txt exists (30 attempts, 30 s apart; a timeout is
#      logged and the job continues).
# NOTE(review): stage 2 sends PowerShell syntax, so it presumably relies on
# the user-data script making PowerShell the default SSH shell - confirm.
- name: Wait for verification instance SSH
  run: |
    echo "Waiting for SSH to be ready on verification instance..."
    KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
    SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
    echo "Waiting for SSH on port $SSH_PORT..."
    MAX_SSH_ATTEMPTS=60
    ATTEMPT=0
    while [ $ATTEMPT -lt $MAX_SSH_ATTEMPTS ]; do
      if ssh -p "$SSH_PORT" -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -o BatchMode=yes -i "$KEY_PATH" "Administrator@$VERIFY_PUBLIC_IP" "echo SSH_READY" 2>/dev/null | grep -q SSH_READY; then
        echo "SSH connection established!"
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      if [ $ATTEMPT -eq $MAX_SSH_ATTEMPTS ]; then
        echo "Failed to establish SSH connection"
        exit 1
      fi
      echo "SSH not ready (attempt $ATTEMPT/$MAX_SSH_ATTEMPTS), waiting 30s..."
      sleep 30
    done
    echo "Waiting for SSH setup completion..."
    MAX_SETUP_ATTEMPTS=30
    ATTEMPT=0
    while [ $ATTEMPT -lt $MAX_SETUP_ATTEMPTS ]; do
      # Match the whole line: a plain `grep -q READY` also matches
      # "NOT_READY", which made this check pass on the first successful
      # connection. CRs are stripped because the remote side is Windows.
      if ssh -p "$SSH_PORT" -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -i "$KEY_PATH" "Administrator@$VERIFY_PUBLIC_IP" \
        "if (Test-Path C:\\ssh_ready.txt) { echo READY } else { echo NOT_READY }" 2>/dev/null \
        | tr -d '\r' | grep -qx READY; then
        echo "SSH setup complete!"
        break
      fi
      ATTEMPT=$((ATTEMPT + 1))
      if [ $ATTEMPT -eq $MAX_SETUP_ATTEMPTS ]; then
        echo "SSH setup timed out, continuing anyway..."
        break
      fi
      echo "SSH setup not complete (attempt $ATTEMPT/$MAX_SETUP_ATTEMPTS), waiting 30s..."
      sleep 30
    done
# Copies install-nvidia-driver.ps1 to the verification instance and runs it
# with PowerShell over SSH. This step has the same name as the build-instance
# driver step but targets VERIFY_PUBLIC_IP and the verification port.
- name: Install NVIDIA driver only (no CUDA toolkit)
run: |
echo "Installing NVIDIA driver only (this will take a few minutes)..."
SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" .github/workflows/scripts/win/install-nvidia-driver.ps1 Administrator@$VERIFY_PUBLIC_IP:C:/install_driver.ps1
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$VERIFY_PUBLIC_IP \
"powershell -ExecutionPolicy Bypass -File C:/install_driver.ps1"
# Uploads the release zip downloaded from the build instance to the
# verification instance as C:/bundle.zip.
- name: Transfer bundle to verification instance
run: |
echo "Transferring bundle to verification instance..."
SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
VERSION="${{ steps.version.outputs.VERSION }}"
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" "ppf-contact-solver-${VERSION}-win64.zip" Administrator@$VERIFY_PUBLIC_IP:C:/bundle.zip
echo "Bundle transferred successfully"
# Recreates C:\bundle on the verification instance and expands C:\bundle.zip
# into it. Despite the step name, only extraction happens here; the checks
# run in the following steps.
- name: Extract and verify bundle
run: |
echo "Extracting bundle on verification instance..."
SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" Administrator@$VERIFY_PUBLIC_IP \
"powershell -Command \"if (Test-Path 'C:\\bundle') { Remove-Item -Recurse -Force 'C:\\bundle' }; New-Item -ItemType Directory -Path 'C:\\bundle' -Force; Expand-Archive -Path 'C:\\bundle.zip' -DestinationPath 'C:\\bundle' -Force\""
echo "Bundle extracted successfully"
# Runs headless.bat from the extracted bundle on the verification instance.
# The PASSED line is printed only when the ssh command exits zero.
- name: Run headless.bat on clean environment
run: |
echo "Running headless.bat on clean environment (no CUDA toolkit, no build tools)..."
SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$VERIFY_PUBLIC_IP \
"cmd /c 'cd C:\\bundle && headless.bat /nopause'"
echo "Bundle verification PASSED - no external DLL dependencies detected"
# Runs fast-check-all.bat from the extracted bundle on the verification
# instance; the PASSED line is printed only when the ssh command exits zero.
- name: Run fast-check-all.bat on clean environment
run: |
echo "Running fast-check-all.bat on clean environment..."
SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
ssh -p $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=60 -i "$KEY_PATH" Administrator@$VERIFY_PUBLIC_IP \
"cmd /c 'cd C:\\bundle && fast-check-all.bat /nopause'"
echo "All example notebooks PASSED on clean environment"
# Best-effort: runs even after a failure (always + continue-on-error), copies
# C:/bundle/fast-check-results.log from the verification instance and prints
# it if present. scp errors are discarded.
- name: Download verification test log
if: always()
continue-on-error: true
run: |
SSH_PORT="${{ steps.verify-ids.outputs.SSH_PORT }}"
KEY_PATH="${{ steps.keypair.outputs.KEY_PATH }}"
echo "Downloading test results log from verification instance..."
scp -P $SSH_PORT -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-i "$KEY_PATH" "Administrator@$VERIFY_PUBLIC_IP:C:/bundle/fast-check-results.log" \
./verify-fast-check-results.log 2>/dev/null || echo "Log file not found"
if [ -f ./verify-fast-check-results.log ]; then
echo "=== Verification Instance Test Results ==="
cat ./verify-fast-check-results.log
fi
# Requests termination of the verification instance after its checks passed.
# This step is skipped on failure; the always() cleanup step covers that case.
- name: Terminate verification instance
  run: |
    verify_instance="${{ steps.verify-instance.outputs.INSTANCE_ID }}"
    echo "Terminating verification instance: $verify_instance"
    aws ec2 terminate-instances \
      --region "$AWS_REGION" \
      --instance-ids "$verify_instance"
    echo "Verification instance termination initiated"
# Revokes the verification instance's SSH ingress rule, using the port and
# CIDR recorded by the verify-security-group step. Any failure is logged as
# "Rule may have been removed" and does not fail the step.
- name: Remove verification instance ingress rule
run: |
echo "Removing verification instance ingress rule..."
aws ec2 revoke-security-group-ingress \
--group-id "${{ steps.security-group.outputs.SG_ID }}" \
--ip-permissions \
"IpProtocol=tcp,FromPort=${{ steps.verify-security-group.outputs.SSH_PORT }},ToPort=${{ steps.verify-security-group.outputs.SSH_PORT }},IpRanges=[{CidrIp=${{ steps.verify-security-group.outputs.RUNNER_IP_CIDR }}}]" \
--region "$AWS_REGION" 2>&1 || echo "Rule may have been removed"
# ============================================================
# PHASE 3: Release (if not dry run)
# ============================================================
# Real releases only: creates an annotated tag named after the version on the
# checked-out commit and pushes it to origin.
- name: Create version tag
  if: steps.version.outputs.DRY_RUN != 'true'
  run: |
    tag="${{ steps.version.outputs.VERSION }}"
    echo "Creating tag $tag..."
    git config user.email "github-actions[bot]@users.noreply.github.com"
    git config user.name "github-actions[bot]"
    git tag -a "$tag" -m "Release $tag"
    git push origin "$tag"
# Real releases only: publishes a release for the pushed tag with generated
# notes and attaches the bundle (the text after '#' is the asset label).
- name: Create GitHub Release
  if: steps.version.outputs.DRY_RUN != 'true'
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    tag="${{ steps.version.outputs.VERSION }}"
    asset="ppf-contact-solver-${tag}-win64.zip#Windows Bundle (win64)"
    echo "Creating GitHub Release for $tag..."
    gh release create "$tag" "$asset" \
      --generate-notes \
      --title "ZOZO's Contact Solver $tag"
# Dry runs only: unpacks the bundle into artifact-contents/ so it can be
# uploaded as an artifact, and checks that the key files are present.
- name: Extract archive for artifact (dry run)
  if: steps.version.outputs.DRY_RUN == 'true'
  run: |
    bundle="ppf-contact-solver-${{ steps.version.outputs.VERSION }}-win64.zip"
    out_dir=artifact-contents
    mkdir -p "$out_dir"
    # Record unzip's status without tripping errexit: exit code 1 is
    # tolerated, anything higher aborts the step.
    unzip "$bundle" -d "$out_dir" && rc=0 || rc=$?
    if [ $rc -gt 1 ]; then
      echo "unzip failed with exit code $rc"
      exit 1
    fi
    echo "unzip completed with exit code $rc"
    # Windows-created zips may carry restrictive permissions; make everything
    # readable for the upload step.
    chmod -R u+rwX "$out_dir"/
    for required in target/release/ppf-contact-solver.exe headless.bat; do
      if [ ! -f "$out_dir/$required" ]; then
        echo "ERROR: $(basename "$required") not found in archive"
        exit 1
      fi
    done
    echo "Archive extracted successfully"
# Dry runs only: uploads the extracted bundle as an artifact named after the
# version, kept for 7 days.
- name: Upload artifact (dry run)
  if: steps.version.outputs.DRY_RUN == 'true'
  uses: actions/upload-artifact@v4
  with:
    path: artifact-contents/
    name: ${{ steps.version.outputs.VERSION }}
    retention-days: 7
# Dry runs only: prints a short summary and the size of the local bundle zip.
- name: Dry run summary
  if: steps.version.outputs.DRY_RUN == 'true'
  run: |
    VERSION="${{ steps.version.outputs.VERSION }}"
    echo "## DRY RUN COMPLETE"
    echo "Build and verification succeeded. No release was created."
    echo ""
    echo "Verification: Bundle tested on clean Windows instance with only NVIDIA driver"
    echo " (no CUDA toolkit, no build tools, no Chocolatey, no Git)"
    echo ""
    # The upload step names the artifact after the bare version string, so
    # report that name rather than the zip's base name.
    echo "Archive uploaded as artifact: ${VERSION}"
    ls -lh "ppf-contact-solver-${VERSION}-win64.zip"
# Always runs, and never fails the job, so the cleanup steps below have
# credentials even when an earlier step failed.
- name: Re-authenticate for cleanup
  if: always()
  continue-on-error: true
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: ${{ env.AWS_REGION }}
    role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
# Always-run safety net: terminates the build and verification instances when
# their IDs are known, then sweeps for any non-terminated instance tagged with
# this run's RunId (e.g. launched but never recorded because of a cancel).
# Every terminate call is best-effort.
- name: Cleanup - Terminate Instances
  if: always()
  continue-on-error: true
  env:
    BUILD_INSTANCE: ${{ steps.instance.outputs.INSTANCE_ID }}
    VERIFY_INSTANCE: ${{ steps.verify-instance.outputs.INSTANCE_ID }}
  run: |
    # $1 = label used in the log line, $2 = instance id (may be empty)
    terminate_known() {
      [ -n "$2" ] || return 0
      echo "Terminating $1 instance: $2"
      aws ec2 terminate-instances \
        --instance-ids "$2" \
        --region "$AWS_REGION" || true
    }
    terminate_known build "$BUILD_INSTANCE"
    terminate_known verification "$VERIFY_INSTANCE"
    echo "Searching for any orphaned instances from this run..."
    leftovers=$(aws ec2 describe-instances \
      --filters \
      "Name=tag:RunId,Values=${{ github.run_id }}" \
      "Name=instance-state-name,Values=pending,running,stopping,stopped" \
      --query 'Reservations[].Instances[].InstanceId' \
      --region "$AWS_REGION" \
      --output text || echo "")
    if [ -z "$leftovers" ]; then
      echo "No orphaned instances found"
    else
      echo "Found orphaned instances: $leftovers"
      # Deliberately unquoted: the text output may contain several IDs.
      aws ec2 terminate-instances \
        --instance-ids $leftovers \
        --region "$AWS_REGION" || true
    fi
# Always-run safety net: revokes the build and verification SSH ingress rules
# again. Each revoke is attempted only when the step that added the rule
# recorded its CIDR; a failure (e.g. the rule is already gone) is logged and
# ignored.
- name: Cleanup - Remove Ingress Rules
if: always()
continue-on-error: true
run: |
# Remove build instance ingress rule
if [ -n "${{ steps.security-group.outputs.SG_ID }}" ] && [ -n "${{ steps.security-group.outputs.RUNNER_IP_CIDR }}" ]; then
echo "Removing build instance ingress rules..."
aws ec2 revoke-security-group-ingress \
--group-id "${{ steps.security-group.outputs.SG_ID }}" \
--ip-permissions \
"IpProtocol=tcp,FromPort=${{ steps.security-group.outputs.SSH_PORT }},ToPort=${{ steps.security-group.outputs.SSH_PORT }},IpRanges=[{CidrIp=${{ steps.security-group.outputs.RUNNER_IP_CIDR }}}]" \
--region "$AWS_REGION" 2>&1 || echo "Rule may have been removed"
fi
# Remove verification instance ingress rule
if [ -n "${{ steps.security-group.outputs.SG_ID }}" ] && [ -n "${{ steps.verify-security-group.outputs.RUNNER_IP_CIDR }}" ]; then
echo "Removing verification instance ingress rules..."
aws ec2 revoke-security-group-ingress \
--group-id "${{ steps.security-group.outputs.SG_ID }}" \
--ip-permissions \
"IpProtocol=tcp,FromPort=${{ steps.verify-security-group.outputs.SSH_PORT }},ToPort=${{ steps.verify-security-group.outputs.SSH_PORT }},IpRanges=[{CidrIp=${{ steps.verify-security-group.outputs.RUNNER_IP_CIDR }}}]" \
--region "$AWS_REGION" 2>&1 || echo "Rule may have been removed"
fi
# Always-run: deletes the private key file from the runner. The path comes
# from the keypair step's output and is empty if that step never set it.
- name: Cleanup - Remove Local SSH Key
  if: always()
  continue-on-error: true
  env:
    KEY_PATH: ${{ steps.keypair.outputs.KEY_PATH }}
  run: rm -f "$KEY_PATH"
# Always-run: prints the run's key facts to the step log. The phase list is
# static text and is printed regardless of which phases actually ran.
- name: Summary
  if: always()
  run: |
    cat <<EOF
    ## Release Build Summary
    - Version: ${{ steps.version.outputs.VERSION }}
    - Region: $AWS_REGION
    - Instance Type: $INSTANCE_TYPE
    - Build Instance ID: ${{ steps.instance.outputs.INSTANCE_ID || 'Not launched' }}
    - Verification Instance ID: ${{ steps.verify-instance.outputs.INSTANCE_ID || 'Not launched' }}

    ## Workflow Phases
    1. Build Phase: Compiled on instance with CUDA 12.8 and build tools
    2. Verification Phase: Tested on clean instance with only NVIDIA driver
    3. Release Phase: Created GitHub release (if not dry run)
    EOF