Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 142 additions & 0 deletions gcp_vm_scripts/cc_benchmarks/0_start_vm_h100.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/bin/bash

# ==============================================================================
# Script: 0_start_vm_h100.sh
# Description: Provisions an H100 GPU VM (a3-highgpu-1g) for CC benchmarks.
# Supports multi-zone fallback and Confidential Compute (TDX).
#
# Usage:
# ./0_start_vm_h100.sh [--confidential] [--secure-boot] [--zone <zone>]
#
# Flags:
# --confidential : Enable Confidential Compute (TDX) for the VM.
# --secure-boot : Enable Shielded Secure Boot for the VM.
# --zone : Target a specific zone (overrides fallback list).
# ==============================================================================

# Configuration
PROJECT_ID="fx-gen-ai-sandbox"
VM_NAME="h100-test-vm"
MACHINE_TYPE="a3-highgpu-1g"
GPU_COUNT=1
DISK_SIZE=250
SERVICE_ACCOUNT="18209811701-compute@developer.gserviceaccount.com"
IMAGE="projects/ubuntu-os-accelerator-images/global/images/ubuntu-accelerator-2404-amd64-with-nvidia-580-v20251021"

# List of zones to try in sequential order (fallback mechanism)
ZONES=("us-central1-a" "us-central1-c" "europe-west4-b")

# Default Flags
CONFIDENTIAL_FLAG=""
SECURE_BOOT_FLAG="--no-shielded-secure-boot"

# --- Argument parsing --------------------------------------------------------
# Recognized flags: --confidential, --secure-boot, --zone <zone>.
while [[ "$#" -gt 0 ]]; do
    case "$1" in
        --confidential)
            # Request Intel TDX Confidential Computing on the instance.
            CONFIDENTIAL_FLAG="--confidential-compute-type=TDX"
            echo "Enabling Confidential Computing (TDX)..."
            ;;
        --secure-boot)
            # Turn on Shielded VM Secure Boot.
            SECURE_BOOT_FLAG="--shielded-secure-boot"
            echo "Enabling Shielded Secure Boot..."
            ;;
        --zone)
            # Replace the fallback zone list with a single user-supplied zone.
            # ${2:-} keeps this safe if --zone is the last argument.
            if [[ -n "${2:-}" && "$2" != --* ]]; then
                ZONES=("$2")
                echo "Targeting specific zone: $2"
                shift
            else
                echo "Error: --zone requires a value." >&2
                exit 1
            fi
            ;;
        *)
            # Fail fast on typos instead of silently ignoring them, matching
            # the error handling of the companion setup scripts.
            echo "Unknown argument: $1" >&2
            exit 1
            ;;
    esac
    shift
done

# --- VM creation with zone fallback ------------------------------------------
# Try each candidate zone in order; stop at the first successful create.
for ZONE in "${ZONES[@]}"; do
  # Derive the region by dropping the zone letter: "us-central1-a" -> "us-central1".
  REGION="${ZONE%-*}"
  echo "--------------------------------------------------------"
  echo "Attempting to start VM $VM_NAME in zone $ZONE..."
  echo "--------------------------------------------------------"

  # 1. Ensure the snapshot schedule exists in the target region.
  # Resource policies are regional; a failure here (typically "already
  # exists") is non-fatal by design.
  echo "Checking/Creating snapshot schedule 'default-schedule-1' in region $REGION..."
  gcloud compute resource-policies create snapshot-schedule default-schedule-1 \
    --project="$PROJECT_ID" \
    --region="$REGION" \
    --max-retention-days=14 \
    --on-source-disk-delete=keep-auto-snapshots \
    --daily-schedule \
    --start-time=00:00 \
    2>/dev/null || echo "Snapshot schedule already exists or could not be created."

  # 2. Pick the boot-disk type per zone; europe-west4 uses Hyperdisk with
  # explicit provisioned IOPS/throughput, other zones use pd-balanced.
  if [[ "$ZONE" == europe-west4* ]]; then
    DISK_TYPE="hyperdisk-balanced"
    DISK_EXTRAS=",provisioned-iops=6000,provisioned-throughput=890"
  else
    DISK_TYPE="pd-balanced"
    DISK_EXTRAS=""
  fi

  # 3. Create the instance. TDX / Secure Boot flags are applied if set.
  # NOTE: $SECURE_BOOT_FLAG and $CONFIDENTIAL_FLAG are intentionally left
  # unquoted so an empty value expands to no argument at all.
  # shellcheck disable=SC2086
  if gcloud compute instances create "$VM_NAME" \
    --project="$PROJECT_ID" \
    --zone="$ZONE" \
    --machine-type="$MACHINE_TYPE" \
    --network-interface="network-tier=PREMIUM,nic-type=GVNIC,stack-type=IPV4_ONLY,subnet=sandbox-vpc-default" \
    --metadata="enable-osconfig=TRUE" \
    --no-restart-on-failure \
    --maintenance-policy="TERMINATE" \
    --provisioning-model="SPOT" \
    --instance-termination-action="STOP" \
    --discard-local-ssds-at-termination-timestamp=true \
    --service-account="$SERVICE_ACCOUNT" \
    --scopes="https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring.write,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/trace.append" \
    --accelerator="count=$GPU_COUNT,type=nvidia-h100-80gb" \
    --create-disk="auto-delete=yes,boot=yes,device-name=$VM_NAME,disk-resource-policy=projects/$PROJECT_ID/regions/$REGION/resourcePolicies/default-schedule-1,image=$IMAGE,mode=rw,size=$DISK_SIZE,type=$DISK_TYPE$DISK_EXTRAS" \
    $SECURE_BOOT_FLAG \
    $CONFIDENTIAL_FLAG \
    --shielded-vtpm \
    --shielded-integrity-monitoring \
    --labels="goog-ops-agent-policy=v2-x86-template-1-4-0,goog-ec-src=vm_add-gcloud" \
    --reservation-affinity="none"; then

    echo "Successfully created VM in $ZONE."

    # 4. Post-creation: install the Ops Agent via a zonal OS policy.
    echo "Configuring Ops Agent..."
    # NOTE(review): verify the YAML nesting produced by this printf — the
    # indentation may have been collapsed in transit; compare against the
    # documented ops-agent policy file format.
    printf 'agentsRule:\n packageState: installed\n version: latest\ninstanceFilter:\n inclusionLabels:\n - labels:\n goog-ops-agent-policy: v2-x86-template-1-4-0\n' > config.yaml

    POLICY_NAME="goog-ops-agent-v2-x86-template-1-4-0-${ZONE}"
    echo "Applying Ops Agent policy: $POLICY_NAME"
    # Create the policy; if it already exists, fall back to updating it.
    gcloud compute instances ops-agents policies create "$POLICY_NAME" \
      --project="$PROJECT_ID" \
      --zone="$ZONE" \
      --file=config.yaml || \
    gcloud compute instances ops-agents policies update "$POLICY_NAME" \
      --project="$PROJECT_ID" \
      --zone="$ZONE" \
      --file=config.yaml || \
    echo "Warning: Failed to create or update Ops Agent policy, but VM is up." >&2

    echo "All set!"
    exit 0
  else
    echo "Failed to create VM in $ZONE. Trying next zone..."
  fi
done

echo "Error: Could not start VM in any of the specified zones." >&2
exit 1
116 changes: 116 additions & 0 deletions gcp_vm_scripts/cc_benchmarks/1_setup_environment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/bin/bash

# ==============================================================================
# Script 1: Setup Environment
#
# Purpose:
# This script performs a one-time setup for a new machine by installing
# Docker and the NVIDIA Container Toolkit. It is idempotent, meaning it
# can be safely re-run without causing issues.
#
# Usage:
# ./1_setup_environment.sh
# ==============================================================================

# Exit immediately if a command exits with a non-zero status.
set -e

# --- Argument parsing --------------------------------------------------------
# --confidential : also run the Confidential Computing (TDX) GPU setup steps.
# (Removed GitHub review-page text that had been pasted into the script here;
# it was not shell and broke execution.)
CONFIDENTIAL=false
while [[ "$#" -gt 0 ]]; do
    case "$1" in
        --confidential) CONFIDENTIAL=true ;;
        *) echo "Unknown parameter passed: $1"; exit 1 ;;
    esac
    shift
done

echo "--- [Step 1/1] Setting up environment ---"

# --- Install Docker (idempotent) ---------------------------------------------
# Skips cleanly when docker is already on PATH; otherwise adds Docker's
# signed APT repository and installs the engine + plugins.
if command -v docker &> /dev/null; then
    echo "Docker is already installed. Skipping installation."
else
    echo "Installing Docker..."
    sudo apt-get update
    sudo apt-get install -y ca-certificates curl
    sudo install -m 0755 -d /etc/apt/keyrings
    sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
    sudo chmod a+r /etc/apt/keyrings/docker.asc
    # Resolve the Ubuntu codename first to keep the repo line on one clean line.
    codename="$(. /etc/os-release && echo "$VERSION_CODENAME")"
    echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu ${codename} stable" | \
        sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
    # This second apt-get update IS required: the Docker repository was only
    # just added above, so its package lists must be fetched.
    sudo apt-get update
    sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
    echo "Docker installation complete."
fi

# Enable Confidential Computing before verifying CUDA Toolkit.
if [ "$CONFIDENTIAL" = true ]; then
echo "Enabling Confidential Computing..."
# Enable Linux Kernel Crypto API
# The modprobe "install" rule below loads ecdsa_generic and ecdh before the
# nvidia module. NOTE(review): presumably these crypto modules are required
# for the driver's SPDM/attestation path — confirm against NVIDIA's
# Confidential Computing deployment guide.
echo "install nvidia /sbin/modprobe ecdsa_generic; /sbin/modprobe ecdh; /sbin/modprobe --ignore-install nvidia" | sudo tee /etc/modprobe.d/nvidia-lkca.conf
# Rebuild the initramfs so the modprobe rule is honored at the next boot.
sudo update-initramfs -u

# Enable Confidential Compute GPUs Ready state
sudo nvidia-smi conf-compute -srs 1

# Set startup unit to enable Confidential Compute GPUs Ready state on each boot
sudo tee /etc/systemd/system/cc-gpu-ready.service > /dev/null << 'EOF'
[Unit]
Description=Set Confidential Compute GPU to Ready mode
After=multi-user.target
Wants=nvidia-persistenced.service

[Service]
Type=oneshot
ExecStartPre=/bin/sleep 2
ExecStart=/usr/bin/nvidia-smi conf-compute -srs 1
ExecStartPost=/usr/bin/nvidia-smi conf-compute -grs
RemainAfterExit=true

[Install]
WantedBy=multi-user.target
EOF

# Register and enable the unit so the Ready state is restored on every boot.
sudo systemctl daemon-reload
sudo systemctl enable cc-gpu-ready.service

# Print current CC state so the operator can verify it by eye.
nvidia-smi conf-compute -f # should say CC status: ON
nvidia-smi conf-compute -grs # should say ready
fi

# --- Install NVIDIA Container Toolkit (idempotent) ---------------------------
# Query dpkg's status database instead of `dpkg -l | grep -q`, which can
# match unrelated packages by substring and also matches packages that were
# removed but not purged.
if dpkg -s nvidia-container-toolkit 2>/dev/null | grep -q '^Status: install ok installed'; then
    echo "NVIDIA Container Toolkit is already installed. Skipping installation."
else
    echo "Installing NVIDIA Container Toolkit..."
    # Add NVIDIA's signed APT repository, then install the toolkit and wire
    # the NVIDIA runtime into Docker.
    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
        && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
        sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
        sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
    sudo apt-get update
    sudo apt-get install -y nvidia-container-toolkit
    sudo nvidia-ctk runtime configure --runtime=docker
    sudo systemctl restart docker
    echo "NVIDIA Container Toolkit installation complete."
fi

echo "--- Environment setup is complete. ---"

# Brief pause before reconfiguring services. NOTE(review): presumably this
# gives the docker restart above time to settle — confirm it is needed.
sleep 3
echo "Enabling persistence mode..."

# Enable persistence mode to establish a secure Security Protocol and Data Model (SPDM) connection
# Drop-in override for the stock nvidia-persistenced unit: clear its
# ExecStart and relaunch it with UVM persistence mode enabled.
sudo mkdir -p /etc/systemd/system/nvidia-persistenced.service.d
cat <<EOF | sudo tee /etc/systemd/system/nvidia-persistenced.service.d/override.conf
[Service]
# Clear the original ExecStart then provide our desired command:
ExecStart=
ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --uvm-persistence-mode --verbose
EOF

# Reload unit definitions, enable the service, then reboot so the override
# (and, on the confidential path, the rebuilt initramfs) takes effect.
sudo systemctl daemon-reload
sudo systemctl enable nvidia-persistenced.service
sudo reboot
128 changes: 128 additions & 0 deletions gcp_vm_scripts/cc_benchmarks/2_start_server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/bin/bash

# ==============================================================================
# Script 2: Start TRT-LLM Server (CC Benchmarks)
#
# Purpose:
# Starts the TRT-LLM server in a detached Docker container with the
# optimal configuration for CC Benchmarks (H100).
#
# Usage:
# ./2_start_server.sh --hardware <HW> --model <MODEL>
#
# Example:
# ./2_start_server.sh --hardware H100 --model Qwen
# ==============================================================================

# Abort on the first failing command.
set -e

# --- Configuration ---
DOCKER_IMAGE="nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc1"
CONTAINER_NAME="trtllm_server"

# Per-(model, hardware) serving configuration, keyed as "<Model>_<HW>".
declare -A MODEL_PATHS=(
    ["Qwen_H100"]="Qwen/Qwen3-30B-A3B"
    ["Mistral_H100"]="mistralai/Mistral-7B-v0.1"
)

# Maximum concurrent batch size passed to trtllm-serve.
declare -A MAX_BATCH_SIZES=(
    ["Qwen_H100"]=512
    ["Mistral_H100"]=2056
)

# Tensor-parallel degree per configuration.
declare -A TP_SIZES=(
    ["Qwen_H100"]=1
    ["Mistral_H100"]=1
)

# --- Helper Functions ---

# Print invocation help on stdout and terminate the script with status 1.
usage() {
    printf 'Usage: %s --hardware H100 --model <Qwen|Mistral>\n' "$0"
    exit 1
}

# --- Main Execution ---
# Parses CLI flags, validates the hardware/model combination, starts a
# detached container, and launches trtllm-serve inside it.
main() {
    local HARDWARE=""
    local MODEL=""

    # Both flags take a value; anything else prints usage and exits.
    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            --hardware) HARDWARE="$2"; shift ;;
            --model) MODEL="$2"; shift ;;
            *) usage ;;
        esac
        shift
    done

    if [ -z "$HARDWARE" ] || [ -z "$MODEL" ]; then
        echo "Error: --hardware and --model are required arguments."
        usage
    fi

    # Look up the serving configuration for this combination.
    local CONFIG_KEY="${MODEL}_${HARDWARE}"
    local MODEL_PATH="${MODEL_PATHS[$CONFIG_KEY]}"
    local MAX_BATCH_SIZE="${MAX_BATCH_SIZES[$CONFIG_KEY]}"
    local TP_SIZE="${TP_SIZES[$CONFIG_KEY]}"

    if [ -z "$MODEL_PATH" ]; then
        echo "Error: Invalid hardware/model combination. Only H100 with Qwen or Mistral is supported."
        exit 1
    fi

    echo "--- [Step 1/2] Starting server with configuration ---"
    echo "Hardware: $HARDWARE"
    echo "Model: $MODEL"
    echo "Model Path: $MODEL_PATH"
    echo "Max Batch Size: $MAX_BATCH_SIZE"
    echo "TP Size: $TP_SIZE"
    echo "----------------------------------------------------"

    # Refuse to clobber a live server.
    if [ "$(sudo docker ps -q -f name="$CONTAINER_NAME")" ]; then
        echo "Error: A container with the name '$CONTAINER_NAME' is already running."
        echo "Please stop it first by running ./4_stop_server.sh"
        exit 1
    fi

    # One forced removal covers both stopped and any other existing
    # containers (the separate "status=exited" pass was redundant).
    if [ "$(sudo docker ps -aq -f name="$CONTAINER_NAME")" ]; then
        echo "Removing existing container..."
        sudo docker rm -f "$CONTAINER_NAME"
    fi

    echo "Creating local directory for artifacts..."
    mkdir -p ~/llm_benchmarks/artifacts
    echo "Creating local directory for scripts..."
    mkdir -p ~/scripts
    echo "Creating local directory for genai-bench output..."
    mkdir -p ~/genai-bench-output

    echo "Starting Docker container '$CONTAINER_NAME' in detached mode..."
    if [ -z "${HF_TOKEN:-}" ]; then
        echo "HF_TOKEN is not set. Please enter your Hugging Face token:"
        read -r -s HF_TOKEN
        # SECURITY: never echo the token itself; confirm receipt only.
        echo "HF_TOKEN received (${#HF_TOKEN} characters)."
    fi

    # The container idles on "sleep infinity"; the server is exec'd below.
    # HF_TOKEN is quoted so tokens with special characters survive intact.
    sudo docker run --ipc host --gpus all -p 8000:8000 \
        -v ~/llm_benchmarks/artifacts:/app/tensorrt_llm/artifacts \
        -v ~/scripts:/app/scripts \
        -v ~/genai-bench-output:/genai-bench \
        -e HF_TOKEN="$HF_TOKEN" \
        -d --name "$CONTAINER_NAME" "$DOCKER_IMAGE" sleep infinity

    echo "--- [Step 2/2] Launching trtllm-serve inside the container ---"

    local EXEC_CMD="trtllm-serve \"$MODEL_PATH\" \
        --host 0.0.0.0 \
        --max_batch_size $MAX_BATCH_SIZE \
        --max_num_tokens 16384 \
        --max_seq_len 16384 \
        --tp_size $TP_SIZE"

    # Detached exec so the server keeps running after this script exits;
    # server output goes to a log file inside the container.
    sudo docker exec -d "$CONTAINER_NAME" bash -c "$EXEC_CMD > /var/log/trtllm_server.log 2>&1 &"

    echo "Server is starting in the background. It may take a few minutes to become ready."
    echo "You can check the logs with: sudo docker exec -it $CONTAINER_NAME tail -f /var/log/trtllm_server.log"
    echo "To get an interactive shell inside the container, run: sudo docker exec -it $CONTAINER_NAME bash"
}

# Entry point: forward this script's arguments to main().
main "$@"
Loading
Loading