Skip to content

[Nightly] Enable guilty search #1849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions .github/scripts/guilty_search.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/bash
#
# Build PyTorch + torch-xpu-ops at the requested revisions, run one test case
# and report whether it passed. The exit status (0 = pass, non-zero = fail)
# makes the script usable as a "git bisect run" command.
#
# Usage:
#   guilty_search.sh [--NAME=VALUE ...]
# Every --NAME=VALUE argument is exported as environment variable NAME and
# overrides a value of the same name inherited from the environment.
#
# Inputs (environment or --NAME=VALUE):
#   WORKSPACE              scratch directory; its content is DELETED on every
#                          run (default: /tmp)
#   PYTORCH_VERSION        forwarded to build.sh (default: main)
#   TORCH_XPU_OPS_VERSION  forwarded to build.sh (default: main)
#   SEARCH_CHECK           accuracy | performance | ut_regressions |
#                          ut_extended | ut_xpu; anything else runs
#                          SEARCH_CASE from the current directory
#   SEARCH_CASE            shell command line that runs the test
#   SEARCH_GOOD_VALUE      performance baseline (default: 0.00001)
#   SEARCH_CRITERIA        tolerated relative performance drop
#   GITHUB_WORKSPACE       directory that holds gs-logs/ (required)
#
# Outputs:
#   ${GITHUB_WORKSPACE}/gs-logs/build-<pt>-<xpu>.log   build output
#   ${GITHUB_WORKSPACE}/gs-logs/test-<pt>-<xpu>.log    test output
#   one "status,acc,perf,pytorch,xpu-ops" row appended to
#   ${GITHUB_WORKSPACE}/gs-logs/summary.csv and ${WORKSPACE}/result.csv
set -xe
export GIT_PAGER=cat

# Parse params before applying defaults so that command-line values get the
# same normalisation as inherited ones. No eval: the text after the first
# "=" is taken literally as the value.
for var; do
  name_value="${var#"${var%%[!-]*}"}" # drop every leading "-"
  if [[ "${name_value}" != *=* ]]; then
    echo "Invalid argument '${var}', expected --NAME=VALUE" >&2
    exit 1
  fi
  export "${name_value%%=*}=${name_value#*=}"
done

# Init params
# NOTE(review): with the default WORKSPACE the cleanup below empties /tmp;
# a dedicated default directory would be safer - confirm against build.sh.
WORKSPACE="${WORKSPACE:-/tmp}"
PYTORCH_VERSION="${PYTORCH_VERSION:-main}"
TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION:-main}"
: "${GITHUB_WORKSPACE:?GITHUB_WORKSPACE must point at the directory holding gs-logs}"

# Clean WORKSPACE
mkdir -p "${WORKSPACE}"
# Absolute path is required because the test section changes directory.
WORKSPACE="$(realpath "${WORKSPACE}")"
rm -rf "${WORKSPACE:?}/"* || sudo rm -rf "${WORKSPACE:?}/"*

SCRIPT_DIR="$(dirname "$(realpath "$0")")"
LOG_DIR="${GITHUB_WORKSPACE}/gs-logs"
BUILD_LOG="${LOG_DIR}/build-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log"
TEST_LOG="${LOG_DIR}/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log"
mkdir -p "${LOG_DIR}"

# Build pytorch
pip uninstall -y torch
# stderr is discarded here exactly as before; with "set -x" active this also
# hides the trace of env.sh.
source "${SCRIPT_DIR}/env.sh" 2> /dev/null
build_status=0
"${SCRIPT_DIR}/build.sh" \
  --WORKSPACE="${WORKSPACE}" \
  --PYTORCH_VERSION="${PYTORCH_VERSION}" \
  --TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \
  > "${BUILD_LOG}" 2>&1 || build_status=$?
if ((build_status != 0)); then
  tail -n 100 "${BUILD_LOG}"
  echo "Build got failed"
  exit 1
fi
pip list | grep torch

#######################################
# Run a test command line and record its exit status.
# Globals:   TEST_LOG (read), test_status (written)
# Arguments: $1 - command line to evaluate
# Outputs:   stdout and stderr of the command go to ${TEST_LOG}
#######################################
run_search_case() {
  test_status=0
  # SEARCH_CASE is an operator-supplied command line, so eval is deliberate.
  # The subshell keeps any "cd" or variable change made by the command away
  # from this script.
  (eval "$1") > "${TEST_LOG}" 2>&1 || test_status=$?
}

# Test
test_result=1
acc_result=""
perf_result=""
case_dir=""
case_cmd="${SEARCH_CASE}"
case "${SEARCH_CHECK:-}" in
  accuracy | performance)
    case_dir="${WORKSPACE}/pytorch"
    case_cmd="${SEARCH_CASE} --output=${WORKSPACE}/tmp.csv"
    ;;
  ut_regressions)
    case_dir="${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/regressions"
    ;;
  ut_extended)
    case_dir="${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/xpu/extended"
    ;;
  ut_xpu)
    case_dir="${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/xpu"
    ;;
esac

if [[ -n "${case_dir}" ]]; then
  cd "${case_dir}"
fi
if [[ "${SEARCH_CHECK:-}" == "accuracy" || "${SEARCH_CHECK:-}" == "performance" ]]; then
  # Presumably removed so that python imports the installed torch package
  # and not the source directory of the same name - confirm.
  rm -rf torch
fi

run_search_case "${case_cmd}"

if ((test_status == 0)); then
  case "${SEARCH_CHECK:-}" in
    accuracy)
      # 4th column of the last CSV row holds the accuracy verdict.
      acc_result="$(tail -n 1 "${WORKSPACE}/tmp.csv" | awk -F, '{print $4}')"
      if [[ "${acc_result}" == "pass"* ]]; then
        test_result=0
      fi
      ;;
    performance)
      # 5th column of the last CSV row holds the measured value.
      perf_result="$(tail -n 1 "${WORKSPACE}/tmp.csv" | awk -F, '{print $5}')"
      # Pass when measured/baseline > 1 - criteria. A zero or non-numeric
      # baseline counts as a failure and no longer aborts awk with a
      # division by zero.
      test_result="$(awk \
        -v cur="${perf_result}" \
        -v base="${SEARCH_GOOD_VALUE:-0.00001}" \
        -v tol="${SEARCH_CRITERIA}" \
        'BEGIN {
          if (base + 0 != 0 && cur / base > (1 - tol)) {
            print "0";
          } else {
            print "1";
          }
        }')"
      ;;
    *)
      test_result=0
      ;;
  esac
fi

# Test result
cat "${TEST_LOG}"
echo "${test_result},${acc_result},${perf_result},${PYTORCH_VERSION},${TORCH_XPU_OPS_VERSION}" |
  tee -a "${LOG_DIR}/summary.csv" | tee -a "${WORKSPACE}/result.csv"
exit "${test_result}"
190 changes: 190 additions & 0 deletions .github/workflows/_guilty_search.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# Manually dispatched workflow that checks a test case at an "old" and a "new"
# commit of pytorch and/or torch-xpu-ops and, when the two results differ,
# bisects the commit range with .github/scripts/guilty_search.sh.
name: Guilty Search

on:
  workflow_dispatch:
    inputs:
      runner:
        required: true
        type: string
        default: 'pvc_rolling'
        description: Test node
      triton:
        type: string
        default: ''
        description: Triton version if need
      python:
        type: string
        default: '3.10'
        description: Python version
      search_commits:
        required: true
        type: string
        default: ''
        description: Target commits, such as 'pytorch=old/new,xpu-ops=old/new'
      search_check:
        type: string
        default: ''
        description: Test case type, 'performance, accuracy, <ut_regressions/ut_extended/ut_xpu> or others'
      search_case:
        required: true
        type: string
        default: ''
        description: Test case, such as 'python xxx.py or pytest -k xxx'
      search_criteria:
        type: string
        default: '0.1'
        description: Criteria for performance check, default is 10%

permissions: read-all

jobs:
  guilty-search:
    # The SEARCH_* variables are read by guilty_search.sh; the run steps below
    # also read them from the environment rather than interpolating the raw
    # inputs into shell source.
    env:
      GH_TOKEN: ${{ github.token }}
      SEARCH_COMMITS: ${{ inputs.search_commits }}
      SEARCH_CHECK: ${{ inputs.search_check }}
      SEARCH_CASE: ${{ inputs.search_case }}
      SEARCH_CRITERIA: ${{ inputs.search_criteria }}
    runs-on: ${{ inputs.runner }}
    steps:
      - name: Cleanup workspace
        run: |
          rm -rf ./* || sudo rm -rf ./*
          mkdir gs-logs gs-search
          echo "Status,Acc,Perf,PyTorch,Torch-xpu-ops" > gs-logs/summary.csv
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
        with:
          path: gs-scripts
      - name: Prepare source code
        run: |
          # Full clones used for bisecting; their HEADs become the "latest"
          # revision of the repository that is not under search.
          git clone https://github.com/pytorch/pytorch gs-pytorch
          cd gs-pytorch
          LATEST_PT_COMMIT="$(git rev-parse HEAD)"
          cd ..
          git clone https://github.com/intel/torch-xpu-ops gs-torch-xpu-ops
          cd gs-torch-xpu-ops
          LATEST_XPU_COMMIT="$(git rev-parse HEAD)"
          cd ..
          echo "LATEST_PT_COMMIT=${LATEST_PT_COMMIT}" >> ${GITHUB_ENV}
          echo "LATEST_XPU_COMMIT=${LATEST_XPU_COMMIT}" >> ${GITHUB_ENV}
      - name: Setup python env
        run: |
          conda create python=${{ inputs.python }} -y -n guilty-search
          source activate guilty-search
          conda info -e
          which python && which pip
          conda list
          pip install cmake ninja pandas psutil scipy requests pybind11
      - name: Prepare test env
        run: |
          source activate guilty-search
          if [[ "${SEARCH_CASE}" == *"benchmarks/dynamo/huggingface.py"* ]];then
            pip install transformers==4.44.2
          elif [[ "${SEARCH_CASE}" == *"benchmarks/dynamo/timm_models.py"* ]];then
            pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14
            pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch)
          elif [[ "${SEARCH_CASE}" == *"benchmarks/dynamo/torchbench.py"* ]];then
            # Model name is the word that follows "--only" in the test case.
            model_name="$(echo "${SEARCH_CASE}" |sed 's+.*\--only *++;s/ .*//')"
            pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu
            git clone https://github.com/pytorch/benchmark gs-benchmark
            cd gs-benchmark
            echo "PYTHONPATH=${PWD}:${PYTHONPATH}" >> ${GITHUB_ENV}
            python install.py ${model_name}
            pip uninstall -y torch
          else
            pip install -r gs-pytorch/.ci/docker/requirements-ci.txt
          fi
      - name: Triton Installation
        run: |
          source activate guilty-search
          cd gs-pytorch
          rm -rf pytorch_triton_xpu-*.whl
          if [ "${{ inputs.triton }}" != "" ];then
            TRITON_COMMIT_ID="${{ inputs.triton }}"
          else
            TRITON_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)"
          fi
          TRITON_VERSION_NAME="$(
            curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\
                    grep '__version__' |head -n 1 |awk -F "'" '{print $2}'
          )"
          python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME}
          pip install pytorch_triton_xpu-*.whl
      - name: Guilty search pytorch
        if: ${{ contains(inputs.search_commits, 'pytorch') }}
        run: |
          source activate guilty-search
          set -xe -o pipefail
          search_script="${{ github.workspace }}/gs-scripts/.github/scripts/guilty_search.sh"
          search_dir="${{ github.workspace }}/gs-search"
          log_dir="${{ github.workspace }}/gs-logs"
          pytorch_commits="$(echo "${SEARCH_COMMITS}" |sed 's+.*pytorch=++;s+,.*++')"
          old_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $1}')"
          new_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $2}')"
          # Check the old commit; its performance value becomes the baseline.
          old_status=0
          "${search_script}" \
            --WORKSPACE="${search_dir}" \
            --PYTORCH_VERSION="${old_commit}" \
            --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \
            > "${log_dir}/search-${old_commit}-${LATEST_XPU_COMMIT}.log" 2>&1 || old_status=$?
          old_result="$(tail -n 1 "${search_dir}/result.csv")"
          export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')"
          # Check the new commit.
          new_status=0
          "${search_script}" \
            --WORKSPACE="${search_dir}" \
            --PYTORCH_VERSION="${new_commit}" \
            --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \
            > "${log_dir}/search-${new_commit}-${LATEST_XPU_COMMIT}.log" 2>&1 || new_status=$?
          new_result="$(tail -n 1 "${search_dir}/result.csv")"
          if [ "${old_status}" != "${new_status}" ];then
            cd gs-pytorch
            git reset --hard
            # "git bisect start" only records the range; "git bisect run"
            # executes the command at every step. The command is wrapped in
            # bash -c with single quotes so that $(git rev-parse HEAD) is
            # evaluated at each step, not once before the bisect starts.
            bisect_status=0
            {
              git bisect start "${new_commit}" "${old_commit}" &&
                git bisect run bash -c '"${GITHUB_WORKSPACE}/gs-scripts/.github/scripts/guilty_search.sh" \
                  --WORKSPACE="${GITHUB_WORKSPACE}/gs-search" \
                  --PYTORCH_VERSION="$(git rev-parse HEAD)" \
                  --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}"'
            } > "${log_dir}/bisect-pytorch.log" 2>&1 || bisect_status=$?
            git bisect log |tee "${log_dir}/result-pytorch.log"
          else
            echo "Checked and no regression !"
          fi
      - name: Guilty search torch-xpu-ops
        if: ${{ contains(inputs.search_commits, 'xpu-ops') }}
        run: |
          source activate guilty-search
          set -xe -o pipefail
          search_script="${{ github.workspace }}/gs-scripts/.github/scripts/guilty_search.sh"
          search_dir="${{ github.workspace }}/gs-search"
          log_dir="${{ github.workspace }}/gs-logs"
          xpu_ops_commits="$(echo "${SEARCH_COMMITS}" |sed 's+.*xpu-ops=++;s+,.*++')"
          old_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $1}')"
          new_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $2}')"
          # Check the old commit; its performance value becomes the baseline.
          old_status=0
          "${search_script}" \
            --WORKSPACE="${search_dir}" \
            --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \
            --TORCH_XPU_OPS_VERSION="${old_commit}" \
            > "${log_dir}/search-${LATEST_PT_COMMIT}-${old_commit}.log" 2>&1 || old_status=$?
          old_result="$(tail -n 1 "${search_dir}/result.csv")"
          export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')"
          # Check the new commit.
          new_status=0
          "${search_script}" \
            --WORKSPACE="${search_dir}" \
            --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \
            --TORCH_XPU_OPS_VERSION="${new_commit}" \
            > "${log_dir}/search-${LATEST_PT_COMMIT}-${new_commit}.log" 2>&1 || new_status=$?
          new_result="$(tail -n 1 "${search_dir}/result.csv")"
          if [ "${old_status}" != "${new_status}" ];then
            # The commits under search belong to torch-xpu-ops, so the bisect
            # has to run inside that clone.
            cd gs-torch-xpu-ops
            git reset --hard
            # See the pytorch step: start records the range, run executes the
            # command, and the revision is resolved at every step.
            bisect_status=0
            {
              git bisect start "${new_commit}" "${old_commit}" &&
                git bisect run bash -c '"${GITHUB_WORKSPACE}/gs-scripts/.github/scripts/guilty_search.sh" \
                  --WORKSPACE="${GITHUB_WORKSPACE}/gs-search" \
                  --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \
                  --TORCH_XPU_OPS_VERSION="$(git rev-parse HEAD)"'
            } > "${log_dir}/bisect-torch-xpu-ops.log" 2>&1 || bisect_status=$?
            git bisect log |tee "${log_dir}/result-torch-xpu-ops.log"
          else
            echo "Checked and no regression !"
          fi
      - name: Summary
        run: |
          cat gs-logs/summary.csv |tee -a ${GITHUB_STEP_SUMMARY}
          for result_log in gs-logs/result-*.log
          do
            # Skip the literal pattern when no bisect produced a result log.
            [[ -e "${result_log}" ]] || continue
            echo -e "\n\n\n${result_log}" |tee -a ${GITHUB_STEP_SUMMARY}
            cat "${result_log}" |tee -a ${GITHUB_STEP_SUMMARY}
          done