diff --git a/.github/scripts/bisect_search.sh b/.github/scripts/bisect_search.sh
new file mode 100755
index 000000000..94e9b0503
--- /dev/null
+++ b/.github/scripts/bisect_search.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+set -xe
+export GIT_PAGER=cat
+
+# Init params
+WORKSPACE=$(realpath ${WORKSPACE:-"/tmp"})
+PYTORCH_VERSION=${PYTORCH_VERSION:-"main"}
+TORCH_XPU_OPS_VERSION=${TORCH_XPU_OPS_VERSION:-"main"}
+for var; do
+    eval "export $(echo ${var@Q} |sed "s/^'-*//g;s/=/='/")"
+done
+
+# Clean WORKSPACE
+mkdir -p ${WORKSPACE}
+rm -rf "${WORKSPACE:?}/"* || sudo rm -rf "${WORKSPACE:?}/"*
+
+# Build pytorch
+pip uninstall -y torch
+source $(dirname $(realpath $0))/env.sh 2> /dev/null
+build_status="$($(dirname $(realpath $0))/build.sh \
+    --WORKSPACE="${WORKSPACE}" \
+    --PYTORCH_VERSION="${PYTORCH_VERSION}" \
+    --TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \
+    > ${GITHUB_WORKSPACE}/gs-logs/build-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
+if [ ${build_status} -ne 0 ];then
+    tail -n 100 ${GITHUB_WORKSPACE}/gs-logs/build-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log
+    echo "Build failed"
+    exit 1
+fi
+pip list |grep torch
+
+# Test
+test_result=1
+if [ "${SEARCH_CHECK}" == "accuracy" ];then
+    cd ${WORKSPACE}/pytorch
+    rm -rf torch
+    test_status="$(eval "${SEARCH_CASE} --output=${WORKSPACE}/tmp.csv" \
+        > ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
+    if [ ${test_status} -eq 0 ];then
+        acc_result=$(tail -n 1 ${WORKSPACE}/tmp.csv |awk -F, '{print $4}')
+        if [[ "${acc_result}" == "pass"* ]];then
+            test_result=0
+        fi
+    fi
+elif [ "${SEARCH_CHECK}" == "performance" ];then
+    cd ${WORKSPACE}/pytorch
+    rm -rf torch
+    test_status="$(eval "${SEARCH_CASE} --output=${WORKSPACE}/tmp.csv" \
+        > ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
+    if [ ${test_status} -eq 0 ];then
+        perf_result=$(tail -n 1 ${WORKSPACE}/tmp.csv |awk -F, '{print $5}')
+        test_result=$(echo "${perf_result},${SEARCH_GOOD_VALUE:-"0.00001"},${SEARCH_CRITERIA}" |awk -F, '{
+            if ($1/$2 > (1 - $3)){
+                print "0";
+            }else{
+                print "1";
+            }
+        }')
+    fi
+elif [ "${SEARCH_CHECK}" == "ut_regressions" ];then
+    cd ${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/regressions
+    test_status="$(eval "${SEARCH_CASE}" \
+        > ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
+    if [ ${test_status} -eq 0 ];then
+        test_result=0
+    fi
+elif [ "${SEARCH_CHECK}" == "ut_extended" ];then
+    cd ${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/xpu/extended
+    test_status="$(eval "${SEARCH_CASE}" \
+        > ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
+    if [ ${test_status} -eq 0 ];then
+        test_result=0
+    fi
+elif [ "${SEARCH_CHECK}" == "ut_xpu" ];then
+    cd ${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/xpu
+    test_status="$(eval "${SEARCH_CASE}" \
+        > ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
+    if [ ${test_status} -eq 0 ];then
+        test_result=0
+    fi
+else
+    test_status="$(eval "${SEARCH_CASE}" \
+        > ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
|| echo $?)" + if [ ${test_status} -eq 0 ];then + test_result=0 + fi +fi + +# Test result +cat ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log +echo "${test_result},${acc_result},${perf_result},${PYTORCH_VERSION},${TORCH_XPU_OPS_VERSION}" |\ + tee -a ${GITHUB_WORKSPACE}/gs-logs/summary.csv |tee -a ${WORKSPACE}/result.csv +exit ${test_result} diff --git a/.github/workflows/_bisect_search.yml b/.github/workflows/_bisect_search.yml new file mode 100644 index 000000000..7434825fc --- /dev/null +++ b/.github/workflows/_bisect_search.yml @@ -0,0 +1,244 @@ +name: Bisect Search + +on: + workflow_dispatch: + inputs: + runner: + required: true + type: string + default: 'pvc_rolling' + description: Test node + search_commits: + required: true + type: string + default: '' + description: Target commits, such as 'pytorch=old/new,xpu-ops=old/new' + search_check: + type: string + default: '' + description: Test case type, 'performance, accuracy, or others' + search_case: + required: true + type: string + default: '' + description: Test case, such as 'python xxx.py or pytest -k xxx' + search_criteria: + type: string + default: '0.1' + description: Criteria for performance check, default is 10% + triton: + type: string + default: 'pinned' + description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch' + oneapi: + type: string + default: 'installed' + description: Installed oneAPI DLE on host by default, fill offline.sh url if needed + python: + type: string + default: '3.10' + description: Python version + +permissions: read-all + +jobs: + get_runner: + runs-on: ${{ inputs.runner }} + outputs: + test_host: ${{ steps.runner-info.outputs.test_host }} + test_user: ${{ steps.runner-info.outputs.test_user }} + test_group: ${{ steps.runner-info.outputs.test_group }} + steps: + - name: Get runner info + id: runner-info + run: | + # get test runner + echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT} + echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT} + echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT} + # show host info + cat /etc/os-release + uname -a + source /opt/intel/oneapi/setvars.sh + sycl-ls + dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev' + - name: Cleanup workspace + if: ${{ always() }} + run: | + # clean docker cache + docker stop $(docker ps -aq) || true + docker system prune -af || true + # clean files + ls -al + sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf + + biisect-search: + needs: get_runner + runs-on: ${{ needs.get_runner.outputs.test_host }} + container: + image: mengfeili/intel-pvc-driver:1146-1136 + volumes: + - ${{ github.workspace }}:${{ github.workspace }} + options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g + -u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }} + env: + AGENT_TOOLSDIRECTORY: /tmp/_tools + GH_TOKEN: ${{ github.token }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + SEARCH_COMMITS: ${{ inputs.search_commits }} + SEARCH_CHECK: ${{ inputs.search_check }} + SEARCH_CASE: ${{ inputs.search_case }} + SEARCH_CRITERIA: ${{ inputs.search_criteria }} + defaults: + run: + shell: bash -xe {0} + steps: + - name: Setup python-${{ inputs.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python }} + - name: Check runner + run: | + ls -al + find ./ |grep -v "^\./$" |xargs rm -rf + hostname && whoami && id + clinfo --list + gcc -v && g++ -v + which python 
+          python -V
+          pip install -U pip wheel setuptools
+          pip list
+          uname -a
+          dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev'
+          pip install cmake ninja pandas psutil scipy requests pybind11
+          mkdir gs-logs gs-search
+          echo "Status,Acc,Perf,PyTorch,Torch-xpu-ops" > gs-logs/summary.csv
+      - name: Install oneAPI DLE
+        if: ${{ inputs.oneapi != 'installed' }}
+        run: |
+          rm -rf ~/intel ~/.intel
+          wget -q -O oneapi.sh "${{ inputs.oneapi }}"
+          bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi
+          echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV}
+      - name: Checkout torch-xpu-ops
+        uses: actions/checkout@v4
+        with:
+          path: gs-scripts
+      - name: Prepare source code
+        run: |
+          git clone https://github.com/pytorch/pytorch gs-pytorch
+          cd gs-pytorch
+          LATEST_PT_COMMIT="$(git rev-parse HEAD)"
+          cd ..
+          git clone https://github.com/intel/torch-xpu-ops gs-torch-xpu-ops
+          cd gs-torch-xpu-ops
+          LATEST_XPU_COMMIT="$(git rev-parse HEAD)"
+          cd ..
+          echo "LATEST_PT_COMMIT=${LATEST_PT_COMMIT}" >> ${GITHUB_ENV}
+          echo "LATEST_XPU_COMMIT=${LATEST_XPU_COMMIT}" >> ${GITHUB_ENV}
+      - name: Prepare test env
+        run: |
+          if [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/huggingface.py"* ]];then
+            pip install transformers==4.44.2
+          elif [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/timm_models.py"* ]];then
+            pip install --no-deps git+https://github.com/huggingface/pytorch-image-models@v1.0.14
+            pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch)
+          elif [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/torchbench.py"* ]];then
+            model_name="$(echo ${{ inputs.search_case }} |sed 's+.*\--only *++;s/ .*//')"
+            pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu
+            git clone https://github.com/pytorch/benchmark gs-benchmark
+            cd gs-benchmark
+            echo "PYTHONPATH=${PWD}:${PYTHONPATH}" >> ${GITHUB_ENV}
+            python install.py ${model_name}
+            pip uninstall -y torch
+          else
+            pip install -r gs-pytorch/.ci/docker/requirements-ci.txt
+          fi
+      - name: Triton Installation
+        run: |
+          cd gs-pytorch
+          rm -rf pytorch_triton_xpu-*.whl
+          if [ "${{ inputs.triton }}" != "pinned" ];then
+            TRITON_COMMIT_ID="${{ inputs.triton }}"
+          else
+            TRITON_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)"
+          fi
+          TRITON_VERSION_NAME="$(
+            curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\
+            grep '__version__' |head -n 1 |awk -F "'" '{print $2}'
+          )"
+          python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME}
+          pip install pytorch_triton_xpu-*.whl
+      - name: Bisect search pytorch
+        if: ${{ contains(inputs.search_commits, 'pytorch') }}
+        run: |
+          pytorch_commits="$(echo ${{ inputs.search_commits }} |sed 's+.*pytorch=++;s+,.*++')"
+          old_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $1}')"
+          new_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $2}')"
+          old_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \
+            --WORKSPACE="${{ github.workspace }}/gs-search" \
+            --PYTORCH_VERSION="${old_commit}" \
+            --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \
+            > ${{ github.workspace }}/gs-logs/search-${old_commit}-${LATEST_XPU_COMMIT}.log 2>&1 && echo $? || echo $?)"
|| echo $?)" + old_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" + export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')" + new_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ + --WORKSPACE="${{ github.workspace }}/gs-search" \ + --PYTORCH_VERSION="${new_commit}" \ + --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \ + > ${{ github.workspace }}/gs-logs/search-${new_commit}-${LATEST_XPU_COMMIT}.log 2>&1 && echo $? || echo $?)" + new_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" + if [ "${old_status}" != "${new_status}" ];then + cd gs-pytorch + git reset --hard + bisect_status="$(git bisect start ${new_commit} ${old_commit} \ + ${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ + --WORKSPACE="${{ github.workspace }}/gs-search" \ + --PYTORCH_VERSION="$(git rev-parse HEAD)" \ + --TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \ + > ${{ github.workspace }}/gs-logs/bisect-pytorch.log 2>&1 && echo $? || echo $?)" + git bisect log |tee ${{ github.workspace }}/gs-logs/result-pytorch.log + else + echo "Checked and no regression !" + fi + - name: Bisect search torch-xpu-ops + if: ${{ contains(inputs.search_commits, 'xpu-ops') }} + run: | + xpu_ops_commits="$(echo ${{ inputs.search_commits }} |sed 's+.*xpu-ops=++;s+,.*++')" + old_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $1}')" + new_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $2}')" + old_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ + --WORKSPACE="${{ github.workspace }}/gs-search" \ + --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \ + --TORCH_XPU_OPS_VERSION="${old_commit}" \ + > ${{ github.workspace }}/gs-logs/search-${LATEST_PT_COMMIT}-${old_commit}.log && echo $? || echo $?)" + old_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" + export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')" + new_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ + --WORKSPACE="${{ github.workspace }}/gs-search" \ + --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \ + --TORCH_XPU_OPS_VERSION="${new_commit}" \ + > ${{ github.workspace }}/gs-logs/search-${LATEST_PT_COMMIT}-${new_commit}.log && echo $? || echo $?)" + new_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)" + if [ "${old_status}" != "${new_status}" ];then + cd gs-pytorch + git reset --hard + bisect_status="$( + git bisect start ${new_commit} ${old_commit} \ + ${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \ + --WORKSPACE="${{ github.workspace }}/gs-search" \ + --PYTORCH_VERSION="${LATEST_PT_COMMIT}" \ + --TORCH_XPU_OPS_VERSION="$(git rev-parse HEAD)" \ + > ${{ github.workspace }}/gs-logs/bisect-torch-xpu-ops.log 2>&1 && echo $? || echo $?)" + git bisect log |tee ${{ github.workspace }}/gs-logs/result-torch-xpu-ops.log + else + echo "Checked and no regression !" + fi + - name: Summary + run: | + cat gs-logs/summary.csv |tee -a ${GITHUB_STEP_SUMMARY} + for reulst_log in $(find gs-logs -name "result-*.log") + do + echo -e "\n\n\n${reulst_log}" |tee -a ${GITHUB_STEP_SUMMARY} + cat ${reulst_log} |tee -a ${GITHUB_STEP_SUMMARY} + done