Skip to content

Commit ce94de2

Browse files
authored
[BE] Move torch xpu wheel build in manylinux_2_28 container both for CI/CD (#1580)
1. Build the XPU wheel inside the manylinux_2_28 container. 2. Split the build steps into a standalone script. 3. Add a post-build (post-op) step for the wheel.
1 parent 95d604e commit ce94de2

File tree

11 files changed

+181
-119
lines changed

11 files changed

+181
-119
lines changed

.github/actions/inductor-xpu-e2e-test/action.yml

-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ runs:
5151
shell: bash
5252
run: |
5353
source activate e2e_ci
54-
source .github/scripts/env.sh ${{ inputs.pytorch }}
5554
if [[ ${{ inputs.suite }} == *"torchbench"* ]]; then
5655
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
5756
cd ../ && rm -rf audio && git clone --single-branch -b main https://github.com/pytorch/audio.git
@@ -94,7 +93,6 @@ runs:
9493
shell: bash
9594
run: |
9695
source activate e2e_ci
97-
source .github/scripts/env.sh ${{ inputs.pytorch }}
9896
cp .github/scripts/inductor_xpu_test.sh ../pytorch
9997
cd ../pytorch
10098

.github/actions/pt2e/action.yml

-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ runs:
3636
shell: bash
3737
run: |
3838
source activate e2e_ci
39-
source .github/scripts/env.sh ${{ inputs.pytorch }}
4039
# accuracy code
4140
if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then
4241
rm -rf pt2e-accuracy
@@ -95,7 +94,6 @@ runs:
9594
shell: bash
9695
run: |
9796
source activate e2e_ci
98-
source .github/scripts/env.sh ${{ inputs.pytorch }}
9997
pt2e_logs_dir="${{ github.workspace }}/../pytorch/inductor_log/pt2e"
10098
rm -rf "${pt2e_logs_dir}" && mkdir -p "${pt2e_logs_dir}"
10199
if [[ "${{ inputs.scenario }}" == *"accuracy"* ]];then

.github/scripts/build.sh

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
#!/bin/bash
# Build a PyTorch wheel with XPU support against a chosen torch-xpu-ops revision.
#
# Usage:
#   ./build.sh --WORKSPACE=<path/to/dir> \
#       --PYTORCH_REPO=<pytorch repo url> --PYTORCH_VERSION=<pytorch branch or commit> \
#       --TORCH_XPU_OPS_REPO=<torch-xpu-ops repo url> \
#       --TORCH_XPU_OPS_VERSION=<torch-xpu-ops branch, commit, "pinned" or "cicd">
#
# Every --NAME=VALUE argument is exported as the environment variable NAME, so
# the same settings may also be supplied through the environment. Unset or
# empty settings fall back to the defaults listed under "Init params".
#
# Special TORCH_XPU_OPS_VERSION values (compared case-insensitively):
#   pinned  use the commit recorded in pytorch/third_party/xpu.txt
#   cicd    reuse the checkout already present at ${WORKSPACE}/torch-xpu-ops
#
# Files written to WORKSPACE: pytorch.commit (the checked-out PyTorch commit)
# and, when the XPU backend is compiled in, the post-processed torch*.whl.
set -xe
export GIT_PAGER=cat

# Parse params: strip the leading dashes and export NAME=VALUE without `eval`,
# so a value can never be executed as shell code.
readonly param_name_re='^[A-Za-z_][A-Za-z0-9_]*$'
for arg in "$@"; do
  pair="${arg#"${arg%%[!-]*}"}"
  name="${pair%%=*}"
  if [[ "${pair}" != *=* || ! "${name}" =~ ${param_name_re} ]]; then
    echo "Invalid argument '${arg}', expected --NAME=VALUE" >&2
    exit 1
  fi
  export "${name}=${pair#*=}"
done

# Init params. Defaults are applied AFTER argument parsing so that an empty
# --NAME= does not clobber a default, and WORKSPACE is made absolute whatever
# its origin (the script changes directory several times below).
WORKSPACE="$(realpath "${WORKSPACE:-/tmp}")"
PYTORCH_REPO="${PYTORCH_REPO:-https://github.com/pytorch/pytorch.git}"
PYTORCH_VERSION="${PYTORCH_VERSION:-main}"
TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO:-https://github.com/intel/torch-xpu-ops.git}"
TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION:-main}"
export WORKSPACE PYTORCH_REPO PYTORCH_VERSION TORCH_XPU_OPS_REPO TORCH_XPU_OPS_VERSION

# Set pytorch
# ${WORKSPACE:?} aborts instead of running `rm -rf /pytorch` on an empty value.
rm -rf "${WORKSPACE:?}/pytorch"
git clone "${PYTORCH_REPO}" "${WORKSPACE}/pytorch"
cd "${WORKSPACE}/pytorch"
git checkout "${PYTORCH_VERSION}"
git remote -v && git branch && git show -s
git rev-parse HEAD > "${WORKSPACE}/pytorch.commit"

# Set torch-xpu-ops
if [[ "${TORCH_XPU_OPS_VERSION,,}" == "pinned" ]]; then
  TORCH_XPU_OPS_REPO="https://github.com/intel/torch-xpu-ops.git"
  # Drop any whitespace / line ending around the pinned commit id.
  TORCH_XPU_OPS_VERSION="$(tr -d '[:space:]' < "${WORKSPACE}/pytorch/third_party/xpu.txt")"
fi
if [[ "${TORCH_XPU_OPS_VERSION,,}" != "cicd" ]]; then
  rm -rf "${WORKSPACE:?}/torch-xpu-ops"
  git clone "${TORCH_XPU_OPS_REPO}" "${WORKSPACE}/torch-xpu-ops"
  cd "${WORKSPACE}/torch-xpu-ops"
  git checkout "${TORCH_XPU_OPS_VERSION}"
elif [[ ! -d "${WORKSPACE}/torch-xpu-ops" ]]; then
  # "cicd" relies on the caller having checked torch-xpu-ops out beforehand.
  echo "TORCH_XPU_OPS_VERSION=cicd requires an existing ${WORKSPACE}/torch-xpu-ops" >&2
  exit 1
fi
cd "${WORKSPACE}/torch-xpu-ops"
git remote -v && git branch && git show -s
cd "${WORKSPACE}/pytorch"
rm -rf third_party/torch-xpu-ops
cp -r "${WORKSPACE}/torch-xpu-ops" third_party/torch-xpu-ops
# Replace the CMake-time checkout of the pinned torch-xpu-ops commit with a
# harmless `git log -n 1`, so the copy placed above is the one that gets built.
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt

# Pre Build
cd "${WORKSPACE}/pytorch"
python -m pip install requests
python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py
git diff && git submodule sync && git submodule update --init --recursive
python -m pip install -r requirements.txt
python -m pip install mkl-static mkl-include
# python -m pip install -U cmake==3.31.6
export USE_ONEMKL=1
export USE_XCCL=1

# Build
WERROR=1 python setup.py bdist_wheel

# Post Build
python -m pip install patchelf
rm -rf ./tmp
# rpath.sh is given the freshly built wheel; the wheel installed and saved
# afterwards is taken from ./tmp.
bash third_party/torch-xpu-ops/.github/scripts/rpath.sh "${WORKSPACE}/pytorch/dist/"torch*.whl
python -m pip install --force-reinstall tmp/torch*.whl

# Verify (run from WORKSPACE rather than from inside the pytorch source tree)
cd "${WORKSPACE}"
python "${WORKSPACE}/pytorch/torch/utils/collect_env.py"
python -c "import torch; print(torch.__config__.show())"
python -c "import torch; print(torch.__config__.parallel_info())"
xpu_is_compiled="$(python -c 'import torch; print(torch.xpu._is_compiled())')"

# Save wheel only when the installed torch reports that XPU support is compiled in
if [[ "${xpu_is_compiled,,}" == "true" ]]; then
  cp "${WORKSPACE}/pytorch/tmp/"torch*.whl "${WORKSPACE}"
else
  echo "Build got failed!"
  exit 1
fi

.github/scripts/env.sh

+17-10
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
#!/bin/bash
22

3-
if [ "$1" != "nightly_wheel" ];then
4-
source /opt/intel/oneapi/compiler/latest/env/vars.sh
5-
source /opt/intel/oneapi/umf/latest/env/vars.sh
6-
source /opt/intel/oneapi/pti/latest/env/vars.sh
7-
source /opt/intel/oneapi/ccl/latest/env/vars.sh
8-
source /opt/intel/oneapi/mpi/latest/env/vars.sh
9-
source /opt/intel/oneapi/mkl/latest/env/vars.sh
10-
else
11-
echo "Don't need to source DL-Essential for nightly wheel"
12-
fi
3+
source /opt/intel/oneapi/compiler/latest/env/vars.sh
4+
source /opt/intel/oneapi/umf/latest/env/vars.sh
5+
source /opt/intel/oneapi/pti/latest/env/vars.sh
6+
source /opt/intel/oneapi/ccl/latest/env/vars.sh
7+
source /opt/intel/oneapi/mpi/latest/env/vars.sh
8+
source /opt/intel/oneapi/mkl/latest/env/vars.sh
9+
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="\
10+
intel-cmplr-lib-rt==2025.0.5 |\
11+
intel-cmplr-lib-ur==2025.0.5 |\
12+
intel-cmplr-lic-rt==2025.0.5 |\
13+
intel-sycl-rt==2025.0.5 |\
14+
impi-devel==2021.14.2 |\
15+
oneccl-devel==2021.14.1 |\
16+
mkl-devel==2025.0.1 |\
17+
onemkl-sycl-dft==2025.0.1 |\
18+
tcmlib==1.2.0 | umf==0.9.1 | intel-pti==0.10.2 \
19+
"

.github/scripts/lintrunner.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ if ! command -v lintrunner &> /dev/null; then
2424
fi
2525

2626
# Ignoring errors in one specific run
27-
export SHELLCHECK_OPTS="-e SC2154"
27+
export SHELLCHECK_OPTS="-e SC2154 -e SC2086 -e SC1091 -e SC2046"
2828

2929
# This has already been cached in the docker image
3030
lintrunner init 2> /dev/null

.github/workflows/_linux_build.yml

+84-83
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
workflow_call:
55
inputs:
66
pytorch:
7-
required: false
7+
required: true
88
type: string
99
default: 'main'
1010
description: Pytorch branch/commit
@@ -13,6 +13,11 @@ on:
1313
type: string
1414
default: 'false'
1515
description: Keep torch-xpu-ops pin. `true` means use pined commit
16+
driver:
17+
required: false
18+
type: string
19+
default: 'lts'
20+
description: Driver lts/rolling
1621
python:
1722
required: false
1823
type: string
@@ -23,11 +28,6 @@ on:
2328
type: string
2429
default: 'linux.idc.xpu'
2530
description: Runner label
26-
driver:
27-
required: false
28-
type: string
29-
default: 'lts'
30-
description: Driver lts/rolling
3131
update_lkg:
3232
required: false
3333
type: string
@@ -44,68 +44,85 @@ permissions:
4444
jobs:
4545
build:
4646
runs-on: ${{ inputs.runner }}
47+
container:
48+
image: 'pytorch/manylinux2_28-builder:xpu-main'
49+
volumes:
50+
- ${{ github.workspace }}:${{ github.workspace }}
51+
env:
52+
PATH: /opt/xpu-build/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
53+
commit_issue: 1280
54+
GH_TOKEN: ${{ github.token }}
55+
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
56+
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
4757
outputs:
4858
TORCH_COMMIT_ID: ${{ steps.build_version.outputs.TORCH_COMMIT_ID }}
4959
timeout-minutes: 900
50-
env:
51-
commit_issue: 1280
52-
GH_TOKEN: ${{ github.token }}
53-
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
54-
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
5560
steps:
61+
- name: Setup based env
62+
run: |
63+
# Cleanup workspace
64+
rm -rf ${{ github.workspace }}/*
65+
# Install gh
66+
dnf install 'dnf-command(config-manager)'
67+
dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo
68+
dnf autoremove -y git236* && dnf install -y git
69+
dnf install gh --repo gh-cli -y
70+
# Setup python
71+
local_python=$(echo ${{ inputs.python }} |awk -F. '{printf("cp%s%s-cp%s%s", $1, $2, $1, $2)}')
72+
/opt/python/${local_python}/bin/python -m venv /opt/xpu-build
73+
which python && python -V && pip list
74+
pip install -U pip wheel setuptools
5675
- name: Checkout torch-xpu-ops
5776
uses: actions/checkout@v4
58-
- name: Prepare Stock Pytorch
59-
run: |
60-
pwd
61-
which conda && conda clean -ay
62-
conda remove --all -y -n xpu_build || \
63-
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_build
64-
conda create -n xpu_build python=${{ inputs.python }} cmake=3.28 ninja -y
65-
source activate xpu_build
66-
cd ../ && rm -rf pytorch
67-
pip install requests
68-
git clone https://github.com/pytorch/pytorch pytorch
69-
cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
70-
# apply PRs for stock pytorch
71-
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
72-
git status && git show -s
73-
git submodule sync && git submodule update --init --recursive
74-
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
75-
echo "Don't replace torch-xpu-ops!"
76-
else
77-
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
78-
# Workaround for torch-xpu-ops ci test
79-
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
80-
fi
77+
with:
78+
path: torch-xpu-ops
8179
- name: Build Pytorch XPU
8280
run: |
8381
set -xe
84-
source activate xpu_build
85-
source .github/scripts/env.sh ${{ inputs.pytorch }}
86-
export USE_ONEMKL=1
87-
export USE_XCCL=1
88-
if [[ ${{ inputs.driver }} == 'lts' ]]; then
89-
export TORCH_XPU_ARCH_LIST='pvc'
82+
if [ "${{ inputs.driver }}" == "lts" ]; then
83+
export TORCH_XPU_ARCH_LIST='pvc'
84+
fi
85+
if [[ "${{ inputs.pytorch }}" == *"https://"* ]];then
86+
PYTORCH_REPO="$(echo ${{ inputs.pytorch }} |sed 's/@.*//')"
87+
PYTORCH_VERSION="$(echo ${{ inputs.pytorch }} |sed 's/.*@//')"
88+
else
89+
PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
90+
PYTORCH_VERSION="${{ inputs.pytorch }}"
91+
fi
92+
if [[ "${{ inputs.keep_torch_xpu_ops }}" == *"https://"* ]];then
93+
TORCH_XPU_OPS_REPO="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/@.*//')"
94+
TORCH_XPU_OPS_VERSION="$(echo ${{ inputs.keep_torch_xpu_ops }} |sed 's/.*@//')"
95+
elif [ "${{ inputs.keep_torch_xpu_ops }}" == "true" ];then
96+
TORCH_XPU_OPS_VERSION="pinned"
97+
else
98+
TORCH_XPU_OPS_VERSION="cicd"
9099
fi
91-
pip install mkl-static==2025.0.1 mkl-include==2025.0.1
100+
# oneAPI DLE
101+
source ${{ github.workspace }}/torch-xpu-ops/.github/scripts/env.sh
102+
icpx --version
103+
# gcc 11
104+
source /opt/rh/gcc-toolset-11/enable
105+
build_status="$(
106+
${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \
107+
--WORKSPACE="${{ github.workspace }}" \
108+
--PYTORCH_REPO="${PYTORCH_REPO}" \
109+
--PYTORCH_VERSION="${PYTORCH_VERSION}" \
110+
--TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \
111+
--TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \
112+
> ${{ github.workspace }}/pytorch_build_${PYTORCH_VERSION//\//-}.log 2>&1 && echo $? || echo $?
113+
)"
114+
tail -n 100 ${{ github.workspace }}/pytorch_build_${PYTORCH_VERSION//\//-}.log
92115
build_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
93116
repo="${{ github.repository }}"
94117
last_commit=$(gh --repo $repo issue view $commit_issue --json body -q .body | grep ${{ inputs.pytorch }} | cut -d'[' -f 2 | cut -d']' -f 1)
95-
cd ../pytorch
96-
current_commit=$(git rev-parse HEAD)
118+
current_commit=$(cat ${{ github.workspace }}/pytorch.commit)
97119
is_fork_pr=false
98120
if [ -n "${{ github.event.pull_request }}" ] && [ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]; then
99121
is_fork_pr=true
100122
fi
101123
echo ">>>>>>>>>>>>Fork PR: ${is_fork_pr}, pytorch branch: ${{ inputs.pytorch }}, last commit: ${last_commit}, current commit: ${current_commit}"
102-
103-
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
104-
pip install -r requirements.txt
105-
WERROR=1 python setup.py bdist_wheel 2>&1 | tee pytorch_${current_commit}_build.log
106-
107124
if [[ "${is_fork_pr}" == "false" ]]; then
108-
if [ -f dist/torch*.whl ] && \
125+
if [ "${build_status}" == "0" ] && \
109126
[ "${{ inputs.update_lkg }}" == "true" ] && \
110127
[ "${last_commit}" != "${current_commit}" ] && \
111128
[[ "${{ inputs.pytorch }}" == "main" || "${{ inputs.pytorch }}" == "release/"* ]]; then
@@ -114,55 +131,34 @@ jobs:
114131
gh --repo $repo issue edit $commit_issue --body-file new_body.txt
115132
gh --repo $repo issue comment $commit_issue -b "Update LKG torch, refer ${build_url}"
116133
fi
117-
if [ ! -f dist/torch*.whl ]; then
134+
if [ ! -f ${{ github.workspace }}/torch*.whl ] && [ "${TORCH_XPU_OPS_VERSION}" == "cicd" ]; then
118135
echo "Wheel build failed, use last commit in the issue https://github.com/intel/torch-xpu-ops/issues/1280"
119136
gh --repo $repo issue comment $commit_issue -b "Wheel build failed with commit [${current_commit}](https://github.com/pytorch/pytorch/tree/${current_commit}), refer ${build_url}. CC @intel/torch-xpu-ops-maintain @EikanWang @riverliuintel @fengyuan14 @xytintel @etaf @chuanqi129 @mengfei25"
120-
python setup.py clean
121-
git clean -df .
122-
git reset --hard
123-
git checkout $last_commit
124-
# apply PRs for stock pytorch
125-
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
126-
git status && git show -s
127-
git submodule sync && git submodule update --init --recursive
128-
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
129-
echo "Don't replace torch-xpu-ops!"
130-
else
131-
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
132-
# Workaround for torch-xpu-ops ci test
133-
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
134-
fi
135-
WERROR=1 python setup.py bdist_wheel
137+
build_status="$(
138+
${{ github.workspace }}/torch-xpu-ops/.github/scripts/build.sh \
139+
--WORKSPACE="${{ github.workspace }}" \
140+
--PYTORCH_REPO="https://github.com/pytorch/pytorch.git" \
141+
--PYTORCH_VERSION="${last_commit}" \
142+
--TORCH_XPU_OPS_REPO="${TORCH_XPU_OPS_REPO}" \
143+
--TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \
144+
>> ${{ github.workspace }}/pytorch_build_${last_commit}.log 2>&1 && echo $? || echo $?
145+
)"
146+
tail -n 100 ${{ github.workspace }}/pytorch_build_${last_commit}.log
136147
fi
137148
else
138149
echo "Forked PR, don't update the issue"
139150
fi
140-
pip install --force-reinstall dist/*.whl
141-
cp dist/*.whl ${{ github.workspace }}/
142-
cp pytorch_${current_commit}_build.log ${{ github.workspace }}/
151+
exit ${build_status}
143152
- name: Torch Config
144153
run: |
145-
source activate xpu_build
146-
source .github/scripts/env.sh ${{ inputs.pytorch }}
147154
python -c "import torch; print(torch.__config__.show())"
148155
python -c "import torch; print(torch.__config__.parallel_info())"
149156
python -c "import torch; print(torch.__config__.torch.xpu.device_count())"
150-
cd ..
151157
python pytorch/torch/utils/collect_env.py
152158
- name: Identify Build version
153159
id: build_version
154160
run: |
155-
source .github/scripts/env.sh
156-
cd ../pytorch
157-
echo "TORCH_BRANCH_ID=$(git rev-parse --abbrev-ref HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
158-
echo "TORCH_COMMIT_ID=$(git rev-parse HEAD)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
159-
echo "DRIVER_VERSION=$(sycl-ls |grep 'opencl:gpu' |awk '{print $NF}' |sort |uniq -c |sed 's/ //g;s/\[/*[/')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
160-
echo "KERNEL_VERSION=$(uname -rv 2>&1)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
161-
echo "BUNDLE_VERSION=$(icpx --version 2>&1 |grep 'DPC++/C++' |sed 's/.*(//;s/).*//')" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
162-
. /etc/os-release
163-
echo "OS_PRETTY_NAME=${PRETTY_NAME}" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
164-
echo "GCC_VERSION=$(gcc -dumpversion)" |tee -a "${GITHUB_OUTPUT}" >> "${GITHUB_ENV}"
165-
echo ${GITHUB_ENV}
161+
echo "TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')" |tee -a "${GITHUB_OUTPUT}"
166162
- name: Upload Torch XPU Wheel
167163
if: ${{ ! cancelled() }}
168164
uses: actions/upload-artifact@v4
@@ -175,3 +171,8 @@ jobs:
175171
with:
176172
name: Torch-XPU-Build-Log-${{ github.event.pull_request.number || github.sha }}
177173
path: ${{ github.workspace }}/pytorch_*.log
174+
- name: Cleanup
175+
if: always()
176+
run: |
177+
chmod 777 . -R
178+
rm -rf pytorch torch-xpu-ops pytorch_*.log torch*.whl

0 commit comments

Comments
 (0)