Skip to content

Commit 5c1c479

Browse files
[Nightly] Add op microbench (#1477)
1 parent 36107bf commit 5c1c479

Some content is hidden

Large commits hide some content by default. Use the search box below to find content that may be hidden.

58 files changed

+3078
-0
lines changed

.github/scripts/microbench_summary.sh

+203
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#! /bin/bash
2+
# This script is for op perf summary, both for forward and backward op
3+
4+
# usage
5+
# Summary forward op time, forward_op_summary.csv is forward summary file
6+
## bash microbench_summary.sh path/to/profile's log forward_op_summary.csv
7+
# Summary backward op time, backward_op_summary.csv is backward summary file, True means summary backward, default is false.
8+
## bash microbench_summary.sh path/to/profile's log backward_op_summary.csv True
9+
10+
results_dir="$1"
11+
output_file="$2"
12+
Get_backward=${3:-False}
13+
cd "$results_dir" || exit
14+
15+
echo "case_name;datatype;op_name;shape;channels_last;dim;output_size;P;reduce;kernel_size;stride;replacement;num_samples;scale_factor;mode;padding_mode;align_corners;shifts;affine;backward;time(us)" >> "$output_file"
16+
17+
function op_summary {
18+
while IFS= read -r line1 && IFS= read -r line2 <&3; do
19+
text=${line1}
20+
IFS=';' read -ra pairs <<< "$(echo "$text" | tr -d '\n' | tr -s ' ')"
21+
for pair in "${pairs[@]}"; do
22+
IFS=':' read -r key value <<< "$pair"
23+
key=$(echo "$key" | xargs)
24+
value=$(echo "$value" | xargs)
25+
if [[ shape = "$key" ]] ; then
26+
shape=${value}
27+
fi
28+
if [[ datatype = "$key" ]] ; then
29+
datatype=${value}
30+
fi
31+
if [[ dim = "$key" ]] || [[ dims = "$key" ]] ; then
32+
dim=${value}
33+
fi
34+
if [[ output_size = "$key" ]] ; then
35+
output_size=${value}
36+
fi
37+
if [[ channels_last = "$key" ]] ; then
38+
channels_last=${value}
39+
fi
40+
if [[ backward = "$key" ]] ; then
41+
backward=${value}
42+
fi
43+
if [[ reduce = "$key" ]] ; then
44+
reduce=${value}
45+
fi
46+
if [[ kernel_size = "$key" ]] ; then
47+
kernel_size=${value}
48+
fi
49+
if [[ P = "$key" ]] ; then
50+
P=${value}
51+
fi
52+
if [[ stride = "$key" ]] ; then
53+
stride=${value}
54+
fi
55+
if [[ replacement = "$key" ]] ; then
56+
replacement=${value}
57+
fi
58+
if [[ num_samples = "$key" ]] ; then
59+
num_samples=${value}
60+
fi
61+
if [[ scale_factor = "$key" ]] ; then
62+
scale_factor=${value}
63+
fi
64+
if [[ mode = "$key" ]] ; then
65+
mode=${value}
66+
fi
67+
if [[ padding_mode = "$key" ]] ; then
68+
padding_mode=${value}
69+
fi
70+
if [[ align_corners = "$key" ]] ; then
71+
align_corners=${value}
72+
fi
73+
if [[ affine = "$key" ]] ; then
74+
affine=${value}
75+
fi
76+
if [[ shifts = "$key" ]] ; then
77+
shifts=${value}
78+
fi
79+
done
80+
number=""
81+
if [[ $line2 =~ ^([0-9.]+)([a-zA-Z]+)$ ]] ; then
82+
number="${BASH_REMATCH[1]}"
83+
unit="${BASH_REMATCH[2]}"
84+
fi
85+
# Align the time units
86+
if [[ $unit == "ms" ]] ;then
87+
number=$(echo "scale=3; $number * 1000" | bc)
88+
fi
89+
if [[ $unit == "s" ]] ;then
90+
number=$(echo "scale=3; $number * 1000000" | bc)
91+
fi
92+
if [[ $Get_backward == "True" ]] && [[ $backward == "False" ]]; then
93+
echo "Only Forward"
94+
else
95+
echo "${i%.*};${datatype};${op_name};$shape;$channels_last;$dim;$output_size;$P;$reduce;$kernel_size;$stride;$replacement;$num_samples;$scale_factor;$mode;$padding_mode;$align_corners;$shifts;$affine;$backward;$number" >> "$output_file"
96+
fi
97+
done < <(echo "$texts") 3< <(echo "$times")
98+
}
99+
100+
filename=$(find -- *.log)
101+
102+
for i in $filename
103+
do
104+
output_size=""
105+
P=""
106+
channels_last=""
107+
dim=""
108+
backward=""
109+
reduce=""
110+
kernel_size=""
111+
affine=""
112+
output_size=""
113+
stride=""
114+
replacement=""
115+
num_samples=""
116+
scale_factor=""
117+
mode=""
118+
padding_mode=""
119+
align_corners=""
120+
shifts=""
121+
case_name="${i%.*}"
122+
op_name=$(echo "$case_name" | awk -F. '{print $NF}')
123+
if [[ $Get_backward == "False" ]] ; then
124+
if [[ $op_name =~ batch_norm ]] ; then
125+
op_name="aten::batch_norm"
126+
times=$(grep -E "${op_name}" "${i}" | awk '{print $10}')
127+
elif [[ $op_name =~ exponential ]] || [[ $op_name =~ geometric ]] || [[ $op_name =~ uniform ]] || [[ $op_name =~ random ]] || [[ $op_name =~ normal ]] || [[ $op_name =~ log_normal ]] || [[ $op_name =~ bernoulli ]] || [[ $op_name =~ cauchy ]] ;then
128+
op_name=$op_name"_"
129+
times=$(grep -E "${op_name}" "${i}" | awk '{print $10}')
130+
elif [[ $op_name == unique ]] ; then
131+
op_name="unique2"
132+
times=$(grep -E "${op_name}" "${i}" | awk '{print $10}')
133+
elif [[ $op_name == max_pool3d ]] || [[ $op_name == max_pool2d ]] ; then
134+
op_name=$op_name"_with_indices"
135+
times=$(grep -E "${op_name} " "${i}" | awk '{print $10}')
136+
elif [[ $op_name == dropout ]] || [[ $op_name == layer_norm ]] ; then
137+
times=$(grep -w "${op_name}" "${i}" | awk '{print $10}')
138+
elif [[ $op_name == ctc_loss ]] ; then
139+
op_name="_"$op_name
140+
times=$(grep -w "${op_name}" "${i}" | awk '{print $10}')
141+
elif [[ $op_name == adaptive_avg_pool2d ]] ; then
142+
op_name="adaptive_avg_pool2d"
143+
times=$(grep -w "${op_name} " "${i}" | awk '{print $10}')
144+
elif [[ $op_name == softmax ]] ; then
145+
op_name="aten::softmax"
146+
times=$(grep -E "${op_name}" "${i}" | awk '{print $10}')
147+
elif [[ $op_name == group_norm ]] ; then
148+
op_name="aten::group_norm"
149+
times=$(grep -E "${op_name}" "${i}" | awk '{print $10}')
150+
else
151+
times=$(grep -E "${op_name} " "${i}" | awk '{print $10}')
152+
fi
153+
else
154+
if [[ $op_name =~ batch_norm ]] ; then
155+
op_name="batch_norm_backward"
156+
times=$(grep -E "${op_name}" "${i}" | awk '{print $10}')
157+
elif [[ $op_name == max_pool3d ]] || [[ $op_name == max_pool2d ]] ; then
158+
op_name=$op_name"_with_indices_backward"
159+
times=$(grep -E "${op_name} " "${i}" | awk '{print $10}')
160+
elif [[ $op_name == col2im ]] ; then
161+
op_name="Col2ImBackward0"
162+
times=$(grep -E "${op_name} " "${i}" | grep -v "autograd::engine" | awk '{print $10}')
163+
elif [[ $op_name == im2col ]] ; then
164+
op_name="Im2ColBackward0"
165+
times=$(grep -E "${op_name} " "${i}" | grep -v "autograd::engine" | awk '{print $10}')
166+
elif [[ $op_name == flip ]] ; then
167+
op_name="FlipBackward0"
168+
times=$(grep -E "${op_name} " "${i}" | grep -v "autograd::engine" | awk '{print $10}')
169+
elif [[ $op_name == matmul ]] ; then
170+
op_name="MmBackward0"
171+
times=$(grep -E "${op_name} " "${i}" | grep -v "autograd::engine" | awk '{print $10}')
172+
elif [[ $op_name == roll ]] ; then
173+
op_name="RollBackward0"
174+
times=$(grep -E "${op_name} " "${i}" | grep -v "autograd::engine" | awk '{print $10}')
175+
elif [[ $op_name == softmax ]] ; then
176+
op_name=$op_name"_backward_data"
177+
times=$(grep -E "${op_name} " "${i}" | awk '{print $10}')
178+
elif [[ $op_name == remainder ]] ; then
179+
op_name="RemainderBackward0"
180+
times=$(grep -E "${op_name} " "${i}" | awk '{print $10}')
181+
elif [[ $op_name == l1_loss ]] ; then
182+
op_name="l1_loss"
183+
else
184+
op_name=$op_name"_backward"
185+
times=$(grep -E "${op_name} " "${i}" | awk '{print $10}')
186+
fi
187+
fi
188+
189+
texts=$(grep -E "shape :|shape:" "$i")
190+
number=""
191+
if [[ $op_name == l1_loss ]] && [[ $Get_backward == "True" ]] ; then
192+
op_name="AbsBackward0"
193+
times=$(grep -E "${op_name} " "${i}" | grep -v "autograd" | awk '{print $10}' | head -n 6)
194+
texts=$(grep -E "shape :|shape:" "$i" | head -n 6)
195+
op_summary
196+
op_name="MeanBackward0"
197+
times=$(grep -E "${op_name} " "${i}" | grep -v "autograd" | awk '{print $10}')
198+
texts=$(grep -E "shape :|shape:" "$i" | tail -n 6)
199+
op_summary
200+
else
201+
op_summary
202+
fi
203+
done
+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
name: Linux OP Benchmark Test
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
pytorch:
7+
required: false
8+
type: string
9+
default: 'main'
10+
description: Pytorch branch/commit
11+
keep_torch_xpu_ops:
12+
required: false
13+
type: string
14+
default: 'false'
15+
description: Keep torch-xpu-ops pin. `true` means use pined commit
16+
triton:
17+
required: false
18+
type: string
19+
default: ''
20+
description: Triton commit. Use pytorch pined commit by default
21+
python:
22+
required: false
23+
type: string
24+
default: '3.10'
25+
description: Python version
26+
runner:
27+
required: true
28+
type: string
29+
default: 'linux.idc.xpu'
30+
description: Runner label
31+
driver:
32+
required: false
33+
type: string
34+
default: 'rolling'
35+
description: Driver lts/rolling
36+
37+
permissions: read-all
38+
39+
jobs:
40+
op_benchmark_test:
41+
runs-on: ${{ inputs.runner }}
42+
timeout-minutes: 900
43+
env:
44+
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
45+
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
46+
steps:
47+
- name: Checkout torch-xpu-ops
48+
uses: actions/checkout@v4
49+
- name: Prepare Stock Pytorch
50+
run: |
51+
pwd
52+
which conda && conda clean -ay
53+
conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
54+
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
55+
conda create -n xpu_op_${ZE_AFFINITY_MASK} python=${{ inputs.python }} cmake ninja -y
56+
source activate xpu_op_${ZE_AFFINITY_MASK}
57+
cd ../ && rm -rf pytorch
58+
pip install requests
59+
git clone https://github.com/pytorch/pytorch pytorch
60+
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
61+
cd pytorch && git checkout $(echo ${{ inputs.pytorch }})
62+
# apply PRs for stock pytorch
63+
python ../torch-xpu-ops/.github/scripts/apply_torch_pr.py
64+
git status && git show -s
65+
git submodule sync && git submodule update --init --recursive
66+
if [[ ${{ inputs.keep_torch_xpu_ops }} == 'true' ]]; then
67+
echo "Don't replace torch-xpu-ops!"
68+
else
69+
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
70+
# Workaround for torch-xpu-ops ci test
71+
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
72+
fi
73+
fi
74+
- name: Download Pytorch wheel
75+
if: ${{ inputs.pytorch != 'nightly_wheel' }}
76+
uses: actions/download-artifact@v4
77+
with:
78+
name: Torch-XPU-Wheel-${{ github.event.pull_request.number || github.sha }}
79+
path: ${{ github.workspace }}
80+
- name: Install Pytorch XPU
81+
run: |
82+
source activate xpu_op_${ZE_AFFINITY_MASK}
83+
source .github/scripts/env.sh ${{ inputs.pytorch }}
84+
if [ "${{ inputs.pytorch }}" != "nightly_wheel" ]; then
85+
cd ../pytorch
86+
export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
87+
pip install -r requirements.txt
88+
pip install --force-reinstall ${{ github.workspace }}/torch*.whl
89+
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
90+
else
91+
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
92+
TORCH_COMMIT_ID=$(python -c 'import torch; print(torch.version.git_version)')
93+
fi
94+
pip install -r .ci/docker/requirements-ci.txt
95+
- name: Torch Config
96+
run: |
97+
source activate xpu_op_${ZE_AFFINITY_MASK}
98+
source .github/scripts/env.sh ${{ inputs.pytorch }}
99+
python -c "import torch; print(torch.__config__.show())"
100+
python -c "import torch; print(torch.__config__.parallel_info())"
101+
python -c "import torch; print(torch.__config__.torch.xpu.device_count())"
102+
103+
cd ..
104+
python pytorch/torch/utils/collect_env.py
105+
rm -rf /tmp/torchinductor_*
106+
rm -rf ~/.triton/cache
107+
- name: Run Torch XPU Op Benchmark
108+
if: ${{ inputs.driver == 'rolling' }}
109+
run: |
110+
source .github/scripts/env.sh ${{ inputs.pytorch }}
111+
source activate xpu_op_${ZE_AFFINITY_MASK}
112+
mkdir -p ${{ github.workspace }}/op_benchmark
113+
cd test/microbench
114+
filename=$(find -- *.py)
115+
for i in $filename
116+
do
117+
python ${i%.*}.py > ${{ github.workspace }}/op_benchmark/${i%.*}.log
118+
done
119+
# Summary forward op time
120+
bash ${{ github.workspace }}/.github/scripts/microbench_summary.sh ${{ github.workspace }}/op_benchmark ${{ github.workspace }}/op_benchmark/forward_op_summary.csv
121+
# Summary backward op time
122+
bash ${{ github.workspace }}/.github/scripts/microbench_summary.sh ${{ github.workspace }}/op_benchmark ${{ github.workspace }}/op_benchmark/backward_op_summary.csv True
123+
- name: Upload Inductor XPU OP benchmark Log
124+
if: always()
125+
uses: actions/upload-artifact@v4
126+
with:
127+
name: Inductor-XPU-OP-Benchmark-Data-${{ github.event.pull_request.number || github.sha }}
128+
path: ${{ github.workspace }}/op_benchmark

.github/workflows/nightly_ondemand_rolling.yml

+12
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,18 @@ jobs:
9595
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
9696
driver: rolling
9797
runner: pvc_rolling
98+
99+
Linux-Nightly-Ondemand-OP-Microbench-Tests-Rolling:
100+
name: linux-nightly-ondemand-rolling / Op_microbench
101+
needs: Linux-Nightly-Ondemand-Build-Rolling
102+
uses: ./.github/workflows/_linux_op_benchmark.yml
103+
with:
104+
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
105+
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }}
106+
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
107+
triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
108+
driver: rolling
109+
runner: pvc_rolling
98110

99111
Linux-Nightly-Ondemand-E2E-Tests-Rolling:
100112
runs-on: pvc_rolling

0 commit comments

Comments
 (0)