Skip to content

Commit b3b2116

Browse files
authored
Merge branch 'buddy-compiler:main' into main
2 parents d3afb63 + 332f70d commit b3b2116

File tree

24 files changed

+1375
-24
lines changed

24 files changed

+1375
-24
lines changed

examples/BuddyBert/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,15 @@ add_custom_command(
3333
-eliminate-empty-tensors
3434
-empty-tensor-to-alloc-tensor
3535
-one-shot-bufferize="bufferize-function-boundaries"
36+
-ownership-based-buffer-deallocation
37+
-buffer-deallocation-simplification
38+
-bufferization-lower-deallocations
3639
-matmul-parallel-vectorization-optimize
3740
-batchmatmul-optimize
3841
-convert-linalg-to-affine-loops
3942
-affine-loop-fusion
4043
-affine-parallelize
44+
-lower-affine
4145
-convert-scf-to-openmp
4246
-convert-vector-to-scf
4347
-expand-strided-metadata
@@ -81,5 +85,5 @@ target_compile_definitions(buddy-bert-run PRIVATE
8185

8286
target_link_directories(buddy-bert-run PRIVATE ${LLVM_LIBRARY_DIR})
8387

84-
set(BUDDY_BERT_LIBS BERT mlir_c_runner_utils)
88+
set(BUDDY_BERT_LIBS BERT mlir_c_runner_utils omp)
8589
target_link_libraries(buddy-bert-run ${BUDDY_BERT_LIBS})

examples/BuddyDeepSeekR1/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ add_custom_command(
2626
-eliminate-empty-tensors
2727
-empty-tensor-to-alloc-tensor
2828
-one-shot-bufferize="bufferize-function-boundaries"
29+
-ownership-based-buffer-deallocation
30+
-buffer-deallocation-simplification
31+
-bufferization-lower-deallocations
2932
-matmul-parallel-vectorization-optimize
3033
-batchmatmul-optimize
3134
-convert-linalg-to-affine-loops
@@ -67,11 +70,15 @@ add_custom_command(
6770
-eliminate-empty-tensors
6871
-empty-tensor-to-alloc-tensor
6972
-one-shot-bufferize="bufferize-function-boundaries"
73+
-ownership-based-buffer-deallocation
74+
-buffer-deallocation-simplification
75+
-bufferization-lower-deallocations
7076
-matmul-parallel-vectorization-optimize
7177
-batchmatmul-optimize
7278
-convert-linalg-to-affine-loops
7379
-affine-loop-fusion
7480
-affine-parallelize
81+
-lower-affine
7582
-convert-scf-to-openmp
7683
-func-bufferize-dynamic-offset
7784
-convert-vector-to-scf
@@ -109,6 +116,9 @@ add_custom_command(
109116
-eliminate-empty-tensors
110117
-empty-tensor-to-alloc-tensor
111118
-one-shot-bufferize="bufferize-function-boundaries"
119+
-ownership-based-buffer-deallocation
120+
-buffer-deallocation-simplification
121+
-bufferization-lower-deallocations
112122
-matmul-parallel-vectorization-optimize
113123
-batchmatmul-optimize
114124
-convert-linalg-to-affine-loops
@@ -150,11 +160,15 @@ add_custom_command(
150160
-eliminate-empty-tensors
151161
-empty-tensor-to-alloc-tensor
152162
-one-shot-bufferize="bufferize-function-boundaries"
163+
-ownership-based-buffer-deallocation
164+
-buffer-deallocation-simplification
165+
-bufferization-lower-deallocations
153166
-matmul-parallel-vectorization-optimize
154167
-batchmatmul-optimize
155168
-convert-linalg-to-affine-loops
156169
-affine-loop-fusion
157170
-affine-parallelize
171+
-lower-affine
158172
-convert-scf-to-openmp
159173
-func-bufferize-dynamic-offset
160174
-convert-vector-to-scf

examples/BuddyLeNet/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ add_custom_command(
2929
-eliminate-empty-tensors
3030
-convert-tensor-to-linalg
3131
-one-shot-bufferize="bufferize-function-boundaries"
32+
-ownership-based-buffer-deallocation
33+
-buffer-deallocation-simplification
34+
-bufferization-lower-deallocations
3235
-batchmatmul-optimize
3336
-convert-linalg-to-affine-loops
3437
-func-bufferize-dynamic-offset
@@ -98,6 +101,9 @@ add_custom_command(
98101
${BUDDY_BINARY_DIR}/buddy-opt
99102
-one-shot-bufferize="bufferize-function-boundaries"
100103
-func-bufferize-dynamic-offset
104+
-ownership-based-buffer-deallocation
105+
-buffer-deallocation-simplification
106+
-bufferization-lower-deallocations
101107
-convert-linalg-to-parallel-loops
102108
-canonicalize
103109
-gpu-map-parallel-loops

examples/BuddyLlama/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ add_custom_command(
1717
-eliminate-empty-tensors
1818
-empty-tensor-to-alloc-tensor
1919
-one-shot-bufferize="bufferize-function-boundaries"
20+
-ownership-based-buffer-deallocation
21+
-buffer-deallocation-simplification
22+
-bufferization-lower-deallocations
2023
-matmul-parallel-vectorization-optimize
2124
-batchmatmul-optimize
2225
-convert-linalg-to-affine-loops
@@ -58,11 +61,15 @@ add_custom_command(
5861
-eliminate-empty-tensors
5962
-empty-tensor-to-alloc-tensor
6063
-one-shot-bufferize="bufferize-function-boundaries"
64+
-ownership-based-buffer-deallocation
65+
-buffer-deallocation-simplification
66+
-bufferization-lower-deallocations
6167
-matmul-parallel-vectorization-optimize
6268
-batchmatmul-optimize
6369
-convert-linalg-to-affine-loops
6470
-affine-loop-fusion
6571
-affine-parallelize
72+
-lower-affine
6673
-convert-scf-to-openmp
6774
-convert-vector-to-scf
6875
-expand-strided-metadata

examples/BuddyMobileNetV3/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ add_custom_command(
3838
${BUDDY_BINARY_DIR}/buddy-opt
3939
-convert-elementwise-to-linalg
4040
-one-shot-bufferize="bufferize-function-boundaries"
41+
-ownership-based-buffer-deallocation
42+
-buffer-deallocation-simplification
43+
-bufferization-lower-deallocations
4144
-func-bufferize-dynamic-offset
4245
-convert-linalg-to-loops
4346
-convert-scf-to-cf

examples/BuddyResNet18/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ add_custom_command(
3838
${BUDDY_BINARY_DIR}/buddy-opt
3939
-convert-elementwise-to-linalg
4040
-one-shot-bufferize="bufferize-function-boundaries"
41-
-func-bufferize-dynamic-offset
41+
-ownership-based-buffer-deallocation
42+
-buffer-deallocation-simplification
43+
-bufferization-lower-deallocations
4244
-convert-linalg-to-loops
4345
-convert-scf-to-cf
4446
-convert-cf-to-llvm

examples/BuddyStableDiffusion/CMakeLists.txt

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
add_custom_command(
22
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/arg0_text_encoder.data
3-
${CMAKE_CURRENT_BINARY_DIR}arg1_text_encoder.data
3+
${CMAKE_CURRENT_BINARY_DIR}/arg1_text_encoder.data
44
${CMAKE_CURRENT_BINARY_DIR}/arg0_unet.data
55
${CMAKE_CURRENT_BINARY_DIR}/arg0_vae.data
66
${CMAKE_CURRENT_BINARY_DIR}/forward_text_encoder.mlir
@@ -23,6 +23,9 @@ add_custom_command(
2323
-eliminate-empty-tensors
2424
-empty-tensor-to-alloc-tensor
2525
-one-shot-bufferize="bufferize-function-boundaries"
26+
-ownership-based-buffer-deallocation
27+
-buffer-deallocation-simplification
28+
-bufferization-lower-deallocations
2629
-matmul-parallel-vectorization-optimize
2730
-batchmatmul-optimize
2831
-convert-linalg-to-affine-loops
@@ -64,13 +67,17 @@ add_custom_command(
6467
-eliminate-empty-tensors
6568
-empty-tensor-to-alloc-tensor
6669
-one-shot-bufferize="bufferize-function-boundaries"
70+
-ownership-based-buffer-deallocation
71+
-buffer-deallocation-simplification
72+
-bufferization-lower-deallocations
6773
-func-bufferize-dynamic-offset
6874
-conv-nhwc-fhwc-optimize
6975
-matmul-parallel-vectorization-optimize
7076
-batchmatmul-optimize
7177
-convert-linalg-to-affine-loops
7278
-affine-loop-fusion
7379
-affine-parallelize
80+
-lower-affine
7481
-convert-scf-to-openmp
7582
-convert-vector-to-scf
7683
-expand-strided-metadata
@@ -106,7 +113,10 @@ add_custom_command(
106113
-arith-expand
107114
-eliminate-empty-tensors
108115
-empty-tensor-to-alloc-tensor
109-
-one-shot-bufferize
116+
-one-shot-bufferize="bufferize-function-boundaries"
117+
-ownership-based-buffer-deallocation
118+
-buffer-deallocation-simplification
119+
-bufferization-lower-deallocations
110120
-matmul-parallel-vectorization-optimize
111121
-batchmatmul-optimize
112122
-convert-linalg-to-affine-loops
@@ -147,13 +157,16 @@ add_custom_command(
147157
-eliminate-empty-tensors
148158
-empty-tensor-to-alloc-tensor
149159
-one-shot-bufferize="bufferize-function-boundaries"
150-
-func-bufferize-dynamic-offset
160+
-ownership-based-buffer-deallocation
161+
-buffer-deallocation-simplification
162+
-bufferization-lower-deallocations
151163
-conv-nhwc-fhwc-optimize
152164
-matmul-parallel-vectorization-optimize
153165
-batchmatmul-optimize
154166
-convert-linalg-to-affine-loops
155167
-affine-loop-fusion
156168
-affine-parallelize
169+
-lower-affine
157170
-convert-scf-to-openmp
158171
-convert-vector-to-scf
159172
-expand-strided-metadata
@@ -190,6 +203,9 @@ add_custom_command(
190203
-eliminate-empty-tensors
191204
-empty-tensor-to-alloc-tensor
192205
-one-shot-bufferize="bufferize-function-boundaries"
206+
-ownership-based-buffer-deallocation
207+
-buffer-deallocation-simplification
208+
-bufferization-lower-deallocations
193209
-matmul-parallel-vectorization-optimize
194210
-batchmatmul-optimize
195211
-convert-linalg-to-affine-loops
@@ -231,13 +247,16 @@ add_custom_command(
231247
-eliminate-empty-tensors
232248
-empty-tensor-to-alloc-tensor
233249
-one-shot-bufferize="bufferize-function-boundaries"
234-
-func-bufferize-dynamic-offset
250+
-ownership-based-buffer-deallocation
251+
-buffer-deallocation-simplification
252+
-bufferization-lower-deallocations
235253
-conv-nhwc-fhwc-optimize
236254
-matmul-parallel-vectorization-optimize
237255
-batchmatmul-optimize
238256
-convert-linalg-to-affine-loops
239257
-affine-loop-fusion
240258
-affine-parallelize
259+
-lower-affine
241260
-convert-scf-to-openmp
242261
-convert-vector-to-scf
243262
-expand-strided-metadata

examples/BuddyWhisper/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,15 @@ add_custom_command(
3232
-convert-elementwise-to-linalg
3333
-empty-tensor-to-alloc-tensor
3434
-one-shot-bufferize="bufferize-function-boundaries"
35+
-ownership-based-buffer-deallocation
36+
-buffer-deallocation-simplification
37+
-bufferization-lower-deallocations
3538
-matmul-parallel-vectorization-optimize
3639
-batchmatmul-optimize
3740
-convert-linalg-to-affine-loops
3841
-affine-loop-fusion
3942
-affine-parallelize
43+
-lower-affine
4044
-convert-scf-to-openmp
4145
-func-bufferize-dynamic-offset
4246
-convert-linalg-to-loops

examples/VIRDialect/.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
log*
22
core
3-
a.out
3+
*.s
4+
*.out
45
*.elf
6+
*.csv
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/usr/bin/env bash
2+
3+
# Usage: ./fixed-kernels-gen.sh
4+
# This script generates test cases for the AVX2 / AVX512 / ARM NEON kernels.
5+
6+
set -u # Treat unset variables as errors; -e is deliberately not set, so a failing command does not abort the script
7+
8+
# ==== Configuration ====
9+
RUN=1 # 0=print only, 1=execute make
10+
11+
FIXED_STEPS=(4 8 16 32 64 128 256 512 1024)
12+
13+
SIZES=(4096 4098 131072 131074 4194304 4194306 67108864 67108866)
14+
15+
FIXED_TARGET="vector-saxpy-fixed-aot"
16+
17+
# Print, and optionally run, `make <target> STEP=<step> SIZE=<size>` for every
# combination of the global SIZES array and the named steps array.
#
# Arguments:
#   $1 - make target to build
#   $2 - NAME of the global array that holds the step values (not its contents)
# Globals read:
#   SIZES - data sizes to iterate over
#   RUN   - 1 to execute each make command; any other number only prints it
# Output:
#   stdout: one line per generated command, then a summary line with the count
#   stderr: a warning for each make invocation that exits with a failure status
gen_cases () {
  # Extract target name and steps array name
  local target="$1"; shift
  local steps_array_name="$1"; shift

  # "<name>[@]" used with ${!ref} expands to the elements of the named array,
  # which avoids running eval on a string built from an argument.
  local steps_ref="${steps_array_name}[@]"

  # Keep loop variables local so they do not leak into the caller's scope.
  local size step cmd
  local count=0

  # Iterate through all data sizes
  for size in "${SIZES[@]}"; do
    # Iterate through all step sizes of the named array
    for step in "${!steps_ref}"; do
      # Human-readable form of the command, echoed for logging
      cmd="make $target STEP=$step SIZE=$size"
      echo "$cmd"

      # Execute command if RUN flag is set to 1
      if [[ "$RUN" -eq 1 ]]; then
        # Invoke make directly with quoted arguments instead of re-parsing
        # the command string through eval. A failure is reported but does not
        # stop the remaining cases.
        if ! make "$target" "STEP=$step" "SIZE=$size"; then
          echo "warning: command failed: $cmd" >&2
        fi
      fi

      # Arithmetic assignment always succeeds, whereas ((count++)) returns a
      # failure status when the pre-increment value is 0.
      count=$((count + 1))
    done
  done

  # Print summary of generated test cases
  echo "===> $target generated ${count} test cases in total"
}
47+
48+
gen_cases "$FIXED_TARGET" FIXED_STEPS

0 commit comments

Comments
 (0)