@@ -2484,33 +2484,41 @@ type __builtin_IB_WorkGroupReduce_##func##_##type_abbr(type X)
2484
2484
} \
2485
2485
SPIRV_BUILTIN (ControlBarrier , _i32_i32_i32 , )(Workgroup , 0 , AcquireRelease | WorkgroupMemory ); \
2486
2486
\
2487
- type low_data ; \
2488
- type high_data ; \
2489
- type reduce ; \
2490
- if (sg_size == 32 ) /* SIMD32 */ \
2487
+ if (sg_id == 0 ) \
2491
2488
{ \
2492
- low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2493
- high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2494
- /* 64 (from 64) elements reduces to 32 */ \
2495
- reduce = op (low_data , high_data ); \
2496
- } \
2497
- else if (sg_size == 16 ) /* SIMD16 */ \
2498
- { \
2499
- low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2500
- type mid_low_data = sg_lid + 16 < values_num ? scratch [sg_lid + 16 ] : identity ; \
2501
- type mid_high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2502
- high_data = sg_lid + 32 + 16 < values_num ? scratch [sg_lid + 32 + 16 ] : identity ; \
2503
- /* 32 first part (from 64) elements reduces to 16 */ \
2504
- low_data = op (low_data , mid_low_data ); \
2505
- /* 32 second part (from 64) elements reduces to 16 */ \
2506
- high_data = op (mid_high_data , high_data ); \
2507
- /* 64 (from 64) elements reduces to 16 */ \
2508
- reduce = op (low_data , high_data ); \
2509
- } \
2510
- /* SIMD8 is not available on PVC */ \
2489
+ type low_data ; \
2490
+ type high_data ; \
2491
+ type reduce ; \
2511
2492
\
2512
- sg_x = SPIRV_BUILTIN (Group ##func , _i32_i32_##type_abbr, )(Subgroup, GroupOperationReduce, reduce); \
2513
- return sg_x; \
2493
+ if (sg_size == 32 ) /* SIMD32 */ \
2494
+ { \
2495
+ low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2496
+ high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2497
+ /* 64 (from 64) elements reduces to 32 */ \
2498
+ reduce = op (low_data , high_data ); \
2499
+ } \
2500
+ else if (sg_size == 16 ) /* SIMD16 */ \
2501
+ { \
2502
+ low_data = sg_lid < values_num ? scratch [sg_lid ] : identity ; \
2503
+ type mid_low_data = sg_lid + 16 < values_num ? scratch [sg_lid + 16 ] : identity ; \
2504
+ type mid_high_data = sg_lid + 32 < values_num ? scratch [sg_lid + 32 ] : identity ; \
2505
+ high_data = sg_lid + 32 + 16 < values_num ? scratch [sg_lid + 32 + 16 ] : identity ; \
2506
+ /* 32 first part (from 64) elements reduces to 16 */ \
2507
+ low_data = op (low_data , mid_low_data ); \
2508
+ /* 32 second part (from 64) elements reduces to 16 */ \
2509
+ high_data = op (mid_high_data , high_data ); \
2510
+ /* 64 (from 64) elements reduces to 16 */ \
2511
+ reduce = op (low_data , high_data ); \
2512
+ } \
2513
+ /* SIMD8 is not available on PVC */ \
2514
+ \
2515
+ sg_x = SPIRV_BUILTIN (Group ##func , _i32_i32_##type_abbr, )(Subgroup, GroupOperationReduce, reduce); \
2516
+ if (sg_lid == 0) { \
2517
+ scratch[0] = sg_x; \
2518
+ } \
2519
+ } \
2520
+ SPIRV_BUILTIN(ControlBarrier, _i32_i32_i32, )(Workgroup, 0, AcquireRelease | WorkgroupMemory); \
2521
+ return scratch[0]; \
2514
2522
} \
2515
2523
} \
2516
2524
else \
0 commit comments