@@ -63,27 +63,27 @@ struct I32X8 {
6363}
6464
6565impl I32X8 {
66- #[ target_feature( enable = "avx2" ) ]
66+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
6767 #[ inline]
6868 const unsafe fn vec ( self ) -> __m256i {
6969 self . data
7070 }
7171
72- #[ target_feature( enable = "avx2" ) ]
72+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
7373 #[ inline]
7474 const unsafe fn new ( a : __m256i ) -> I32X8 {
7575 I32X8 { data : a }
7676 }
7777}
7878
7979impl TxOperations for I32X8 {
80- #[ target_feature( enable = "avx2" ) ]
80+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
8181 #[ inline]
8282 unsafe fn zero ( ) -> Self {
8383 I32X8 :: new ( _mm256_setzero_si256 ( ) )
8484 }
8585
86- #[ target_feature( enable = "avx2" ) ]
86+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
8787 #[ inline]
8888 unsafe fn tx_mul ( self , mul : ( i32 , i32 ) ) -> Self {
8989 I32X8 :: new ( _mm256_srav_epi32 (
@@ -95,7 +95,7 @@ impl TxOperations for I32X8 {
9595 ) )
9696 }
9797
98- #[ target_feature( enable = "avx2" ) ]
98+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
9999 #[ inline]
100100 unsafe fn rshift1 ( self ) -> Self {
101101 I32X8 :: new ( _mm256_srai_epi32 (
@@ -107,34 +107,34 @@ impl TxOperations for I32X8 {
107107 ) )
108108 }
109109
110- #[ target_feature( enable = "avx2" ) ]
110+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
111111 #[ inline]
112112 unsafe fn add ( self , b : Self ) -> Self {
113113 I32X8 :: new ( _mm256_add_epi32 ( self . vec ( ) , b. vec ( ) ) )
114114 }
115115
116- #[ target_feature( enable = "avx2" ) ]
116+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
117117 #[ inline]
118118 unsafe fn sub ( self , b : Self ) -> Self {
119119 I32X8 :: new ( _mm256_sub_epi32 ( self . vec ( ) , b. vec ( ) ) )
120120 }
121121
122- #[ target_feature( enable = "avx2" ) ]
122+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
123123 #[ inline]
124124 unsafe fn add_avg ( self , b : Self ) -> Self {
125125 I32X8 :: new ( _mm256_srai_epi32 ( _mm256_add_epi32 ( self . vec ( ) , b. vec ( ) ) , 1 ) )
126126 }
127127
128- #[ target_feature( enable = "avx2" ) ]
128+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
129129 #[ inline]
130130 unsafe fn sub_avg ( self , b : Self ) -> Self {
131131 I32X8 :: new ( _mm256_srai_epi32 ( _mm256_sub_epi32 ( self . vec ( ) , b. vec ( ) ) , 1 ) )
132132 }
133133}
134134
135- impl_1d_tx ! ( target_feature( enable = "avx2" ) , unsafe ) ;
135+ impl_1d_tx ! ( target_feature( enable = "avx2,bmi1,bmi2 " ) , unsafe ) ;
136136
137- #[ target_feature( enable = "avx2" ) ]
137+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
138138unsafe fn transpose_8x8_avx2 (
139139 input : ( I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 ) ,
140140) -> ( I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 ) {
@@ -175,7 +175,7 @@ unsafe fn transpose_8x8_avx2(
175175 )
176176}
177177
178- #[ target_feature( enable = "avx2" ) ]
178+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
179179unsafe fn transpose_8x4_avx2 (
180180 input : ( I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 ) ,
181181) -> ( I32X8 , I32X8 , I32X8 , I32X8 ) {
@@ -213,7 +213,7 @@ unsafe fn transpose_8x4_avx2(
213213 )
214214}
215215
216- #[ target_feature( enable = "avx2" ) ]
216+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
217217unsafe fn transpose_4x8_avx2 (
218218 input : ( I32X8 , I32X8 , I32X8 , I32X8 ) ,
219219) -> ( I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 , I32X8 ) {
@@ -246,7 +246,7 @@ unsafe fn transpose_4x8_avx2(
246246 )
247247}
248248
249- #[ target_feature( enable = "avx2" ) ]
249+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
250250unsafe fn transpose_4x4_avx2 (
251251 input : ( I32X8 , I32X8 , I32X8 , I32X8 ) ,
252252) -> ( I32X8 , I32X8 , I32X8 , I32X8 ) {
@@ -265,13 +265,13 @@ unsafe fn transpose_4x4_avx2(
265265 )
266266}
267267
268- #[ target_feature( enable = "avx2" ) ]
268+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
269269#[ inline]
270270unsafe fn shift_left ( a : I32X8 , shift : u8 ) -> I32X8 {
271271 I32X8 :: new ( _mm256_sllv_epi32 ( a. vec ( ) , _mm256_set1_epi32 ( shift as i32 ) ) )
272272}
273273
274- #[ target_feature( enable = "avx2" ) ]
274+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
275275#[ inline]
276276unsafe fn shift_right ( a : I32X8 , shift : u8 ) -> I32X8 {
277277 I32X8 :: new ( _mm256_srav_epi32 (
@@ -280,7 +280,7 @@ unsafe fn shift_right(a: I32X8, shift: u8) -> I32X8 {
280280 ) )
281281}
282282
283- #[ target_feature( enable = "avx2" ) ]
283+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
284284#[ inline]
285285unsafe fn round_shift_array_avx2 ( arr : & mut [ I32X8 ] , size : usize , bit : i8 ) {
286286 if bit == 0 {
@@ -328,7 +328,7 @@ impl SizeClass1D {
328328}
329329
330330#[ allow( clippy:: identity_op, clippy:: erasing_op) ]
331- #[ target_feature( enable = "avx2" ) ]
331+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
332332unsafe fn forward_transform_avx2 < T : Coefficient > (
333333 input : & [ i16 ] , output : & mut [ T ] , stride : usize , tx_size : TxSize ,
334334 tx_type : TxType , bd : usize ,
@@ -355,7 +355,7 @@ unsafe fn forward_transform_avx2<T: Coefficient>(
355355 // Columns
356356 for cg in ( 0 ..txfm_size_col) . step_by ( 8 ) {
357357 let shift = cfg. shift [ 0 ] as u8 ;
358- #[ target_feature( enable = "avx2" ) ]
358+ #[ target_feature( enable = "avx2,bmi1,bmi2 " ) ]
359359 #[ inline]
360360 unsafe fn load_columns ( input_ptr : * const i16 , shift : u8 ) -> I32X8 {
361361 // TODO: load 64-bits for x4 wide columns
0 commit comments