@@ -94,97 +94,43 @@ public Vector4 ConvolveCore(ref Vector4 rowStartRef)
9494 {
9595 if ( IsHardwareAccelerated )
9696 {
97- if ( Vector512 . IsHardwareAccelerated )
98- {
99- float * bufferStart = this . bufferPtr ;
100- ref Vector4 rowEndRef = ref Unsafe . Add ( ref rowStartRef , this . Length & ~ 7 ) ;
101- Vector512 < float > result512_0 = Vector512 < float > . Zero ;
102- Vector512 < float > result512_1 = Vector512 < float > . Zero ;
103-
104- while ( Unsafe . IsAddressLessThan ( ref rowStartRef , ref rowEndRef ) )
105- {
106- Vector512 < float > pixels512_0 = Unsafe . As < Vector4 , Vector512 < float > > ( ref rowStartRef ) ;
107- Vector512 < float > pixels512_1 = Unsafe . As < Vector4 , Vector512 < float > > ( ref Unsafe . Add ( ref rowStartRef , ( nuint ) 4 ) ) ;
108-
109- result512_0 = Vector512_ . MultiplyAdd ( result512_0 , Vector512 . Load ( bufferStart ) , pixels512_0 ) ;
110- result512_1 = Vector512_ . MultiplyAdd ( result512_1 , Vector512 . Load ( bufferStart + 16 ) , pixels512_1 ) ;
111-
112- bufferStart += 32 ;
113- rowStartRef = ref Unsafe . Add ( ref rowStartRef , ( nuint ) 8 ) ;
114- }
115-
116- result512_0 += result512_1 ;
117-
118- if ( ( this . Length & 7 ) >= 4 )
119- {
120- Vector512 < float > pixels512_0 = Unsafe . As < Vector4 , Vector512 < float > > ( ref rowStartRef ) ;
121- result512_0 = Vector512_ . MultiplyAdd ( result512_0 , Vector512 . Load ( bufferStart ) , pixels512_0 ) ;
122-
123- bufferStart += 16 ;
124- rowStartRef = ref Unsafe . Add ( ref rowStartRef , ( nuint ) 4 ) ;
125- }
126-
127- Vector256 < float > result256 = result512_0 . GetLower ( ) + result512_0 . GetUpper ( ) ;
128-
129- if ( ( this . Length & 3 ) >= 2 )
130- {
131- Vector256 < float > pixels256_0 = Unsafe . As < Vector4 , Vector256 < float > > ( ref rowStartRef ) ;
132- result256 = Vector256_ . MultiplyAdd ( result256 , Vector256 . Load ( bufferStart ) , pixels256_0 ) ;
133-
134- bufferStart += 8 ;
135- rowStartRef = ref Unsafe . Add ( ref rowStartRef , ( nuint ) 2 ) ;
136- }
137-
138- Vector128 < float > result128 = result256 . GetLower ( ) + result256 . GetUpper ( ) ;
139-
140- if ( ( this . Length & 1 ) != 0 )
141- {
142- Vector128 < float > pixels128 = Unsafe . As < Vector4 , Vector128 < float > > ( ref rowStartRef ) ;
143- result128 = Vector128_ . MultiplyAdd ( result128 , Vector128 . Load ( bufferStart ) , pixels128 ) ;
144- }
97+ float * bufferStart = this . bufferPtr ;
98+ ref Vector4 rowEndRef = ref Unsafe . Add ( ref rowStartRef , this . Length & ~ 3 ) ;
99+ Vector256 < float > result256_0 = Vector256 < float > . Zero ;
100+ Vector256 < float > result256_1 = Vector256 < float > . Zero ;
145101
146- return result128 . AsVector4 ( ) ;
147- }
148- else
102+ while ( Unsafe . IsAddressLessThan ( ref rowStartRef , ref rowEndRef ) )
149103 {
150- float * bufferStart = this . bufferPtr ;
151- ref Vector4 rowEndRef = ref Unsafe . Add ( ref rowStartRef , this . Length & ~ 3 ) ;
152- Vector256 < float > result256_0 = Vector256 < float > . Zero ;
153- Vector256 < float > result256_1 = Vector256 < float > . Zero ;
104+ Vector256 < float > pixels256_0 = Unsafe . As < Vector4 , Vector256 < float > > ( ref rowStartRef ) ;
105+ Vector256 < float > pixels256_1 = Unsafe . As < Vector4 , Vector256 < float > > ( ref Unsafe . Add ( ref rowStartRef , ( nuint ) 2 ) ) ;
154106
155- while ( Unsafe . IsAddressLessThan ( ref rowStartRef , ref rowEndRef ) )
156- {
157- Vector256 < float > pixels256_0 = Unsafe . As < Vector4 , Vector256 < float > > ( ref rowStartRef ) ;
158- Vector256 < float > pixels256_1 = Unsafe . As < Vector4 , Vector256 < float > > ( ref Unsafe . Add ( ref rowStartRef , ( nuint ) 2 ) ) ;
107+ result256_0 = Vector256_ . MultiplyAdd ( result256_0 , Vector256 . Load ( bufferStart ) , pixels256_0 ) ;
108+ result256_1 = Vector256_ . MultiplyAdd ( result256_1 , Vector256 . Load ( bufferStart + 8 ) , pixels256_1 ) ;
159109
160- result256_0 = Vector256_ . MultiplyAdd ( result256_0 , Vector256 . Load ( bufferStart ) , pixels256_0 ) ;
161- result256_1 = Vector256_ . MultiplyAdd ( result256_1 , Vector256 . Load ( bufferStart + 8 ) , pixels256_1 ) ;
162-
163- bufferStart += 16 ;
164- rowStartRef = ref Unsafe . Add ( ref rowStartRef , ( nuint ) 4 ) ;
165- }
166-
167- result256_0 += result256_1 ;
110+ bufferStart += 16 ;
111+ rowStartRef = ref Unsafe . Add ( ref rowStartRef , ( nuint ) 4 ) ;
112+ }
168113
169- if ( ( this . Length & 3 ) >= 2 )
170- {
171- Vector256 < float > pixels256_0 = Unsafe . As < Vector4 , Vector256 < float > > ( ref rowStartRef ) ;
172- result256_0 = Vector256_ . MultiplyAdd ( result256_0 , Vector256 . Load ( bufferStart ) , pixels256_0 ) ;
114+ result256_0 += result256_1 ;
173115
174- bufferStart += 8 ;
175- rowStartRef = ref Unsafe . Add ( ref rowStartRef , ( nuint ) 2 ) ;
176- }
116+ if ( ( this . Length & 3 ) >= 2 )
117+ {
118+ Vector256 < float > pixels256_0 = Unsafe . As < Vector4 , Vector256 < float > > ( ref rowStartRef ) ;
119+ result256_0 = Vector256_ . MultiplyAdd ( result256_0 , Vector256 . Load ( bufferStart ) , pixels256_0 ) ;
177120
178- Vector128 < float > result128 = result256_0 . GetLower ( ) + result256_0 . GetUpper ( ) ;
121+ bufferStart += 8 ;
122+ rowStartRef = ref Unsafe . Add ( ref rowStartRef , ( nuint ) 2 ) ;
123+ }
179124
180- if ( ( this . Length & 1 ) != 0 )
181- {
182- Vector128 < float > pixels128 = Unsafe . As < Vector4 , Vector128 < float > > ( ref rowStartRef ) ;
183- result128 = Vector128_ . MultiplyAdd ( result128 , Vector128 . Load ( bufferStart ) , pixels128 ) ;
184- }
125+ Vector128 < float > result128 = result256_0 . GetLower ( ) + result256_0 . GetUpper ( ) ;
185126
186- return result128 . AsVector4 ( ) ;
127+ if ( ( this . Length & 1 ) != 0 )
128+ {
129+ Vector128 < float > pixels128 = Unsafe . As < Vector4 , Vector128 < float > > ( ref rowStartRef ) ;
130+ result128 = Vector128_ . MultiplyAdd ( result128 , Vector128 . Load ( bufferStart ) , pixels128 ) ;
187131 }
132+
133+ return result128 . AsVector4 ( ) ;
188134 }
189135 else
190136 {
@@ -219,7 +165,7 @@ internal void FillOrCopyAndExpand(Span<float> values)
219165 {
220166 DebugGuard . IsTrue ( values . Length == this . Length , nameof ( values ) , "ResizeKernel.Fill: values.Length != this.Length!" ) ;
221167
222- if ( Vector256 . IsHardwareAccelerated )
168+ if ( IsHardwareAccelerated )
223169 {
224170 Vector4 * bufferStart = ( Vector4 * ) this . bufferPtr ;
225171 ref float valuesStart = ref MemoryMarshal . GetReference ( values ) ;
0 commit comments