Skip to content

Commit a96c78d

Browse files
Don't use Vector512 (SLOW)
1 parent 1e87e4e commit a96c78d

File tree

3 files changed

+30
-116
lines changed

3 files changed

+30
-116
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ SixLabors.ImageSharp
88
<div align="center">
99

1010
[![Build Status](https://img.shields.io/github/actions/workflow/status/SixLabors/ImageSharp/build-and-test.yml?branch=main)](https://github.com/SixLabors/ImageSharp/actions)
11-
[![Code coverage](https://codecov.io/gh/SixLabors/ImageSharp/branch/main/graph/badge.svg)](https://codecov.io/gh/SixLabors/ImageSharp)
11+
[![codecov](https://codecov.io/gh/SixLabors/ImageSharp/graph/badge.svg?token=g2WJwz770q)](https://codecov.io/gh/SixLabors/ImageSharp)
1212
[![License: Six Labors Split](https://img.shields.io/badge/license-Six%20Labors%20Split-%23e30183)](https://github.com/SixLabors/ImageSharp/blob/main/LICENSE)
1313
[![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=flat&logo=twitter)](https://twitter.com/intent/tweet?hashtags=imagesharp,dotnet,oss&text=ImageSharp.+A+new+cross-platform+2D+graphics+API+in+C%23&url=https%3a%2f%2fgithub.com%2fSixLabors%2fImageSharp&via=sixlabors)
1414

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 1 addition & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1089,39 +1089,7 @@ public static nuint Vector512Count<TVector>(int length)
10891089
[MethodImpl(MethodImplOptions.AggressiveInlining)]
10901090
public static void Normalize(Span<float> span, float sum)
10911091
{
1092-
if (Vector512.IsHardwareAccelerated)
1093-
{
1094-
ref float startRef = ref MemoryMarshal.GetReference(span);
1095-
ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~15);
1096-
Vector512<float> sum512 = Vector512.Create(sum);
1097-
1098-
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
1099-
{
1100-
Unsafe.As<float, Vector512<float>>(ref startRef) /= sum512;
1101-
startRef = ref Unsafe.Add(ref startRef, (nuint)16);
1102-
}
1103-
1104-
if ((span.Length & 15) >= 8)
1105-
{
1106-
Unsafe.As<float, Vector256<float>>(ref startRef) /= sum512.GetLower();
1107-
startRef = ref Unsafe.Add(ref startRef, (nuint)8);
1108-
}
1109-
1110-
if ((span.Length & 7) >= 4)
1111-
{
1112-
Unsafe.As<float, Vector128<float>>(ref startRef) /= sum512.GetLower().GetLower();
1113-
startRef = ref Unsafe.Add(ref startRef, (nuint)4);
1114-
}
1115-
1116-
endRef = ref Unsafe.Add(ref startRef, span.Length & 3);
1117-
1118-
while (Unsafe.IsAddressLessThan(ref startRef, ref endRef))
1119-
{
1120-
startRef /= sum;
1121-
startRef = ref Unsafe.Add(ref startRef, (nuint)1);
1122-
}
1123-
}
1124-
else if (Vector256.IsHardwareAccelerated)
1092+
if (Vector256.IsHardwareAccelerated)
11251093
{
11261094
ref float startRef = ref MemoryMarshal.GetReference(span);
11271095
ref float endRef = ref Unsafe.Add(ref startRef, span.Length & ~7);

src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernel.cs

Lines changed: 28 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -94,97 +94,43 @@ public Vector4 ConvolveCore(ref Vector4 rowStartRef)
9494
{
9595
if (IsHardwareAccelerated)
9696
{
97-
if (Vector512.IsHardwareAccelerated)
98-
{
99-
float* bufferStart = this.bufferPtr;
100-
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~7);
101-
Vector512<float> result512_0 = Vector512<float>.Zero;
102-
Vector512<float> result512_1 = Vector512<float>.Zero;
103-
104-
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
105-
{
106-
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef);
107-
Vector512<float> pixels512_1 = Unsafe.As<Vector4, Vector512<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)4));
108-
109-
result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0);
110-
result512_1 = Vector512_.MultiplyAdd(result512_1, Vector512.Load(bufferStart + 16), pixels512_1);
111-
112-
bufferStart += 32;
113-
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)8);
114-
}
115-
116-
result512_0 += result512_1;
117-
118-
if ((this.Length & 7) >= 4)
119-
{
120-
Vector512<float> pixels512_0 = Unsafe.As<Vector4, Vector512<float>>(ref rowStartRef);
121-
result512_0 = Vector512_.MultiplyAdd(result512_0, Vector512.Load(bufferStart), pixels512_0);
122-
123-
bufferStart += 16;
124-
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
125-
}
126-
127-
Vector256<float> result256 = result512_0.GetLower() + result512_0.GetUpper();
128-
129-
if ((this.Length & 3) >= 2)
130-
{
131-
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
132-
result256 = Vector256_.MultiplyAdd(result256, Vector256.Load(bufferStart), pixels256_0);
133-
134-
bufferStart += 8;
135-
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
136-
}
137-
138-
Vector128<float> result128 = result256.GetLower() + result256.GetUpper();
139-
140-
if ((this.Length & 1) != 0)
141-
{
142-
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
143-
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
144-
}
97+
float* bufferStart = this.bufferPtr;
98+
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3);
99+
Vector256<float> result256_0 = Vector256<float>.Zero;
100+
Vector256<float> result256_1 = Vector256<float>.Zero;
145101

146-
return result128.AsVector4();
147-
}
148-
else
102+
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
149103
{
150-
float* bufferStart = this.bufferPtr;
151-
ref Vector4 rowEndRef = ref Unsafe.Add(ref rowStartRef, this.Length & ~3);
152-
Vector256<float> result256_0 = Vector256<float>.Zero;
153-
Vector256<float> result256_1 = Vector256<float>.Zero;
104+
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
105+
Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2));
154106

155-
while (Unsafe.IsAddressLessThan(ref rowStartRef, ref rowEndRef))
156-
{
157-
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
158-
Vector256<float> pixels256_1 = Unsafe.As<Vector4, Vector256<float>>(ref Unsafe.Add(ref rowStartRef, (nuint)2));
107+
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
108+
result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1);
159109

160-
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
161-
result256_1 = Vector256_.MultiplyAdd(result256_1, Vector256.Load(bufferStart + 8), pixels256_1);
162-
163-
bufferStart += 16;
164-
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
165-
}
166-
167-
result256_0 += result256_1;
110+
bufferStart += 16;
111+
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)4);
112+
}
168113

169-
if ((this.Length & 3) >= 2)
170-
{
171-
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
172-
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
114+
result256_0 += result256_1;
173115

174-
bufferStart += 8;
175-
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
176-
}
116+
if ((this.Length & 3) >= 2)
117+
{
118+
Vector256<float> pixels256_0 = Unsafe.As<Vector4, Vector256<float>>(ref rowStartRef);
119+
result256_0 = Vector256_.MultiplyAdd(result256_0, Vector256.Load(bufferStart), pixels256_0);
177120

178-
Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper();
121+
bufferStart += 8;
122+
rowStartRef = ref Unsafe.Add(ref rowStartRef, (nuint)2);
123+
}
179124

180-
if ((this.Length & 1) != 0)
181-
{
182-
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
183-
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
184-
}
125+
Vector128<float> result128 = result256_0.GetLower() + result256_0.GetUpper();
185126

186-
return result128.AsVector4();
127+
if ((this.Length & 1) != 0)
128+
{
129+
Vector128<float> pixels128 = Unsafe.As<Vector4, Vector128<float>>(ref rowStartRef);
130+
result128 = Vector128_.MultiplyAdd(result128, Vector128.Load(bufferStart), pixels128);
187131
}
132+
133+
return result128.AsVector4();
188134
}
189135
else
190136
{
@@ -219,7 +165,7 @@ internal void FillOrCopyAndExpand(Span<float> values)
219165
{
220166
DebugGuard.IsTrue(values.Length == this.Length, nameof(values), "ResizeKernel.Fill: values.Length != this.Length!");
221167

222-
if (Vector256.IsHardwareAccelerated)
168+
if (IsHardwareAccelerated)
223169
{
224170
Vector4* bufferStart = (Vector4*)this.bufferPtr;
225171
ref float valuesStart = ref MemoryMarshal.GetReference(values);

0 commit comments

Comments (0)