Skip to content

Commit 8d2bab3

Browse files
Merge pull request #2226 from saucecontrol/png-filters
PNG filter improvements
2 parents 5f796cd + 8a9126c commit 8d2bab3

File tree

7 files changed

+66
-92
lines changed

7 files changed

+66
-92
lines changed

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -780,25 +780,13 @@ public static void Accumulate(ref Vector<uint> accumulator, Vector<byte> values)
780780
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int ReduceSum(Vector128<int> accumulator)
{
    if (Sse2.IsSupported)
    {
        // Add odd to even: the shuffle yields lanes [1, 1, 3, 3], so after the add
        // lane 0 holds e0 + e1 and lane 2 holds e2 + e3.
        Vector128<int> vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_11_01_01));

        // Add high to low: the shuffle yields lanes [2, 3, 2, 3], so after the add
        // lane 0 holds the sum of all four elements.
        vsum = Sse2.Add(vsum, Sse2.Shuffle(vsum, 0b_11_10_11_10));

        // Extract lane 0.
        return Sse2.ConvertToInt32(vsum);
    }

    // Portable path for hardware without SSE2. Calling the Sse2 intrinsics there would throw
    // PlatformNotSupportedException. 'unchecked' keeps the same wrap-around behavior as the
    // SIMD adds above.
    int sum = 0;
    for (int i = 0; i < Vector128<int>.Count; i++)
    {
        sum = unchecked(sum + accumulator.GetElement(i));
    }

    return sum;
}
803791

804792
/// <summary>
@@ -821,6 +809,20 @@ public static int ReduceSum(Vector256<int> accumulator)
821809
return Sse2.ConvertToInt32(vsum);
822810
}
823811

812+
/// <summary>
/// Reduces even elements of the vector into one sum.
/// </summary>
/// <remarks>
/// Only the elements at index 0 and index 2 contribute; the elements at index 1 and 3 are ignored.
/// </remarks>
/// <param name="accumulator">The accumulator to reduce.</param>
/// <returns>The sum of even elements.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int EvenReduceSum(Vector128<int> accumulator)
{
    if (Sse2.IsSupported)
    {
        // Add high to low: the shuffle yields lanes [2, 3, 2, 3], so after the add
        // lane 0 holds e0 + e2.
        Vector128<int> vsum = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0b_11_10_11_10));

        // Extract lane 0.
        return Sse2.ConvertToInt32(vsum);
    }

    // Portable path for hardware without SSE2. Calling the Sse2 intrinsics there would throw
    // PlatformNotSupportedException. 'unchecked' keeps the same wrap-around behavior as the
    // SIMD add above.
    return unchecked(accumulator.GetElement(0) + accumulator.GetElement(2));
}
825+
824826
/// <summary>
825827
/// Reduces even elements of the vector into one sum.
826828
/// </summary>

src/ImageSharp/Formats/Png/Filters/AverageFilter.cs

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,9 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
119119
sum = 0;
120120

121121
// Average(x) = Raw(x) - floor((Raw(x-bpp)+Prior(x))/2)
122-
resultBaseRef = 3;
122+
resultBaseRef = (byte)FilterType.Average;
123123

124-
int x = 0;
124+
nint x = 0;
125125
for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
126126
{
127127
byte scan = Unsafe.Add(ref scanBaseRef, x);
@@ -138,7 +138,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
138138
Vector256<int> sumAccumulator = Vector256<int>.Zero;
139139
Vector256<byte> allBitsSet = Avx2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();
140140

141-
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
141+
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256<byte>.Count; xLeft += Vector256<byte>.Count)
142142
{
143143
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
144144
Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
@@ -157,12 +157,11 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
157157
}
158158
else if (Sse2.IsSupported)
159159
{
160-
Vector128<sbyte> zero8 = Vector128<sbyte>.Zero;
161-
Vector128<short> zero16 = Vector128<short>.Zero;
160+
Vector128<byte> zero = Vector128<byte>.Zero;
162161
Vector128<int> sumAccumulator = Vector128<int>.Zero;
163162
Vector128<byte> allBitsSet = Sse2.CompareEqual(sumAccumulator, sumAccumulator).AsByte();
164163

165-
for (int xLeft = x - bytesPerPixel; x + Vector128<byte>.Count <= scanline.Length; xLeft += Vector128<byte>.Count)
164+
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector128<byte>.Count; xLeft += Vector128<byte>.Count)
166165
{
167166
Vector128<byte> scan = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
168167
Vector128<byte> left = Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
@@ -174,36 +173,24 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
174173
Unsafe.As<byte, Vector128<byte>>(ref Unsafe.Add(ref resultBaseRef, x + 1)) = res; // +1 to skip filter type
175174
x += Vector128<byte>.Count;
176175

177-
Vector128<sbyte> absRes;
176+
Vector128<byte> absRes;
178177
if (Ssse3.IsSupported)
179178
{
180-
absRes = Ssse3.Abs(res.AsSByte()).AsSByte();
179+
absRes = Ssse3.Abs(res.AsSByte());
181180
}
182181
else
183182
{
184-
Vector128<sbyte> mask = Sse2.CompareGreaterThan(res.AsSByte(), zero8);
185-
mask = Sse2.Xor(mask, allBitsSet.AsSByte());
186-
absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask);
183+
Vector128<sbyte> mask = Sse2.CompareGreaterThan(zero.AsSByte(), res.AsSByte());
184+
absRes = Sse2.Xor(Sse2.Add(res.AsSByte(), mask), mask).AsByte();
187185
}
188186

189-
Vector128<short> loRes16 = Sse2.UnpackLow(absRes, zero8).AsInt16();
190-
Vector128<short> hiRes16 = Sse2.UnpackHigh(absRes, zero8).AsInt16();
191-
192-
Vector128<int> loRes32 = Sse2.UnpackLow(loRes16, zero16).AsInt32();
193-
Vector128<int> hiRes32 = Sse2.UnpackHigh(loRes16, zero16).AsInt32();
194-
sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
195-
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
196-
197-
loRes32 = Sse2.UnpackLow(hiRes16, zero16).AsInt32();
198-
hiRes32 = Sse2.UnpackHigh(hiRes16, zero16).AsInt32();
199-
sumAccumulator = Sse2.Add(sumAccumulator, loRes32);
200-
sumAccumulator = Sse2.Add(sumAccumulator, hiRes32);
187+
sumAccumulator = Sse2.Add(sumAccumulator, Sse2.SumAbsoluteDifferences(absRes, zero).AsInt32());
201188
}
202189

203-
sum += Numerics.ReduceSum(sumAccumulator);
190+
sum += Numerics.EvenReduceSum(sumAccumulator);
204191
}
205192

206-
for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
193+
for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
207194
{
208195
byte scan = Unsafe.Add(ref scanBaseRef, x);
209196
byte left = Unsafe.Add(ref scanBaseRef, xLeft);
@@ -213,8 +200,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
213200
res = (byte)(scan - Average(left, above));
214201
sum += Numerics.Abs(unchecked((sbyte)res));
215202
}
216-
217-
sum -= 3;
218203
}
219204

220205
/// <summary>

src/ImageSharp/Formats/Png/Filters/NoneFilter.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) Six Labors.
1+
// Copyright (c) Six Labors.
22
// Licensed under the Six Labors Split License.
33

44
using System;
@@ -21,8 +21,8 @@ internal static class NoneFilter
2121
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Encode(ReadOnlySpan<byte> scanline, Span<byte> result)
{
    // The first output byte carries the row filter type; the raw row data follows it.
    result[0] = (byte)FilterType.None;

    // Copy as many scanline bytes as fit into the space left after the filter byte.
    Span<byte> payload = result.Slice(1);
    int count = Math.Min(scanline.Length, payload.Length);
    scanline.Slice(0, count).CopyTo(payload);
}

src/ImageSharp/Formats/Png/Filters/PaethFilter.cs

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ private static void DecodeScalar(Span<byte> scanline, Span<byte> previousScanlin
108108

109109
// Paeth(x) + PaethPredictor(Raw(x-bpp), Prior(x), Prior(x-bpp))
110110
int offset = bytesPerPixel + 1; // Add one because x starts at one.
111-
int x = 1;
111+
nint x = 1;
112112
for (; x < offset; x++)
113113
{
114114
ref byte scan = ref Unsafe.Add(ref scanBaseRef, x);
@@ -146,9 +146,9 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
146146
sum = 0;
147147

148148
// Paeth(x) = Raw(x) - PaethPredictor(Raw(x-bpp), Prior(x), Prior(x - bpp))
149-
resultBaseRef = 4;
149+
resultBaseRef = (byte)FilterType.Paeth;
150150

151-
int x = 0;
151+
nint x = 0;
152152
for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
153153
{
154154
byte scan = Unsafe.Add(ref scanBaseRef, x);
@@ -164,7 +164,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
164164
Vector256<byte> zero = Vector256<byte>.Zero;
165165
Vector256<int> sumAccumulator = Vector256<int>.Zero;
166166

167-
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
167+
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256<byte>.Count; xLeft += Vector256<byte>.Count)
168168
{
169169
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
170170
Vector256<byte> left = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
@@ -184,7 +184,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
184184
{
185185
Vector<uint> sumAccumulator = Vector<uint>.Zero;
186186

187-
for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
187+
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector<byte>.Count; xLeft += Vector<byte>.Count)
188188
{
189189
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
190190
Vector<byte> left = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
@@ -204,7 +204,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
204204
}
205205
}
206206

207-
for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
207+
for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
208208
{
209209
byte scan = Unsafe.Add(ref scanBaseRef, x);
210210
byte left = Unsafe.Add(ref scanBaseRef, xLeft);
@@ -215,8 +215,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
215215
res = (byte)(scan - PaethPredictor(left, above, upperLeft));
216216
sum += Numerics.Abs(unchecked((sbyte)res));
217217
}
218-
219-
sum -= 4;
220218
}
221219

222220
/// <summary>
@@ -250,6 +248,7 @@ private static byte PaethPredictor(byte left, byte above, byte upperLeft)
250248
return upperLeft;
251249
}
252250

251+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
253252
private static Vector256<byte> PaethPredictor(Vector256<byte> left, Vector256<byte> above, Vector256<byte> upleft)
254253
{
255254
Vector256<byte> zero = Vector256<byte>.Zero;
@@ -282,6 +281,7 @@ private static Vector256<byte> PaethPredictor(Vector256<byte> left, Vector256<by
282281
return Avx2.BlendVariable(resbc, left, Avx2.CompareEqual(Avx2.Min(minbc, pa), pa));
283282
}
284283

284+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
285285
private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above, Vector<byte> upperLeft)
286286
{
287287
Vector.Widen(left, out Vector<ushort> a1, out Vector<ushort> a2);
@@ -293,16 +293,17 @@ private static Vector<byte> PaethPredictor(Vector<byte> left, Vector<byte> above
293293
return Vector.AsVectorByte(Vector.Narrow(p1, p2));
294294
}
295295

296+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
296297
private static Vector<short> PaethPredictor(Vector<short> left, Vector<short> above, Vector<short> upperLeft)
297298
{
298299
Vector<short> p = left + above - upperLeft;
299-
var pa = Vector.Abs(p - left);
300-
var pb = Vector.Abs(p - above);
301-
var pc = Vector.Abs(p - upperLeft);
300+
Vector<short> pa = Vector.Abs(p - left);
301+
Vector<short> pb = Vector.Abs(p - above);
302+
Vector<short> pc = Vector.Abs(p - upperLeft);
302303

303-
var pa_pb = Vector.LessThanOrEqual(pa, pb);
304-
var pa_pc = Vector.LessThanOrEqual(pa, pc);
305-
var pb_pc = Vector.LessThanOrEqual(pb, pc);
304+
Vector<short> pa_pb = Vector.LessThanOrEqual(pa, pb);
305+
Vector<short> pa_pc = Vector.LessThanOrEqual(pa, pc);
306+
Vector<short> pb_pc = Vector.LessThanOrEqual(pb, pc);
306307

307308
return Vector.ConditionalSelect(
308309
condition: Vector.BitwiseAnd(pa_pb, pa_pc),

src/ImageSharp/Formats/Png/Filters/SubFilter.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
9191
sum = 0;
9292

9393
// Sub(x) = Raw(x) - Raw(x-bpp)
94-
resultBaseRef = 1;
94+
resultBaseRef = (byte)FilterType.Sub;
9595

96-
int x = 0;
96+
nint x = 0;
9797
for (; x < bytesPerPixel; /* Note: ++x happens in the body to avoid one add operation */)
9898
{
9999
byte scan = Unsafe.Add(ref scanBaseRef, x);
@@ -108,7 +108,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
108108
Vector256<byte> zero = Vector256<byte>.Zero;
109109
Vector256<int> sumAccumulator = Vector256<int>.Zero;
110110

111-
for (int xLeft = x - bytesPerPixel; x + Vector256<byte>.Count <= scanline.Length; xLeft += Vector256<byte>.Count)
111+
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector256<byte>.Count; xLeft += Vector256<byte>.Count)
112112
{
113113
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
114114
Vector256<byte> prev = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
@@ -126,7 +126,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
126126
{
127127
Vector<uint> sumAccumulator = Vector<uint>.Zero;
128128

129-
for (int xLeft = x - bytesPerPixel; x + Vector<byte>.Count <= scanline.Length; xLeft += Vector<byte>.Count)
129+
for (nint xLeft = x - bytesPerPixel; x <= scanline.Length - Vector<byte>.Count; xLeft += Vector<byte>.Count)
130130
{
131131
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
132132
Vector<byte> prev = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, xLeft));
@@ -144,7 +144,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
144144
}
145145
}
146146

147-
for (int xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
147+
for (nint xLeft = x - bytesPerPixel; x < scanline.Length; ++xLeft /* Note: ++x happens in the body to avoid one add operation */)
148148
{
149149
byte scan = Unsafe.Add(ref scanBaseRef, x);
150150
byte prev = Unsafe.Add(ref scanBaseRef, xLeft);
@@ -153,8 +153,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result
153153
res = (byte)(scan - prev);
154154
sum += Numerics.Abs(unchecked((sbyte)res));
155155
}
156-
157-
sum--;
158156
}
159157
}
160158
}

src/ImageSharp/Formats/Png/Filters/UpFilter.cs

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ private static void DecodeAvx2(Span<byte> scanline, Span<byte> previousScanline)
4949
// Up(x) + Prior(x)
5050
int rb = scanline.Length;
5151
nint offset = 1;
52-
const int bytesPerBatch = 32;
53-
while (rb >= bytesPerBatch)
52+
while (rb >= Vector256<byte>.Count)
5453
{
5554
ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);
5655
Vector256<byte> current = Unsafe.As<byte, Vector256<byte>>(ref scanRef);
@@ -59,8 +58,8 @@ private static void DecodeAvx2(Span<byte> scanline, Span<byte> previousScanline)
5958
Vector256<byte> sum = Avx2.Add(up, current);
6059
Unsafe.As<byte, Vector256<byte>>(ref scanRef) = sum;
6160

62-
offset += bytesPerBatch;
63-
rb -= bytesPerBatch;
61+
offset += Vector256<byte>.Count;
62+
rb -= Vector256<byte>.Count;
6463
}
6564

6665
// Handle left over.
@@ -81,8 +80,7 @@ private static void DecodeSse2(Span<byte> scanline, Span<byte> previousScanline)
8180
// Up(x) + Prior(x)
8281
int rb = scanline.Length;
8382
nint offset = 1;
84-
const int bytesPerBatch = 16;
85-
while (rb >= bytesPerBatch)
83+
while (rb >= Vector128<byte>.Count)
8684
{
8785
ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);
8886
Vector128<byte> current = Unsafe.As<byte, Vector128<byte>>(ref scanRef);
@@ -91,8 +89,8 @@ private static void DecodeSse2(Span<byte> scanline, Span<byte> previousScanline)
9189
Vector128<byte> sum = Sse2.Add(up, current);
9290
Unsafe.As<byte, Vector128<byte>>(ref scanRef) = sum;
9391

94-
offset += bytesPerBatch;
95-
rb -= bytesPerBatch;
92+
offset += Vector128<byte>.Count;
93+
rb -= Vector128<byte>.Count;
9694
}
9795

9896
// Handle left over.
@@ -112,7 +110,7 @@ private static void DecodeScalar(Span<byte> scanline, Span<byte> previousScanlin
112110
ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);
113111

114112
// Up(x) + Prior(x)
115-
for (int x = 1; x < scanline.Length; x++)
113+
for (nint x = 1; x < scanline.Length; x++)
116114
{
117115
ref byte scan = ref Unsafe.Add(ref scanBaseRef, x);
118116
byte above = Unsafe.Add(ref prevBaseRef, x);
@@ -139,16 +137,16 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
139137
sum = 0;
140138

141139
// Up(x) = Raw(x) - Prior(x)
142-
resultBaseRef = 2;
140+
resultBaseRef = (byte)FilterType.Up;
143141

144-
int x = 0;
142+
nint x = 0;
145143

146144
if (Avx2.IsSupported)
147145
{
148146
Vector256<byte> zero = Vector256<byte>.Zero;
149147
Vector256<int> sumAccumulator = Vector256<int>.Zero;
150148

151-
for (; x + Vector256<byte>.Count <= scanline.Length;)
149+
for (; x <= scanline.Length - Vector256<byte>.Count;)
152150
{
153151
Vector256<byte> scan = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
154152
Vector256<byte> above = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
@@ -166,7 +164,7 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
166164
{
167165
Vector<uint> sumAccumulator = Vector<uint>.Zero;
168166

169-
for (; x + Vector<byte>.Count <= scanline.Length;)
167+
for (; x <= scanline.Length - Vector<byte>.Count;)
170168
{
171169
Vector<byte> scan = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref scanBaseRef, x));
172170
Vector<byte> above = Unsafe.As<byte, Vector<byte>>(ref Unsafe.Add(ref prevBaseRef, x));
@@ -193,8 +191,6 @@ public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previo
193191
res = (byte)(scan - above);
194192
sum += Numerics.Abs(unchecked((sbyte)res));
195193
}
196-
197-
sum -= 2;
198194
}
199195
}
200196
}

0 commit comments

Comments
 (0)