|
1 | | -#pragma warning disable SYSLIB5003 |
2 | | - |
3 | 1 | using System; |
4 | 2 | using System.Numerics; |
5 | 3 | using System.Runtime.Intrinsics; |
@@ -74,131 +72,120 @@ public unsafe ulong Scalar() |
74 | 72 | [Benchmark] |
75 | 73 | public unsafe ulong SvePartition() |
76 | 74 | { |
77 | | - if (Sve.IsSupported) |
| 75 | + fixed (uint* input = _input, left = _left, right = _right) |
78 | 76 | { |
79 | | - fixed (uint* input = _input, left = _left, right = _right) |
80 | | - { |
81 | | - long i = 0; |
| 77 | + long i = 0; |
82 | 78 |
|
83 | | - ulong indexLeft = 0; |
84 | | - ulong indexRight = 0; |
| 79 | + ulong indexLeft = 0; |
| 80 | + ulong indexRight = 0; |
85 | 81 |
|
86 | | - Vector<uint> ones = Vector<uint>.One; |
| 82 | + Vector<uint> ones = Vector<uint>.One; |
87 | 83 |
|
88 | | - Vector<uint> firstElemVec = Sve.DuplicateSelectedScalarToVector( |
89 | | - Sve.LoadVector(Sve.CreateTrueMaskUInt32(), input), 0 |
90 | | - ); |
| 84 | + Vector<uint> firstElemVec = Sve.DuplicateSelectedScalarToVector( |
| 85 | + Sve.LoadVector(Sve.CreateTrueMaskUInt32(), input), 0 |
| 86 | + ); |
91 | 87 |
|
92 | | - // Create a predicate for the loop. |
93 | | - Vector<uint> pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); |
| 88 | + // Create a predicate for the loop. |
| 89 | + Vector<uint> pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); |
94 | 90 |
|
95 | | - while (Sve.TestAnyTrue(Sve.CreateTrueMaskUInt32(), pLoop)) |
96 | | - { |
97 | | - // Load from the input array based on the loop predicate. |
98 | | - Vector<uint> data = Sve.LoadVector(pLoop, input + i); |
99 | | - |
100 | | - // Predicate for elements in input array less than the first element. |
101 | | - Vector<uint> pCompare = Sve.CompareLessThan(data, firstElemVec); |
| 91 | + while (Sve.TestAnyTrue(Sve.CreateTrueMaskUInt32(), pLoop)) |
| 92 | + { |
| 93 | + // Load from the input array based on the loop predicate. |
| 94 | + Vector<uint> data = Sve.LoadVector(pLoop, input + i); |
102 | 95 |
|
103 | | - // Apply the pLoop mask. |
104 | | - Vector<uint> pInner = Sve.ConditionalSelect(pLoop, pCompare, Vector<uint>.Zero); |
| 96 | + // Predicate for elements in input array less than the first element. |
| 97 | + Vector<uint> pCompare = Sve.CompareLessThan(data, firstElemVec); |
105 | 98 |
|
106 | | - // Squash all found elements to the lower lanes of the vector. |
107 | | - Vector<uint> compacted = Sve.Compact(pInner, data); |
| 99 | + // Apply the pLoop mask. |
| 100 | + Vector<uint> pInner = Sve.ConditionalSelect(pLoop, pCompare, Vector<uint>.Zero); |
108 | 101 |
|
109 | | - // Store the squashed elements to the first output array. |
110 | | - // (This uses the loop predicate, so some additional zeros may be stored). |
111 | | - Sve.StoreAndZip(pLoop, left + indexLeft, compacted); |
| 102 | + // Squash all found elements to the lower lanes of the vector. |
| 103 | + Vector<uint> compacted = Sve.Compact(pInner, data); |
112 | 104 |
|
113 | | - // Increment the position in the first output array by the number of elements found. |
114 | | - indexLeft = Sve.SaturatingIncrementByActiveElementCount(indexLeft, pInner); |
| 105 | + // Store the squashed elements to the first output array. |
| 106 | + // (This uses the loop predicate, so some additional zeros may be stored). |
| 107 | + Sve.StoreAndZip(pLoop, left + indexLeft, compacted); |
115 | 108 |
|
116 | | - // Find all elements in input array NOT less than the first element. |
117 | | - // (Flip the pCompare predicate by XORing with ones) |
118 | | - pInner = Sve.ConditionalSelect(pLoop, Sve.Xor(pCompare, ones), Vector<uint>.Zero); |
| 109 | + // Increment the position in the first output array by the number of elements found. |
| 110 | + indexLeft = Sve.SaturatingIncrementByActiveElementCount(indexLeft, pInner); |
119 | 111 |
|
120 | | - // Repeat for the right array. |
121 | | - compacted = Sve.Compact(pInner, data); |
122 | | - Sve.StoreAndZip(pLoop, right + indexRight, compacted); |
123 | | - indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); |
| 112 | + // Find all elements in input array NOT less than the first element. |
| 113 | + // (Flip the pCompare predicate by XORing with ones) |
| 114 | + pInner = Sve.ConditionalSelect(pLoop, Sve.Xor(pCompare, ones), Vector<uint>.Zero); |
124 | 115 |
|
125 | | - i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); |
126 | | - pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); |
127 | | - } |
| 116 | + // Repeat for the right array. |
| 117 | + compacted = Sve.Compact(pInner, data); |
| 118 | + Sve.StoreAndZip(pLoop, right + indexRight, compacted); |
| 119 | + indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); |
128 | 120 |
|
129 | | - return indexRight; |
| 121 | + i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); |
| 122 | + pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); |
130 | 123 | } |
131 | 124 |
|
| 125 | + return indexRight; |
132 | 126 | } |
133 | | - return 0; |
134 | 127 | } |
135 | 128 |
|
136 | 129 | [Benchmark] |
137 | 130 | public unsafe ulong SveTail() |
138 | 131 | { |
139 | | - if (Sve.IsSupported) |
| 132 | + fixed (uint* input = _input, left = _left, right = _right) |
140 | 133 | { |
141 | | - fixed (uint* input = _input, left = _left, right = _right) |
142 | | - { |
143 | | - long i = 0; |
| 134 | + long i = 0; |
144 | 135 |
|
145 | | - ulong indexLeft = 0; |
146 | | - ulong indexRight = 0; |
| 136 | + ulong indexLeft = 0; |
| 137 | + ulong indexRight = 0; |
147 | 138 |
|
148 | | - Vector<uint> firstElemVec = Sve.DuplicateSelectedScalarToVector( |
149 | | - Sve.LoadVector(Sve.CreateTrueMaskUInt32(), input), 0 |
150 | | - ); |
| 139 | + Vector<uint> firstElemVec = Sve.DuplicateSelectedScalarToVector( |
| 140 | + Sve.LoadVector(Sve.CreateTrueMaskUInt32(), input), 0 |
| 141 | + ); |
151 | 142 |
|
152 | | - Vector<uint> pTrue = Sve.CreateTrueMaskUInt32(); |
| 143 | + Vector<uint> pTrue = Sve.CreateTrueMaskUInt32(); |
153 | 144 |
|
154 | | - while (i < (Size - (int)Sve.Count32BitElements())) |
155 | | - { |
156 | | - Vector<uint> data = Sve.LoadVector(pTrue, input + i); |
| 145 | + while (i < (Size - (int)Sve.Count32BitElements())) |
| 146 | + { |
| 147 | + Vector<uint> data = Sve.LoadVector(pTrue, input + i); |
157 | 148 |
|
158 | | - // Predicate for elements in input array less than the first element. |
159 | | - Vector<uint> pInner = Sve.CompareLessThan(data, firstElemVec); |
| 149 | + // Predicate for elements in input array less than the first element. |
| 150 | + Vector<uint> pInner = Sve.CompareLessThan(data, firstElemVec); |
160 | 151 |
|
161 | | - // Squash all found elements to the lower lanes of the vector. |
162 | | - Vector<uint> compacted = Sve.Compact(pInner, data); |
| 152 | + // Squash all found elements to the lower lanes of the vector. |
| 153 | + Vector<uint> compacted = Sve.Compact(pInner, data); |
163 | 154 |
|
164 | | - // Store the squashed elements to the first output array. |
165 | | - Sve.StoreAndZip(pTrue, left + indexLeft, compacted); |
| 155 | + // Store the squashed elements to the first output array. |
| 156 | + Sve.StoreAndZip(pTrue, left + indexLeft, compacted); |
166 | 157 |
|
167 | | - // Increment the position in the first output array by the number of elements found. |
168 | | - indexLeft = Sve.SaturatingIncrementByActiveElementCount(indexLeft, pInner); |
| 158 | + // Increment the position in the first output array by the number of elements found. |
| 159 | + indexLeft = Sve.SaturatingIncrementByActiveElementCount(indexLeft, pInner); |
169 | 160 |
|
170 | | - // Find elements greater than or equal to the first element. |
171 | | - pInner = Sve.CompareGreaterThanOrEqual(data, firstElemVec); |
| 161 | + // Find elements greater than or equal to the first element. |
| 162 | + pInner = Sve.CompareGreaterThanOrEqual(data, firstElemVec); |
172 | 163 |
|
173 | | - // Repeat for the right array. |
174 | | - compacted = Sve.Compact(pInner, data); |
175 | | - Sve.StoreAndZip(pTrue, right + indexRight, compacted); |
176 | | - indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); |
| 164 | + // Repeat for the right array. |
| 165 | + compacted = Sve.Compact(pInner, data); |
| 166 | + Sve.StoreAndZip(pTrue, right + indexRight, compacted); |
| 167 | + indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); |
177 | 168 |
|
178 | | - i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); |
179 | | - } |
| 169 | + i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); |
| 170 | + } |
180 | 171 |
|
181 | | - // Handle remaining elements. |
182 | | - for (; i < Size; i++) |
| 172 | + // Handle remaining elements. |
| 173 | + for (; i < Size; i++) |
| 174 | + { |
| 175 | + if (input[i] < input[0]) |
183 | 176 | { |
184 | | - if (input[i] < input[0]) |
185 | | - { |
186 | | - left[indexLeft] = input[i]; |
187 | | - indexLeft++; |
188 | | - } |
189 | | - else |
190 | | - { |
191 | | - right[indexRight] = input[i]; |
192 | | - indexRight++; |
193 | | - } |
| 177 | + left[indexLeft] = input[i]; |
| 178 | + indexLeft++; |
| 179 | + } |
| 180 | + else |
| 181 | + { |
| 182 | + right[indexRight] = input[i]; |
| 183 | + indexRight++; |
194 | 184 | } |
195 | | - |
196 | | - return indexRight; |
197 | 185 | } |
| 186 | + |
| 187 | + return indexRight; |
198 | 188 | } |
199 | | - return 0; |
200 | 189 | } |
201 | 190 | } |
202 | 191 | } |
203 | | - |
204 | | -#pragma warning restore SYSLIB5003 |
0 commit comments