-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbitarray.ts
266 lines (224 loc) · 9.36 KB
/
bitarray.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/**
* An ES6 BitArray class for easy operations on sequences of bits.
*
* @author swiing
*/
/**
* It aims at ease of use.
*
* With regards to performance, it remains to be experimented/proven
* that it actually performs better than a regular array of booleans/numbers
* (at least in the specific context where it is used).
* Presumably, accessing values at indexes (array[i]) is slow, because
* it goes via a proxy. On the other hand, logical operations should be
* really fast. So it may depend on how much of those operations are used.
* Unless performance is a critical part, it should be ok for most cases
* anyway (it may be possible to increase performance by minimizing
* accesses to the proxy handlers - FFS if needed).
*
* Lastly, it is very memory-efficient, as each value is coded in 1 bit,
* as opposed to booleans being less efficiently coded.
* (see https://dev.to/shevchenkonik/memory-size-of-javascript-boolean-3mlj)
*
*/
import BitTypedArray from '@bitarray/typedarray';
import { base64MIMEChars, base64UrlChars } from './alphabet.js';
const alphabetLengthErrorMsg = "Alphabet's length must be a power of 2";
// I could leverage _views from @bitarray/typedarray, or create a new one here.
// import { _views } from "./bit-typedarray.js";
const _views = new WeakMap<ArrayBuffer, Uint32Array>();
function getViewer(instance: BitArray) {
let viewer = _views.get(instance.buffer);
if (!viewer) {
_views.set(instance.buffer, (viewer = new Uint32Array(instance.buffer)));
}
return viewer;
}
// This is an extension of BitTypedArray with features (methods,getters)
// not available in native TypedArray's, essentially, bit operations.
export default class BitArray extends BitTypedArray {
and: (bitArray: BitArray) => BitArray;
or: (bitArray: BitArray) => BitArray;
xor: (bitArray: BitArray) => BitArray;
static from: (source: Iterable<any>) => BitArray;
static of: (...args) => BitArray;
/**
* returns the number of 'true' entries
*/
// this is meant to be performant
// inspired by https://github.com/bramstein/bit-array/blob/master/lib/bit-array.js
get count(): number {
let count = 0;
let view = getViewer(this);
for (let i = 0; i < this.length / 32; i++) {
let x = view[i];
// See: http://bits.stephan-brumme.com/countBits.html for an explanation
x = x - ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = x + (x >> 4);
x &= 0xf0f0f0f;
count += (x * 0x01010101) >> 24;
}
return count;
}
// Implementation note 1: I once used the maximum length of the two operands (for | and for ^),
// defaulting to 'false' for the operand with "missing" values. However, it is arguable that
// this is a good default assumption (it depends on context) so I finally prefer to go
// conservative, and compute only when there is actual data available (i.e. not assuming
// 'false' otherwise).
// As a positive side effect, it also simplifies implementation (but this is not the driver).
// Implementation note 2: I could have a very simple implementation doing e.g.
// ret[i] = this[i] && bitArray[i], for every index
// However, this would go through the array bit by bit. Instead, I go 4 bytes by 4 bytes (Uint32).
// This is where improved performance is expected.
/**
*
* @param bitArray
* @returns a BitArray instance with the logical 'or' applied to this and the passed parameter.
* The length of the resulting BitArray is the minimum of the length of the two operands.
*/
['|'](bitArray: BitArray): BitArray {
const ret = new BitArray(Math./*max*/ min(this.length, bitArray.length));
const retView = new Uint32Array(ret.buffer);
_views.set(ret.buffer, retView);
const thisView = getViewer(this);
const bitArrayView = getViewer(bitArray);
// let offset = 0;
// -1 needed for the case len == 32*n
let offset = ((ret.length - 1) >> 5) + 1; // divide 32, and add 1 because of next i--.
// for( /**/;
// // -1 needed for the case Math.min() == 32*n.
// offset < ( ret.length /*== Math.min( this.length, bitArray.length )*/ - 1 >> 5 ) + 1;
// offset++ )
while (offset--)
// note: ret is a proxy;
retView[offset] = thisView[offset] | bitArrayView[offset];
// Needed only if length of the returned value would be the max length of the two operands
// for( /**/;
// // offset < ( Math.max( this.length, bitArray.length ) >> 5 ) + 1;
// // (tbc) same as:
// offset < ret.length >> 5;
// offset++ )
// _views.get( ret.buffer )[ offset ] = _views.get( ( this.length <= bitArray.length ? bitArray : _targets.get( this ) ).buffer )[ offset ];
// when the two operands have a different length, and since bitwise
// operations treat by bunch of 32 bits, we may have set bits to 1
// beyond the length of ret. We apply a mask to set them back to zero
// (so that other operations like .count are correct)
retView[ret.length >> 5] &=
(1 << (ret.length & 31)) - 1; /* modulo 32 , -1*/
return ret;
}
/**
*
* @param bitArray
* @returns a BitArray instance with the logical 'and' applied to this and the passed parameter.
* The length of the resulting BitArray is the minimum of the length of the two operands.
*/
['&'](bitArray: BitArray): BitArray {
const ret = new BitArray(Math.min(this.length, bitArray.length));
const retView = new Uint32Array(ret.buffer);
_views.set(ret.buffer, retView);
const thisView = getViewer(this);
const bitArrayView = getViewer(bitArray);
// -1 needed for the case len == 32*n
let offset = ((ret.length - 1) >> 5) + 1; // divide 32, and add 1 because of next i--.
while (offset--) retView[offset] = thisView[offset] & bitArrayView[offset];
return ret;
}
/**
*
* @param bitArray
* @returns a BitArray instance with the logical 'xor' applied to this and the passed parameter.
* The length of the resulting BitArray is the minimum of the length of the two operands.
*/
['^'](bitArray: BitArray): BitArray {
const ret = new BitArray(Math.min(this.length, bitArray.length));
const retView = new Uint32Array(ret.buffer);
_views.set(ret.buffer, retView);
const thisView = getViewer(this);
const bitArrayView = getViewer(bitArray);
// -1 needed for the case len == 32*n
let offset = ((ret.length - 1) >> 5) + 1; // divide 32, and add 1 because of next i--.
while (offset--) retView[offset] = thisView[offset] ^ bitArrayView[offset];
// when the two operands have a different length, and since bitwise
// operations treat by bunch of 32 bits, we may have set bits to 1
// beyond the length of ret. We apply a mask to set them back to zero
// (so that other operations like .count are correct)
retView[ret.length >> 5] &=
(1 << (ret.length & 31)) - 1; /* modulo 32 , -1*/
return ret;
}
/**
*
* @param alphabet a set of n characters to use to encode the BitArray; alphabet.length must be a power of 2 (2, 4, 8, etc)
* The more characters in the set, the more compact the resulting output will be
* @returns a string encoded using the provided character set (e.g., base64 encoding can be achieved with this)
*/
encode(alphabet: string): string {
const log2 = Math.log2(alphabet.length);
if (log2 < 1 || log2 % 1 !== 0) {
throw new RangeError(alphabetLengthErrorMsg);
}
const ret = [];
let val = 0;
let valLen = 0;
for (const b of this) {
valLen++;
val <<= 1;
val += b;
if (valLen === log2) {
ret.push(alphabet[val]);
valLen = val = 0;
}
}
if (valLen !== 0) {
val <<= log2 - valLen;
ret.push(alphabet[val]);
}
return ret.join('');
}
/**
*
* @param alphabet a set of n characters to use to encode the BitArray; alphabet.length must be a power of 2 (2, 4, 8, etc),
* and should generally match the set used in the original encoding
* @param encodedString an encoded string built with encode
* @returns a BitArray of the encodedString decoded using alphabet
*/
static decode(encodedString: string, alphabet: string): BitArray {
const log2 = Math.log2(alphabet.length);
if (log2 < 1 || log2 % 1 !== 0) {
throw new RangeError(alphabetLengthErrorMsg);
}
const pad = (s: string) => '0'.repeat(log2 - s.length) + s;
const charMap = {}; // maps each character to its integral value
for (var i = 0; i < alphabet.length; i++) {
charMap[alphabet[i]] = pad(i.toString(2));
}
const deserialized = Array.from(encodedString)
.map((c) => {
if (!(c in charMap)) {
throw new RangeError('Invalid character found in encoded string');
}
return charMap[c];
})
.join('');
return BitArray.from(deserialized);
}
// Convenience specializations for encoding base64MIME and base64Url
encodeBase64MIME() {
return this.encode(base64MIMEChars);
}
static decodeBase64MIME(encodedString: string) {
return BitArray.decode(encodedString, base64MIMEChars);
}
encodeBase64Url() {
return this.encode(base64UrlChars);
}
static decodeBase64Url(encodedString: string) {
return BitArray.decode(encodedString, base64UrlChars);
}
}
// create aliases
BitArray.prototype.and = BitArray.prototype['&'];
BitArray.prototype.or = BitArray.prototype['|'];
BitArray.prototype.xor = BitArray.prototype['^'];