Skip to content

Commit

Permalink
Unaligned bit arrays on the JavaScript target
Browse files Browse the repository at this point in the history
  • Loading branch information
richard-viney committed Jan 5, 2025
1 parent 330a3cc commit 568d07e
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 96 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

- Unaligned bit arrays on the JavaScript target are now supported by the
`bit_array` module. Note: unaligned bit arrays on JavaScript are supported
starting with Gleam v1.8.

## v0.52.0 - 2025-01-04

- Improved the precision of `float.to_precision`.
Expand Down
12 changes: 5 additions & 7 deletions src/gleam/bit_array.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,20 @@ pub fn from_string(x: String) -> BitArray
/// Returns an integer which is the number of bits in the bit array.
///
@external(erlang, "erlang", "bit_size")
pub fn bit_size(x: BitArray) -> Int {
byte_size(x) * 8
}
@external(javascript, "../gleam_stdlib.mjs", "bit_array_bit_size")
pub fn bit_size(x: BitArray) -> Int

/// Returns an integer which is the number of bytes in the bit array.
///
@external(erlang, "erlang", "byte_size")
@external(javascript, "../gleam_stdlib.mjs", "length")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_byte_size")
pub fn byte_size(x: BitArray) -> Int

/// Pads a bit array with zeros so that it is a whole number of bytes.
///
@external(erlang, "gleam_stdlib", "bit_array_pad_to_bytes")
pub fn pad_to_bytes(x: BitArray) -> BitArray {
x
}
@external(javascript, "../gleam_stdlib.mjs", "bit_array_pad_to_bytes")
pub fn pad_to_bytes(x: BitArray) -> BitArray

/// Creates a new bit array by joining two bit arrays.
///
Expand Down
221 changes: 190 additions & 31 deletions src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,64 @@ export function bit_array_from_string(string) {
return toBitArray([stringBits(string)]);
}

// Feature flag computed once at module load: true when a `BitArray` built from
// an empty `Uint8Array` has a defined `bitSize` property. Functions in this
// module check it and fall back to the older `length` / `buffer` accessors
// when it is false.
// NOTE(review): presumably false only with compilers older than Gleam v1.8,
// per the changelog entry for this change — confirm.
const BIT_ARRAY_UNALIGNED_SUPPORTED =
new BitArray(new Uint8Array()).bitSize !== undefined;

export function bit_array_bit_size(bit_array) {
if (!BIT_ARRAY_UNALIGNED_SUPPORTED) {
return bit_array.length * 8;
}

return bit_array.bitSize;
}

export function bit_array_byte_size(bit_array) {
if (!BIT_ARRAY_UNALIGNED_SUPPORTED) {
return bit_array.length;
}

return bit_array.byteSize;
}

export function bit_array_pad_to_bytes(bit_array) {
const trailingBitsCount = bit_array_bit_size(bit_array) % 8;

// If the bit array is byte aligned it can be returned unchanged
if (trailingBitsCount === 0) {
return bit_array;
}

const finalByte = bit_array.byteAt(bit_array.byteSize - 1);

const unusedBitsCount = 8 - trailingBitsCount;
const correctFinalByte = (finalByte >> unusedBitsCount) << unusedBitsCount;

// If the unused bits in the final byte are already set to zero then the
// existing buffer can be re-used, avoiding a copy
if (finalByte === correctFinalByte) {
return new BitArray(
bit_array.rawBuffer,
bit_array.byteSize * 8,
bit_array.bitOffset,
);
}

// Copy the bit array into a new aligned buffer and set the correct final byte
const buffer = new Uint8Array(bit_array.byteSize);
for (let i = 0; i < buffer.length - 1; i++) {
buffer[i] = bit_array.byteAt(i);
}
buffer[buffer.length - 1] = correctFinalByte;

return new BitArray(buffer);
}

export function bit_array_concat(bit_arrays) {
return toBitArray(bit_arrays.toArray().map((b) => b.buffer));
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
return toBitArray(bit_arrays.toArray());
} else {
return toBitArray(bit_arrays.toArray().map((b) => b.buffer));
}
}

export function console_log(term) {
Expand All @@ -333,9 +389,25 @@ export function crash(message) {
}

export function bit_array_to_string(bit_array) {
if (bit_array_bit_size(bit_array) % 8 !== 0) {
return new Error(Nil);
}

try {
const decoder = new TextDecoder("utf-8", { fatal: true });
return new Ok(decoder.decode(bit_array.buffer));
if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
if (bit_array.bitOffset === 0) {
return new Ok(decoder.decode(bit_array.rawBuffer));
} else {
const buffer = new Uint8Array(bit_array.byteSize);
for (let i = 0; i < buffer.length; i++) {
buffer[i] = bit_array.byteAt(i);
}
return new Ok(decoder.decode(buffer));
}
} else {
return new Ok(decoder.decode(bit_array.buffer));
}
} catch {
return new Error(Nil);
}
Expand Down Expand Up @@ -415,13 +487,21 @@ export function random_uniform() {
export function bit_array_slice(bits, position, length) {
const start = Math.min(position, position + length);
const end = Math.max(position, position + length);
if (start < 0 || end > bits.length) return new Error(Nil);
const byteOffset = bits.buffer.byteOffset + start;

if (start < 0 || end * 8 > bit_array_bit_size(bits)) {
return new Error(Nil);
}

if (BIT_ARRAY_UNALIGNED_SUPPORTED) {
return new Ok(bits.slice(start * 8, end * 8));
}

const buffer = new Uint8Array(
bits.buffer.buffer,
byteOffset,
Math.abs(length),
bits.buffer.byteOffset + start,
end - start,
);

return new Ok(new BitArray(buffer));
}

Expand Down Expand Up @@ -522,16 +602,20 @@ let b64TextDecoder;
export function encode64(bit_array, padding) {
b64TextDecoder ??= new TextDecoder();

const bytes = bit_array.buffer;
bit_array = bit_array_pad_to_bytes(bit_array);

const m = bytes.length;
const m = bit_array_byte_size(bit_array);
const k = m % 3;
const n = Math.floor(m / 3) * 4 + (k && k + 1);
const N = Math.ceil(m / 3) * 4;
const encoded = new Uint8Array(N);

for (let i = 0, j = 0; j < m; i += 4, j += 3) {
const y = (bytes[j] << 16) + (bytes[j + 1] << 8) + (bytes[j + 2] | 0);
const y =
(bit_array.byteAt(j) << 16) +
(bit_array.byteAt(j + 1) << 8) +
(bit_array.byteAt(j + 2) | 0);

encoded[i] = b64EncodeLookup[y >> 18];
encoded[i + 1] = b64EncodeLookup[(y >> 12) & 0x3f];
encoded[i + 2] = b64EncodeLookup[(y >> 6) & 0x3f];
Expand Down Expand Up @@ -804,7 +888,7 @@ export function inspect(v) {
if (Array.isArray(v)) return `#(${v.map(inspect).join(", ")})`;
if (v instanceof List) return inspectList(v);
if (v instanceof UtfCodepoint) return inspectUtfCodepoint(v);
if (v instanceof BitArray) return inspectBitArray(v);
if (v instanceof BitArray) return `<<${bit_array_inspect(v, "")}>>`;
if (v instanceof CustomType) return inspectCustomType(v);
if (v instanceof Dict) return inspectDict(v);
if (v instanceof Set) return `//js(Set(${[...v].map(inspect).join(", ")}))`;
Expand Down Expand Up @@ -895,17 +979,22 @@ export function inspectList(list) {
return `[${list.toArray().map(inspect).join(", ")}]`;
}

// Renders a bit array as `<<b0, b1, ...>>`, listing each element of its
// `buffer` separated by ", ".
export function inspectBitArray(bits) {
  const items = Array.from(bits.buffer);
  return "<<" + items.join(", ") + ">>";
}

// Renders a UTF codepoint as `//utfcodepoint(<char>)`, where `<char>` is the
// string produced from `codepoint.value` by `String.fromCodePoint`.
export function inspectUtfCodepoint(codepoint) {
  const char = String.fromCodePoint(codepoint.value);
  return "//utfcodepoint(" + char + ")";
}

export function base16_encode(bit_array) {
const trailingBitsCount = bit_array_bit_size(bit_array) % 8;

let result = "";
for (const byte of bit_array.buffer) {
for (let i = 0; i < bit_array.byteSize; i++) {
let byte = bit_array.byteAt(i);

if (i === bit_array.byteSize - 1 && trailingBitsCount) {
const unusedBitsCount = 8 - trailingBitsCount;
byte = (byte >> unusedBitsCount) << unusedBitsCount;
}

result += byte.toString(16).padStart(2, "0").toUpperCase();
}
return result;
Expand All @@ -923,38 +1012,108 @@ export function base16_decode(string) {
}

export function bit_array_inspect(bits, acc) {
return `${acc}${[...bits.buffer].join(", ")}`;
const bitSize = bit_array_bit_size(bits);

if (bitSize === 0) {
return acc;
}

const byteSize = bit_array_byte_size(bits);

for (let i = 0; i < byteSize - 1; i++) {
acc += bits.byteAt(i).toString();
acc += ", ";
}

if (byteSize * 8 === bitSize) {
acc += bits.byteAt(byteSize - 1).toString();
} else {
const trailingBitsCount = bitSize % 8;
acc += bits.byteAt(byteSize - 1) >> (8 - trailingBitsCount);
acc += `:size(${trailingBitsCount})`;
}

return acc;
}

export function bit_array_compare(first, second) {
for (let i = 0; i < first.length; i++) {
if (i >= second.length) {
return new Gt(); // first has more items
}
const f = first.buffer[i];
const s = second.buffer[i];
let i = 0;

let firstSize = bit_array_bit_size(first);
let secondSize = bit_array_bit_size(second);

while (firstSize >= 8 && secondSize >= 8) {
const f = first.byteAt(i);
const s = second.byteAt(i);

if (f > s) {
return new Gt();
}
if (f < s) {
} else if (f < s) {
return new Lt();
}

i++;
firstSize -= 8;
secondSize -= 8;
}
// This means that either first did not have any items
// or all items in first were equal to second.
if (first.length === second.length) {

if (firstSize === 0 && secondSize === 0) {
return new Eq();
}
return new Lt(); // second has more items

// First has more items, example: "AB" > "A":
if (secondSize === 0) {
return new Gt();
}

// Second has more items, example: "A" < "AB":
if (firstSize === 0) {
return new Lt();
}

// This happens when there are unaligned bit arrays

const f = first.byteAt(i) >> (8 - firstSize);
const s = second.byteAt(i) >> (8 - secondSize);

if (f > s) {
return new Gt();
}
if (f < s) {
return new Lt();
}
if (firstSize > secondSize) {
return new Gt();
}
if (firstSize < secondSize) {
return new Lt();
}

return new Eq();
}

export function bit_array_starts_with(bits, prefix) {
if (prefix.length > bits.length) {
const prefixSize = bit_array_bit_size(prefix);

if (prefixSize > bit_array_bit_size(bits)) {
return false;
}

for (let i = 0; i < prefix.length; i++) {
if (bits.buffer[i] !== prefix.buffer[i]) {
// Check any whole bytes
const byteCount = Math.trunc(prefixSize / 8);
for (let i = 0; i < byteCount; i++) {
if (bits.byteAt(i) !== prefix.byteAt(i)) {
return false;
}
}

// Check any trailing bits at the end of the prefix
if (prefixSize % 8 !== 0) {
const unusedBitsCount = 8 - (prefixSize % 8);
if (
bits.byteAt(byteCount) >> unusedBitsCount !==
prefix.byteAt(byteCount) >> unusedBitsCount
) {
return false;
}
}
Expand Down
Loading

0 comments on commit 568d07e

Please sign in to comment.