Skip to content

Add split and split_once to bit_array module #803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Unreleased

- The `bit_array` module gains the `split` and `split_once` functions.
- The performance of `dict.is_empty` has been improved.

## v0.54.0 - 2025-02-04
Expand Down
46 changes: 46 additions & 0 deletions src/gleam/bit_array.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,52 @@ pub fn slice(
take length: Int,
) -> Result(BitArray, Nil)

/// Splits a bit array into two parts at the first occurrence of the pattern.
///
/// The pattern itself is not included in either part.
///
/// Returns `Error(Nil)` if the pattern is not found, if the pattern is empty,
/// or if the pattern is the same as the whole input. On the Erlang target an
/// error is also returned when the input is not byte-aligned.
///
/// This function is intended to run in linear time.
/// NOTE(review): the JavaScript implementation compares the pattern at every
/// offset of the input, so its worst case is proportional to the input size
/// multiplied by the pattern size — confirm whether the claim should be kept.
///
/// ## Examples
///
/// ```gleam
/// split_once(from: <<1, 2, 3>>, on: <<2>>)
/// // -> Ok(#(<<1>>, <<3>>))
///
/// split_once(from: <<0>>, on: <<1>>)
/// // -> Error(Nil)
///
/// split_once(from: <<1>>, on: <<1>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split_once")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
pub fn split_once(
from bits: BitArray,
on pattern: BitArray,
) -> Result(#(BitArray, BitArray), Nil)

/// Splits a bit array into parts at every occurrence of the pattern.
///
/// The parts do not include the pattern, and empty parts are left out of the
/// result, so an input that is exactly the pattern gives an empty list. If the
/// pattern is not found the result is a list holding only the input.
///
/// Returns `Error(Nil)` if the pattern is empty. On the Erlang target an
/// error is also returned when the input is not byte-aligned.
///
/// This function is intended to run in linear time.
/// NOTE(review): the JavaScript implementation compares the pattern at the
/// offsets it visits, so its worst case is proportional to the input size
/// multiplied by the pattern size — confirm whether the claim should be kept.
///
/// ## Examples
///
/// ```gleam
/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
/// // -> Ok([<<1>>, <<2>>, <<3>>])
///
/// split(from: <<0>>, on: <<1>>)
/// // -> Ok([<<0>>])
///
/// split(from: <<1, 2>>, on: <<1, 2>>)
/// // -> Ok([])
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
pub fn split(
from bits: BitArray,
on pattern: BitArray,
) -> Result(List(BitArray), Nil)

/// Tests to see whether a bit array is valid UTF-8.
///
pub fn is_utf8(bits: BitArray) -> Bool {
Expand Down
18 changes: 17 additions & 1 deletion src/gleam_stdlib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
inspect/1, float_to_string/1, int_from_base_string/2,
utf_codepoint_list_to_string/1, contains_string/2, crop_string/2,
base16_encode/1, base16_decode/1, string_replace/3, slice/3,
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, bit_array_split_once/2,
bit_array_split/2
]).

%% Taken from OTP's uri_string module
Expand Down Expand Up @@ -231,6 +232,21 @@ bit_array_slice(Bin, Pos, Len) ->
catch error:badarg -> {error, nil}
end.

%% Splits Bin at the first occurrence of Sub.
%%
%% Returns {ok, {Before, After}} when the split leaves at least one
%% non-empty side. Returns {error, nil} when Sub does not occur, when both
%% sides are empty (Bin is exactly Sub), or when binary:split/2 raises
%% badarg (for example an empty pattern or a non byte-aligned input).
bit_array_split_once(Bin, Sub) ->
    try binary:split(Bin, [Sub]) of
        %% `;` in a guard means "or": accept unless both halves are empty.
        [Before, After] when Before =/= <<>>; After =/= <<>> ->
            {ok, {Before, After}};
        _NoUsableSplit ->
            {error, nil}
    catch
        error:badarg -> {error, nil}
    end.

%% Splits Bin at every occurrence of Sub, dropping empty parts.
%%
%% Returns {ok, Parts} on success and {error, nil} when binary:split/3
%% raises badarg (for example an empty pattern or a non byte-aligned input).
bit_array_split(Bin, Sub) ->
    try binary:split(Bin, [Sub], [global, trim_all]) of
        Parts -> {ok, Parts}
    catch
        error:badarg -> {error, nil}
    end.

base_decode64(S) ->
try {ok, base64:decode(S)}
catch error:_ -> {error, nil}
Expand Down
74 changes: 74 additions & 0 deletions src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,80 @@ export function bit_array_slice(bits, position, length) {
return new Ok(new BitArray(buffer));
}

/**
 * Splits `bits` at the first occurrence of `pattern`.
 *
 * Returns `Ok([before, after])` with the bytes on either side of the match.
 * Returns `Error(Nil)` when either argument is not a `BitArray`, when the
 * pattern is empty, when the pattern is at least as long as the input, when
 * the pattern does not occur, or when anything throws along the way.
 */
export function bit_array_split_once(bits, pattern) {
  try {
    if (!(bits instanceof BitArray) || !(pattern instanceof BitArray)) {
      return new Error(Nil);
    }

    const haystack = bits.buffer;
    const needle = pattern.buffer;
    if (needle.length < 1 || needle.length >= haystack.length) {
      return new Error(Nil);
    }

    // Last offset at which the whole pattern still fits inside the input.
    const lastStart = haystack.length - needle.length;
    for (let start = 0; start <= lastStart; start++) {
      let matched = 0;
      while (
        matched < needle.length &&
        haystack[start + matched] === needle[matched]
      ) {
        matched++;
      }

      if (matched === needle.length) {
        const before = haystack.slice(0, start);
        const after = haystack.slice(start + needle.length);
        return new Ok([new BitArray(before), new BitArray(after)]);
      }
    }

    return new Error(Nil);
  } catch (e) {
    return new Error(Nil);
  }
}

/**
 * Splits `bits` at every occurrence of `pattern`, leaving out empty parts.
 *
 * Returns `Ok(list)` holding the non-empty byte runs found between matches.
 * When the pattern never occurs the list holds a copy of the whole input,
 * and when the input is empty or consists only of the pattern the list is
 * empty. This mirrors the Erlang implementation, which calls
 * `binary:split/3` with the `global` and `trim_all` options.
 *
 * Returns `Error(Nil)` when either argument is not a `BitArray`, when the
 * pattern is empty, or when anything throws along the way.
 */
export function bit_array_split(bits, pattern) {
  try {
    if (!(bits instanceof BitArray) || !(pattern instanceof BitArray)) {
      return new Error(Nil);
    }

    const patternLength = pattern.buffer.length;
    if (patternLength < 1) {
      return new Error(Nil);
    }

    const bitsLength = bits.buffer.length;
    const results = [];
    let lastIndex = 0;

    // Number of offsets at which the whole pattern still fits. This is zero
    // or negative when the input is shorter than the pattern, in which case
    // the loop is skipped and the trailing-part check below handles the input.
    const n = bitsLength - patternLength + 1;

    find: for (let i = 0; i < n; i++) {
      for (let j = 0; j < patternLength; j++) {
        if (bits.buffer[i + j] !== pattern.buffer[j]) {
          continue find;
        }
      }

      // Only keep the bytes before this match when there are any, so that
      // leading and adjacent matches do not produce empty parts.
      if (i > lastIndex) {
        results.push(new BitArray(bits.buffer.slice(lastIndex, i)));
      }

      // Resume scanning right after the match; the loop's `i++` supplies
      // the final step.
      lastIndex = i + patternLength;
      i = lastIndex - 1;
    }

    // Keep whatever follows the last match, unless it is empty. For an empty
    // input this pushes nothing, so the result is an empty list.
    if (lastIndex < bitsLength) {
      results.push(new BitArray(bits.buffer.slice(lastIndex)));
    }

    return new Ok(List.fromArray(results));
  } catch (e) {
    return new Error(Nil);
  }
}

// Wraps the given integer in a `UtfCodepoint` value.
// NOTE(review): no validation of `int` happens here; presumably callers have
// already checked that it is a valid codepoint — confirm.
export function codepoint(int) {
return new UtfCodepoint(int);
}
Expand Down
86 changes: 86 additions & 0 deletions test/gleam/bit_array_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,92 @@ pub fn slice_erlang_only_test() {
|> should.equal(Error(Nil))
}

pub fn split_once_test() {
  // Only the first occurrence of the pattern is used.
  bit_array.split_once(from: <<"hello":utf8>>, on: <<"l":utf8>>)
  |> should.equal(Ok(#(<<"he":utf8>>, <<"lo":utf8>>)))

  // Pattern at the very end leaves an empty second part.
  bit_array.split_once(from: <<"hello":utf8>>, on: <<"o":utf8>>)
  |> should.equal(Ok(#(<<"hell":utf8>>, <<>>)))

  // Pattern at the very start leaves an empty first part.
  bit_array.split_once(from: <<"hello":utf8>>, on: <<"h":utf8>>)
  |> should.equal(Ok(#(<<>>, <<"ello":utf8>>)))

  // Multi-byte pattern.
  bit_array.split_once(from: <<0, 1, 0, 2, 0, 3>>, on: <<0, 2>>)
  |> should.equal(Ok(#(<<0, 1>>, <<0, 3>>)))

  // An empty pattern is an error.
  bit_array.split_once(from: <<0, 1, 2, 0, 3, 4, 5>>, on: <<>>)
  |> should.equal(Error(Nil))

  // An empty input cannot contain the pattern.
  bit_array.split_once(from: <<>>, on: <<1>>)
  |> should.equal(Error(Nil))

  // An input that is exactly the pattern is an error.
  bit_array.split_once(from: <<1>>, on: <<1>>)
  |> should.equal(Error(Nil))

  // Pattern not present.
  bit_array.split_once(from: <<0>>, on: <<1>>)
  |> should.equal(Error(Nil))
}

// Erlang-only: the input below is not byte-aligned, and such bit arrays are
// not supported on the JavaScript target.
@target(erlang)
pub fn split_once_erlang_only_test() {
  bit_array.split_once(from: <<0, 1, 2:7>>, on: <<1>>)
  |> should.equal(Error(Nil))
}

pub fn split_test() {
  // Adjacent matches do not produce an empty part between them.
  bit_array.split(from: <<"hello":utf8>>, on: <<"l":utf8>>)
  |> should.equal(Ok([<<"he":utf8>>, <<"o":utf8>>]))

  // A leading match does not produce an empty first part.
  bit_array.split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
  |> should.equal(Ok([<<1>>, <<2>>, <<3>>]))

  // Multi-byte pattern.
  bit_array.split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0, 2>>)
  |> should.equal(Ok([<<0, 1>>, <<0, 3>>]))

  // A trailing match does not produce an empty last part.
  bit_array.split(from: <<1, 0>>, on: <<0>>)
  |> should.equal(Ok([<<1>>]))

  bit_array.split(from: <<1, 0>>, on: <<1>>)
  |> should.equal(Ok([<<0>>]))

  // Pattern not present: the input comes back as the only part.
  bit_array.split(from: <<1>>, on: <<0>>)
  |> should.equal(Ok([<<1>>]))

  // An input that is exactly the pattern gives no parts at all.
  bit_array.split(from: <<1, 2>>, on: <<1, 2>>)
  |> should.equal(Ok([]))

  // An empty pattern is an error.
  bit_array.split(from: <<0, 1, 2, 0, 3, 4, 5>>, on: <<>>)
  |> should.equal(Error(Nil))
}

// Erlang-only: the input below is not byte-aligned, and such bit arrays are
// not supported on the JavaScript target.
@target(erlang)
pub fn split_erlang_only_test() {
  bit_array.split(from: <<0, 1, 2:7>>, on: <<1>>)
  |> should.equal(Error(Nil))
}

pub fn to_string_test() {
<<>>
|> bit_array.to_string
Expand Down