/// Splits a bit array into two parts at the first occurrence of the pattern.
///
/// The pattern itself is not included in either part.
///
/// `Error(Nil)` is returned when the pattern is not found, when the pattern
/// is empty, and when the input is exactly equal to the pattern. On the
/// Erlang target it is also returned when the input is not byte-aligned.
///
/// This function runs in linear time.
///
/// ## Examples
///
/// ```gleam
/// split_once(from: <<1, 2, 3>>, on: <<2>>)
/// // -> Ok(#(<<1>>, <<3>>))
///
/// split_once(from: <<0>>, on: <<1>>)
/// // -> Error(Nil)
///
/// split_once(from: <<1>>, on: <<1>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split_once")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
pub fn split_once(
  from bits: BitArray,
  on pattern: BitArray,
) -> Result(#(BitArray, BitArray), Nil)

/// Splits a bit array into parts at every occurrence of the pattern.
///
/// The pattern is not included in the result and empty parts are dropped,
/// so an input that is exactly equal to the pattern yields `Ok([])`. If the
/// pattern is not found the result is a list holding only the input.
///
/// `Error(Nil)` is returned when the pattern is empty. On the Erlang target
/// it is also returned when the input is not byte-aligned.
///
/// This function runs in linear time.
///
/// ## Examples
///
/// ```gleam
/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
/// // -> Ok([<<1>>, <<2>>, <<3>>])
///
/// split(from: <<0>>, on: <<1>>)
/// // -> Ok([<<0>>])
///
/// split(from: <<0, 1>>, on: <<>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
pub fn split(
  from bits: BitArray,
  on pattern: BitArray,
) -> Result(List(BitArray), Nil)
%% Splits `Bin` at the first occurrence of `Sub`.
%%
%% Returns `{ok, {Before, After}}` on success. Returns `{error, nil}` when
%% `Sub` does not occur, when `Bin` is exactly `Sub` (both halves would be
%% empty), or when `binary:split/2` rejects the arguments with `badarg`.
bit_array_split_once(Bin, Sub) ->
    try binary:split(Bin, [Sub]) of
        [<<>>, <<>>] -> {error, nil};
        [Before, After] -> {ok, {Before, After}};
        _NoMatch -> {error, nil}
    catch
        error:badarg -> {error, nil}
    end.

%% Splits `Bin` at every occurrence of `Sub`, dropping empty parts
%% (`trim_all`).
%%
%% Returns `{ok, Parts}`, or `{error, nil}` when `binary:split/3` rejects the
%% arguments with `badarg`.
bit_array_split(Bin, Sub) ->
    try binary:split(Bin, [Sub], [global, trim_all]) of
        Parts -> {ok, Parts}
    catch
        error:badarg -> {error, nil}
    end.
// Returns the index of the first occurrence of the byte sequence `needle`
// inside `haystack` at or after `start`, or -1 when there is none.
// `needle` must not be empty; callers check this before searching.
function bit_array_find(haystack, needle, start) {
  const lastStart = haystack.length - needle.length;
  search: for (let i = start; i <= lastStart; i++) {
    for (let j = 0; j < needle.length; j++) {
      if (haystack[i + j] !== needle[j]) {
        continue search;
      }
    }
    return i;
  }
  return -1;
}

// Splits `bits` into the bytes before and after the first occurrence of
// `pattern`.
//
// Returns `Ok([before, after])` on success. Returns `Error(Nil)` when either
// argument is not a `BitArray`, when `pattern` is empty, when `pattern` is at
// least as long as `bits`, or when `pattern` does not occur.
export function bit_array_split_once(bits, pattern) {
  // Validate the types before touching `.buffer`, instead of relying on the
  // catch-all below to swallow a TypeError.
  if (!(bits instanceof BitArray) || !(pattern instanceof BitArray)) {
    return new Error(Nil);
  }

  try {
    const haystack = bits.buffer;
    const needle = pattern.buffer;

    // `>=` on purpose: an input equal to the pattern is an error, matching
    // the `[<<>>, <<>>]` clause of the Erlang implementation.
    if (needle.length < 1 || needle.length >= haystack.length) {
      return new Error(Nil);
    }

    const index = bit_array_find(haystack, needle, 0);
    if (index === -1) {
      return new Error(Nil);
    }

    const before = haystack.slice(0, index);
    const after = haystack.slice(index + needle.length);
    return new Ok([new BitArray(before), new BitArray(after)]);
  } catch (e) {
    // Safety net: any unexpected runtime failure is reported as Error(Nil),
    // mirroring how the Erlang implementation maps `badarg`.
    return new Error(Nil);
  }
}

// Splits `bits` at every occurrence of `pattern`, dropping empty parts.
//
// Returns `Ok(List)` of the non-empty parts. An input with no occurrence
// yields a one-element list, and an empty input yields an empty list, which
// matches `binary:split/3` with `trim_all` on the Erlang target. Returns
// `Error(Nil)` when either argument is not a `BitArray` or when `pattern`
// is empty.
export function bit_array_split(bits, pattern) {
  if (!(bits instanceof BitArray) || !(pattern instanceof BitArray)) {
    return new Error(Nil);
  }

  try {
    const haystack = bits.buffer;
    const needle = pattern.buffer;

    if (needle.length < 1) {
      return new Error(Nil);
    }

    const parts = [];
    // Start of the part currently being collected.
    let start = 0;
    let index = bit_array_find(haystack, needle, start);

    while (index !== -1) {
      // Adjacent or leading occurrences would produce an empty part; skip it.
      if (index > start) {
        parts.push(new BitArray(haystack.slice(start, index)));
      }
      start = index + needle.length;
      index = bit_array_find(haystack, needle, start);
    }

    // Whatever follows the last occurrence, unless it is empty.
    if (start < haystack.length) {
      parts.push(new BitArray(haystack.slice(start)));
    }

    return new Ok(List.fromArray(parts));
  } catch (e) {
    // Safety net: any unexpected runtime failure is reported as Error(Nil),
    // mirroring how the Erlang implementation maps `badarg`.
    return new Error(Nil);
  }
}
// Erlang only: the input ends in a 7-bit segment, and bit arrays that are
// not byte-aligned are not supported on the JavaScript target.
@target(erlang)
pub fn split_once_erlang_only_test() {
  bit_array.split_once(<<0, 1, 2:7>>, <<1>>)
  |> should.equal(Error(Nil))
}

pub fn split_test() {
  // Adjacent occurrences do not produce an empty part.
  bit_array.split(<<"hello":utf8>>, <<"l":utf8>>)
  |> should.equal(Ok([<<"he":utf8>>, <<"o":utf8>>]))

  // A leading occurrence does not produce an empty part.
  bit_array.split(<<0, 1, 0, 2, 0, 3>>, <<0>>)
  |> should.equal(Ok([<<1>>, <<2>>, <<3>>]))

  // Multi-byte pattern.
  bit_array.split(<<0, 1, 0, 2, 0, 3>>, <<0, 2>>)
  |> should.equal(Ok([<<0, 1>>, <<0, 3>>]))

  // Trailing occurrence.
  bit_array.split(<<1, 0>>, <<0>>)
  |> should.equal(Ok([<<1>>]))

  // Leading occurrence.
  bit_array.split(<<1, 0>>, <<1>>)
  |> should.equal(Ok([<<0>>]))

  // Pattern absent: the input comes back as the only part.
  bit_array.split(<<1>>, <<0>>)
  |> should.equal(Ok([<<1>>]))

  // Input equal to the pattern: nothing is left.
  bit_array.split(<<1, 2>>, <<1, 2>>)
  |> should.equal(Ok([]))

  // An empty pattern is rejected.
  bit_array.split(<<0, 1, 2, 0, 3, 4, 5>>, <<>>)
  |> should.equal(Error(Nil))
}

// Erlang only: the input ends in a 7-bit segment, and bit arrays that are
// not byte-aligned are not supported on the JavaScript target.
@target(erlang)
pub fn split_erlang_only_test() {
  bit_array.split(<<0, 1, 2:7>>, <<1>>)
  |> should.equal(Error(Nil))
}