diff --git a/CHANGELOG.md b/CHANGELOG.md
index 09486494..f1fb1b43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## v0.71.0 - Unreleased
+
+- The `bit_array` module gains the `split` and `split_once` functions.
+
 ## v0.70.0 - 2026-03-07
 
 - Fixed a bug where `uri.parse` would incorrectly handle uppercase schemes on
diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam
index 1fefa8b3..b4b8b8d0 100644
--- a/src/gleam/bit_array.gleam
+++ b/src/gleam/bit_array.gleam
@@ -57,6 +57,52 @@ pub fn slice(
   take length: Int,
 ) -> Result(BitArray, Nil)
 
+/// Splits a bit array into two parts at the first occurrence of the pattern.
+///
+/// The result will not include the pattern. An error is returned if the
+/// pattern is empty, is not found, or is equal to the whole bit array.
+///
+/// On Erlang an error is also returned for non byte-aligned bit arrays.
+///
+/// ## Examples
+///
+/// ```gleam
+/// split_once(from: <<1, 2, 3>>, on: <<2>>)
+/// // -> Ok(#(<<1>>, <<3>>))
+///
+/// split_once(from: <<0>>, on: <<1>>)
+/// // -> Error(Nil)
+/// ```
+@external(erlang, "gleam_stdlib", "bit_array_split_once")
+@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
+pub fn split_once(
+  from bits: BitArray,
+  on pattern: BitArray,
+) -> Result(#(BitArray, BitArray), Nil)
+
+/// Splits a bit array into parts at every occurrence of the pattern.
+///
+/// The parts will not include the pattern and empty parts are dropped. A
+/// non-empty input without the pattern is returned as the only part.
+///
+/// An error is returned if the pattern is empty.
+///
+/// ## Examples
+///
+/// ```gleam
+/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
+/// // -> Ok([<<1>>, <<2>>, <<3>>])
+///
+/// split(from: <<0>>, on: <<1>>)
+/// // -> Ok([<<0>>])
+/// ```
+@external(erlang, "gleam_stdlib", "bit_array_split")
+@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
+pub fn split(
+  from bits: BitArray,
+  on pattern: BitArray,
+) -> Result(List(BitArray), Nil)
+
 /// Tests to see whether a bit array is valid UTF-8.
 ///
 pub fn is_utf8(bits: BitArray) -> Bool {
diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl
index d89cb831..2c6314c6 100644
--- a/src/gleam_stdlib.erl
+++ b/src/gleam_stdlib.erl
@@ -11,7 +11,8 @@
     int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2,
     crop_string/2, base16_encode/1, base16_decode/1, string_replace/3, slice/3,
     bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, index/2, list/5,
-    dict/1, int/1, float/1, bit_array/1, is_null/1
+    dict/1, int/1, float/1, bit_array/1, is_null/1, bit_array_split_once/2,
+    bit_array_split/2
 ]).
 
 %% Taken from OTP's uri_string module
@@ -149,6 +150,21 @@ bit_array_slice(Bin, Pos, Len) ->
     catch error:badarg -> {error, nil}
     end.
 
+bit_array_split_once(Bin, Sub) -> % split Bin around the first match of Sub
+    try
+        case binary:split(Bin, [Sub]) of
+            [<<>>, <<>>] -> {error, nil}; % Sub is the whole of Bin
+            [A, B] -> {ok, {A, B}};
+            _ -> {error, nil} % any other shape: Bin was not split in two
+        end
+    catch error:badarg -> {error, nil} % binary:split rejected the arguments
+    end.
+
+bit_array_split(Bin, Sub) -> % split at every match, dropping empty parts
+    try {ok, binary:split(Bin, [Sub], [global, trim_all])}
+    catch error:badarg -> {error, nil} % binary:split rejected the arguments
+    end.
+
 base64_decode(S) ->
     try {ok, base64:decode(S)}
     catch error:_ -> {error, nil}
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index 09ae6ed9..721ec574 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -458,6 +458,94 @@ export function bit_array_slice(bits, position, length) {
   return Result$Ok(bitArraySlice(bits, start * 8, end * 8));
 }
 
+// Splits `bits` at the first occurrence of `pattern`, returning the bytes
+// before and after the match. Returns an error when either argument is not a
+// `BitArray`, when the pattern is empty or at least as long as the input, or
+// when the pattern does not occur in the input.
+//
+// NOTE(review): `bit_array_slice` above builds its result with `Result$Ok`;
+// confirm that `Ok`, `Error` and `List` are imported in this module.
+export function bit_array_split_once(bits, pattern) {
+  try {
+    // Check the types before reading `.buffer` from either argument.
+    if (!(bits instanceof BitArray) || !(pattern instanceof BitArray)) {
+      return new Error(Nil);
+    }
+
+    const haystack = bits.buffer;
+    const needle = pattern.buffer;
+    // `>=` also rejects a pattern that is equal to the whole input.
+    if (needle.length < 1 || needle.length >= haystack.length) {
+      return new Error(Nil);
+    }
+
+    const lastStart = haystack.length - needle.length;
+    find: for (let i = 0; i <= lastStart; i++) {
+      for (let j = 0; j < needle.length; j++) {
+        if (haystack[i + j] !== needle[j]) {
+          continue find;
+        }
+      }
+      const before = haystack.slice(0, i);
+      const after = haystack.slice(i + needle.length);
+      return new Ok([new BitArray(before), new BitArray(after)]);
+    }
+
+    return new Error(Nil);
+  } catch (e) {
+    // Anything thrown while reading the buffers is reported as an error.
+    return new Error(Nil);
+  }
+}
+
+// Splits `bits` at every occurrence of `pattern`, dropping empty parts so the
+// result agrees with `binary:split/3` using `[global, trim_all]` on Erlang.
+// Returns an error when either argument is not a `BitArray` or the pattern is
+// empty. An input shorter than the pattern is never matched, so it is
+// returned as the only part, or as no parts at all when it is empty.
+export function bit_array_split(bits, pattern) {
+  try {
+    if (!(bits instanceof BitArray) || !(pattern instanceof BitArray)) {
+      return new Error(Nil);
+    }
+
+    const haystack = bits.buffer;
+    const needle = pattern.buffer;
+    if (needle.length < 1) {
+      return new Error(Nil);
+    }
+
+    const parts = [];
+    // Index just past the previous match, where the next part begins.
+    let partStart = 0;
+    const lastStart = haystack.length - needle.length;
+
+    find: for (let i = 0; i <= lastStart; i++) {
+      for (let j = 0; j < needle.length; j++) {
+        if (haystack[i + j] !== needle[j]) {
+          continue find;
+        }
+      }
+
+      if (i > partStart) {
+        parts.push(new BitArray(haystack.slice(partStart, i)));
+      }
+
+      partStart = i + needle.length;
+      // Resume right after the match; the loop's `i++` supplies the last step.
+      i = partStart - 1;
+    }
+
+    if (partStart < haystack.length) {
+      parts.push(new BitArray(haystack.slice(partStart)));
+    }
+
+    return new Ok(List.fromArray(parts));
+  } catch (e) {
+    return new Error(Nil);
+  }
+}
+
 export function codepoint(int) {
   return new UtfCodepoint(int);
 }
diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam
index c0d91949..87a6b206 100644
--- a/test/gleam/bit_array_test.gleam
+++ b/test/gleam/bit_array_test.gleam
@@ -188,6 +188,126 @@ pub fn slice_large_error_test() {
     == Error(Nil)
 }
 
+pub fn split_once_middle_test() {
+  assert <<0, 1, 2>>
+    |> bit_array.split_once(<<1>>)
+    == Ok(#(<<0>>, <<2>>))
+}
+
+pub fn split_once_beginning_test() {
+  assert <<0, 1, 2>>
+    |> bit_array.split_once(<<0>>)
+    == Ok(#(<<>>, <<1, 2>>))
+}
+
+pub fn split_once_end_test() {
+  assert <<0, 1, 2>>
+    |> bit_array.split_once(<<2>>)
+    == Ok(#(<<0, 1>>, <<>>))
+}
+
+pub fn split_once_multi_byte_separator_test() {
+  assert <<0, 1, 0, 2, 0, 3>>
+    |> bit_array.split_once(<<0, 2>>)
+    == Ok(#(<<0, 1>>, <<0, 3>>))
+}
+
+pub fn split_once_empty_haystack_test() {
+  assert <<>>
+    |> bit_array.split_once(<<1>>)
+    == Error(Nil)
+}
+
+pub fn split_once_empty_separator_test() {
+  assert <<0, 1, 2, 0, 3, 4, 5>>
+    |> bit_array.split_once(<<>>)
+    == Error(Nil)
+}
+
+pub fn split_once_separator_equals_haystack_test() {
+  assert <<1>>
+    |> bit_array.split_once(<<1>>)
+    == Error(Nil)
+}
+
+pub fn split_once_no_match_test() {
+  assert <<0>>
+    |> bit_array.split_once(<<1>>)
+    == Error(Nil)
+}
+
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn split_once_unaligned_test() {
+  assert <<0, 1, 2:7>>
+    |> bit_array.split_once(<<1>>)
+    == Error(Nil)
+}
+
+pub fn split_string_test() {
+  assert <<"hello":utf8>>
+    |> bit_array.split(<<"l":utf8>>)
+    == Ok([<<"he":utf8>>, <<"o":utf8>>])
+}
+
+pub fn split_multiple_matches_test() {
+  assert <<0, 1, 0, 2, 0, 3>>
+    |> bit_array.split(<<0>>)
+    == Ok([<<1>>, <<2>>, <<3>>])
+}
+
+pub fn split_multi_byte_separator_test() {
+  assert <<0, 1, 0, 2, 0, 3>>
+    |> bit_array.split(<<0, 2>>)
+    == Ok([<<0, 1>>, <<0, 3>>])
+}
+
+pub fn split_trailing_separator_test() {
+  assert <<1, 0>>
+    |> bit_array.split(<<0>>)
+    == Ok([<<1>>])
+}
+
+pub fn split_leading_separator_test() {
+  assert <<1, 0>>
+    |> bit_array.split(<<1>>)
+    == Ok([<<0>>])
+}
+
+pub fn split_no_match_test() {
+  assert <<1>>
+    |> bit_array.split(<<0>>)
+    == Ok([<<1>>])
+}
+
+pub fn split_empty_haystack_test() {
+  assert <<>>
+    |> bit_array.split(<<1>>)
+    == Ok([])
+}
+
+pub fn split_separator_equals_haystack_test() {
+  assert <<1, 2>>
+    |> bit_array.split(<<1, 2>>)
+    == Ok([])
+}
+
+pub fn split_empty_separator_test() {
+  assert <<0, 1, 2, 0, 3, 4, 5>>
+    |> bit_array.split(<<>>)
+    == Error(Nil)
+}
+
+// This test is target specific since it's using non byte-aligned BitArrays
+// and those are not supported on the JavaScript target.
+@target(erlang)
+pub fn split_unaligned_test() {
+  assert <<0, 1, 2:7>>
+    |> bit_array.split(<<1>>)
+    == Error(Nil)
+}
+
 pub fn to_string_empty_test() {
   assert bit_array.to_string(<<>>) == Ok("")
 }