Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a function to replace matches with a callback #3

Merged
merged 6 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Changelog

## v1.1.0 - 2025-02-05

- Added the `match_map` function.
2 changes: 1 addition & 1 deletion gleam.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = "gleam_regexp"
version = "1.0.0"
version = "1.1.0"
gleam = ">= 1.0.0"
licences = ["Apache-2.0"]
description = "Regular expressions in Gleam!"
Expand Down
18 changes: 18 additions & 0 deletions src/gleam/regexp.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,21 @@ pub fn replace(
in string: String,
with substitute: String,
) -> String

/// Creates a new `String` by replacing all substrings that match the regular
/// expression with the result of applying the function to each match.
///
/// The function is called with the `Match` for each match found, so it can
/// inspect both `content` and `submatches`. The `String` it returns is used in
/// place of that match. Parts of the input that do not match are kept as they
/// are.
///
/// ## Examples
///
/// ```gleam
/// let assert Ok(re) = regexp.from_string("\\w+")
/// regexp.match_map(re, "hello, joe!", fn(m) { string.capitalise(m.content) })
/// // -> "Hello, Joe!"
/// ```
@external(erlang, "gleam_regexp_ffi", "match_map")
@external(javascript, "../gleam_regexp_ffi.mjs", "match_map")
pub fn match_map(
each pattern: Regexp,
in string: String,
with substitute: fn(Match) -> String,
) -> String
8 changes: 7 additions & 1 deletion src/gleam_regexp_ffi.erl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-module(gleam_regexp_ffi).

-export([compile/2, check/2, split/2, replace/3, scan/2]).
-export([compile/2, check/2, split/2, replace/3, scan/2, match_map/3]).

compile(String, Options) ->
{options, Caseless, Multiline} = Options,
Expand Down Expand Up @@ -44,3 +44,9 @@ scan(Regexp, String) ->
replace(Regexp, Subject, Replacement) ->
re:replace(Subject, Regexp, Replacement, [global, {return, binary}]).

match_map(Regexp, Subject, Replacement) ->
Replacement1 = fun(Content, Submatches) ->
Submatches1 = lists:map(fun gleam@string:to_option/1, Submatches),
Replacement({match, Content, Submatches1})
end,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this use the same submatches function above?

Copy link
Contributor Author

@ryanmiville ryanmiville Dec 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to use it and it didn't work. The type of the replacement function is `fun((binary(), [binary()]) -> iodata() | unicode:charlist())`, so the submatches are just `[binary()]`.

re:replace(Subject, Regexp, Replacement1, [global, {return, binary}]).
36 changes: 25 additions & 11 deletions src/gleam_regexp_ffi.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,35 @@ export function split(regex, string) {
export function scan(regex, string) {
const matches = Array.from(string.matchAll(regex)).map((match) => {
const content = match[0];
const submatches = [];
for (let n = match.length - 1; n > 0; n--) {
if (match[n]) {
submatches[n - 1] = new Some(match[n]);
continue;
}
if (submatches.length > 0) {
submatches[n - 1] = new None();
}
}
return new RegexMatch(content, List.fromArray(submatches));
return new RegexMatch(content, submatches(match.slice(1)));
});
return List.fromArray(matches);
}

/**
 * Replace every match of `regex` in `original_string` with `replacement`.
 *
 * This delegates to `String.prototype.replaceAll`, so `regex` must carry the
 * global flag.
 *
 * @param {RegExp} regex - Pattern whose matches are replaced.
 * @param {string} original_string - Text to search.
 * @param {string} replacement - Text substituted for each match.
 * @returns {string} A new string with all matches replaced.
 */
export function replace(regex, original_string, replacement) {
  const replaced = original_string.replaceAll(regex, replacement);
  return replaced;
}

export function match_map(regex, original_string, replacement) {
let replace = (match, ...args) => {
const hasNamedGroups = typeof args.at(-1) === "object";
const groups = args.slice(0, hasNamedGroups ? -3 : -2);
let regexMatch = new RegexMatch(match, submatches(groups));
return replacement(regexMatch);
};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a copy/paste of the code above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's very similar, but not identical. I originally pulled it out into its own function to reuse, but when I did, my test failed. I didn't dig in to understand why the above code iterated backwards, but when I iterated forwards, the test passed.

I'm not opposed to trying to find a way to unify the two, but I didn't want to go altering existing code that works without discussing it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at it again, I bet I didn't account for the n - 1 in the scan implementation since the match array in scan has the whole match string as the first element, whereas this array only holds the submatches.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a shame there's such similar code here! Can they not be unified?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they can. I'll take a stab at it later today.

return original_string.replaceAll(regex, replace);
}

/**
 * Convert an array of capture-group values into a Gleam list of options.
 *
 * Each truthy group becomes `Some(group)`. A falsy group (e.g. `undefined` or
 * the empty string) becomes `None` when a later group captured something, and
 * is dropped otherwise, so the result never ends with `None`.
 *
 * @param {Array<string | undefined>} groups - Capture groups without the full match.
 * @returns The options as a Gleam `List`.
 */
function submatches(groups) {
  // Find the index just past the last group that captured something.
  let end = groups.length;
  while (end > 0 && !groups[end - 1]) {
    end -= 1;
  }

  // Everything before that index is kept, with gaps represented as None.
  const options = [];
  for (let index = 0; index < end; index++) {
    const group = groups[index];
    options.push(group ? new Some(group) : new None());
  }
  return List.fromArray(options);
}
30 changes: 29 additions & 1 deletion test/gleam_regexp_test.gleam
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import gleam/option.{None, Some}
import gleam/regexp.{Match, Options}
import gleam/regexp.{type Match, Match, Options}
import gleeunit
import gleeunit/should

Expand Down Expand Up @@ -190,3 +190,31 @@ pub fn replace_3_test() {
regexp.replace(re, "🐈🐈 are great!", "🐕")
|> should.equal("🐕🐕 are great!")
}

// match_map replaces each match with the callback's result, chosen here from
// the match content; text that does not match is left as it is.
pub fn match_map_0_test() {
  let assert Ok(digits) = regexp.from_string("1|2|3")
  let spell_out = fn(found: Match) {
    case found.content {
      "1" -> "one"
      "2" -> "two"
      "3" -> "three"
      other -> other
    }
  }

  "1, 2, 3, 4"
  |> regexp.match_map(digits, _, spell_out)
  |> should.equal("one, two, three, 4")
}

// match_map passes the submatches to the callback: the quoted digit is
// captured as the only group and decides the replacement for the whole match.
pub fn match_map_1_test() {
  let assert Ok(quoted_digit) = regexp.from_string("'(1|2|3)'")
  let spell_out = fn(found: Match) {
    case found.submatches {
      [Some("1")] -> "one"
      [Some("2")] -> "two"
      [Some("3")] -> "three"
      _ -> found.content
    }
  }

  "'1', '2', '3', '4'"
  |> regexp.match_map(quoted_digit, _, spell_out)
  |> should.equal("one, two, three, '4'")
}