Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions lib/stdlib/test/unicode_util_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
nfd/1, nfc/1, nfkd/1, nfkc/1,
whitespace/1,
get/1,
lookup/1,
lookup/1, category/1, is_id_func/1,
count/1]).

-export([debug/0, id/1, bin_split/1, uc_loaded_size/0,
Expand All @@ -47,6 +47,8 @@ all() ->
cp, gc,
nfd, nfc, nfkd, nfkc,
whitespace,
category,
is_id_func,
get,
lookup,
count
Expand Down Expand Up @@ -91,7 +93,8 @@ casefold(_) ->
[[$s,$s]] = unicode_util:casefold([$ẞ]),
ok.

whitespace(_) ->
whitespace(_Config) ->
%% Pattern whitespace
WS = unicode_util:whitespace(),
WS = lists:filter(fun unicode_util:is_whitespace/1, WS),
false = unicode_util:is_whitespace($A),
Expand Down Expand Up @@ -368,6 +371,28 @@ check_category(Id, [{Next,_}|_] = Rest, Es) ->
check_category(_Id, [], Es) ->
Es.

%% Cross-checks unicode_util:category/1 against the `category' entry of the
%% map returned by unicode_util:lookup/1 for every integer in 1..200000,
%% then asserts that category/1 raises for arguments that are expected to
%% be rejected (a negative integer, a very large integer and an atom).
category(_Config) ->
    Agrees = fun(CodePoint) ->
                     FromLookup = maps:get(category, unicode_util:lookup(CodePoint)),
                     FromLookup == unicode_util:category(CodePoint)
             end,
    %% Collect every code point where the two sources disagree; none expected.
    [] = lists:filter(fun(CodePoint) -> not Agrees(CodePoint) end,
                      lists:seq(1, 200000)),
    %% Each of these arguments must make category/1 fail.
    lists:foreach(fun(Invalid) ->
                          {'EXIT', _} = (catch unicode_util:category(Invalid))
                  end,
                  [-1, 5000000, foobar]),
    ok.

%% Smoke test for the identifier-related predicates in unicode_util.
%% Only one rejecting and one accepting sample is checked per predicate;
%% per the original note, more thorough tests live in the unicode tests.
is_id_func(_Config) ->
    %% {Predicate, CodePoint, ExpectedResult}, checked in order.
    Samples =
        [{fun unicode_util:is_other_id_start/1, $a, false},
         {fun unicode_util:is_other_id_start/1, 6277, true},          % 16#1885
         {fun unicode_util:is_other_id_continue/1, $a, false},
         {fun unicode_util:is_other_id_continue/1, 183, true},        % 16#B7
         {fun unicode_util:is_letter_not_pattern_syntax/1, 11823, false}, % 16#2E2F
         {fun unicode_util:is_letter_not_pattern_syntax/1, $a, true}],
    lists:foreach(fun({Predicate, CodePoint, Expected}) ->
                          %% Expected is already bound, so this is an assertion.
                          Expected = Predicate(CodePoint)
                  end,
                  Samples),
    ok.

count(Config) ->
Parent = self(),
Expand Down
1,737 changes: 706 additions & 1,031 deletions lib/stdlib/test/unicode_util_SUITE_data/GraphemeBreakTest.txt

Large diffs are not rendered by default.

35,990 changes: 19,329 additions & 16,661 deletions lib/stdlib/test/unicode_util_SUITE_data/LineBreakTest.txt

Large diffs are not rendered by default.

75 changes: 72 additions & 3 deletions lib/stdlib/test/unicode_util_SUITE_data/NormalizationTest.txt

Large diffs are not rendered by default.

Binary file modified lib/stdlib/test/unicode_util_SUITE_data/unicode_table.bin
Binary file not shown.
40 changes: 34 additions & 6 deletions lib/stdlib/uc_spec/CaseFolding.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CaseFolding-16.0.0.txt
# Date: 2024-04-30, 21:48:11 GMT
# © 2024 Unicode®, Inc.
# CaseFolding-17.0.0.txt
# Date: 2025-07-30, 23:54:36 GMT
# © 2025 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
Expand All @@ -18,15 +18,15 @@
# The data supports both implementations that require simple case foldings
# (where string lengths don't change), and implementations that allow full case folding
# (where string lengths may grow). Note that where they can be supported, the
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
# full case foldings are superior: for example, they allow "FUSS" and "Fuß" to match.
#
# All code points not listed in this file map to themselves.
#
# NOTE: case folding does not preserve normalization formats!
#
# For information on case folding, including how to have case folding
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
# The Unicode Standard.
# preserve normalization formats, see the
# "Conformance" / "Default Case Algorithms" section of the core specification.
#
# ================================================================================
# Format
Expand Down Expand Up @@ -1243,7 +1243,10 @@ A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN
A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE
A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
A7D2; C; A7D3; # LATIN CAPITAL LETTER DOUBLE THORN
A7D4; C; A7D5; # LATIN CAPITAL LETTER DOUBLE WYNN
A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA
Expand Down Expand Up @@ -1616,6 +1619,31 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
16EA0; C; 16EBB; # BERIA ERFE CAPITAL LETTER ARKAB
16EA1; C; 16EBC; # BERIA ERFE CAPITAL LETTER BASIGNA
16EA2; C; 16EBD; # BERIA ERFE CAPITAL LETTER DARBAI
16EA3; C; 16EBE; # BERIA ERFE CAPITAL LETTER EH
16EA4; C; 16EBF; # BERIA ERFE CAPITAL LETTER FITKO
16EA5; C; 16EC0; # BERIA ERFE CAPITAL LETTER GOWAY
16EA6; C; 16EC1; # BERIA ERFE CAPITAL LETTER HIRDEABO
16EA7; C; 16EC2; # BERIA ERFE CAPITAL LETTER I
16EA8; C; 16EC3; # BERIA ERFE CAPITAL LETTER DJAI
16EA9; C; 16EC4; # BERIA ERFE CAPITAL LETTER KOBO
16EAA; C; 16EC5; # BERIA ERFE CAPITAL LETTER LAKKO
16EAB; C; 16EC6; # BERIA ERFE CAPITAL LETTER MERI
16EAC; C; 16EC7; # BERIA ERFE CAPITAL LETTER NINI
16EAD; C; 16EC8; # BERIA ERFE CAPITAL LETTER GNA
16EAE; C; 16EC9; # BERIA ERFE CAPITAL LETTER NGAY
16EAF; C; 16ECA; # BERIA ERFE CAPITAL LETTER OI
16EB0; C; 16ECB; # BERIA ERFE CAPITAL LETTER PI
16EB1; C; 16ECC; # BERIA ERFE CAPITAL LETTER ERIGO
16EB2; C; 16ECD; # BERIA ERFE CAPITAL LETTER ERIGO TAMURA
16EB3; C; 16ECE; # BERIA ERFE CAPITAL LETTER SERI
16EB4; C; 16ECF; # BERIA ERFE CAPITAL LETTER SHEP
16EB5; C; 16ED0; # BERIA ERFE CAPITAL LETTER TATASOUE
16EB6; C; 16ED1; # BERIA ERFE CAPITAL LETTER UI
16EB7; C; 16ED2; # BERIA ERFE CAPITAL LETTER WASSE
16EB8; C; 16ED3; # BERIA ERFE CAPITAL LETTER AY
1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
Expand Down
6 changes: 3 additions & 3 deletions lib/stdlib/uc_spec/CompositionExclusions.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CompositionExclusions-16.0.0.txt
# Date: 2024-02-02
# © 2024 Unicode®, Inc.
# CompositionExclusions-17.0.0.txt
# Date: 2025-08-01
# © 2025 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
Expand Down
Loading
Loading