Skip to content

Commit 9184ced

Browse files
committed
Prepare Unicode::UCD for version 15.1
A new property is being added that needs the same special handling as a similar existing one. Also, use \z instead of $ for boundary conditions. Spotted by Lukas Mai.
1 parent 4071919 commit 9184ced

File tree

7 files changed

+21
-14
lines changed

7 files changed

+21
-14
lines changed

charclass_invlists.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436006,7 +436006,7 @@ static const U8 WB_table[23][23] = {
436006436006
#endif /* defined(PERL_IN_REGEXEC_C) */
436007436007

436008436008
/* Generated from:
436009-
* 0e8307ab7c654d9c133ea885f5413a4eb5c0123ed2178f7e1cbabed36b67792c lib/Unicode/UCD.pm
436009+
* 7229a97216f54f7d47d5cff56fc8dbc185dcfe40db20533f8034a1215af787fe lib/Unicode/UCD.pm
436010436010
* eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 lib/unicore/ArabicShaping.txt
436011436011
* 333ae1e99db0504ca8a046a07dc45b5e7aa91869c685e6bf955ebe674804827a lib/unicore/BidiBrackets.txt
436012436012
* b4b9e1d87d8ea273613880de9d2b2f0b0b696244b42152bfa0a3106e7d983a20 lib/unicore/BidiMirroring.txt

lib/Unicode/UCD.pm

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use warnings;
55
no warnings 'surrogate'; # surrogates can be inputs to this
66
use charnames ();
77

8-
our $VERSION = '0.79';
8+
our $VERSION = '0.80';
99

1010
sub DEBUG () { 0 }
1111
$|=1 if DEBUG;
@@ -3554,8 +3554,8 @@ format is the empty string.
35543554
35553555
is a combination of the C<"al"> type and the C<"ae"> type. Some of
35563556
the map array elements have the forms given by C<"al">, and
3557-
the rest are the empty string. The property C<NFKC_Casefold> has this form.
3558-
An example slice is:
3557+
the rest are the empty string. The properties C<NFKC_Casefold> and
3558+
C<NFKC_Simple_Casefold> have this form. An example slice is:
35593559
35603560
@$ranges_ref @$maps_ref Note
35613561
...
@@ -3846,9 +3846,9 @@ RETRY:
38463846
# in the new-style, and this routine is supposed to return old-style block
38473847
# names. The Name table is valid, but we need to execute the special code
38483848
# below to add in the algorithmic-defined name entries.
3849-
# And NFKCCF needs conversion, so handle that here too.
3849+
# And NFKCCF and NFKCSCF need conversion, so handle those here too.
38503850
if (ref $swash eq ""
3851-
|| $swash->{'TYPE'} =~ / ^ To (?: Blk | Na | NFKCCF ) $ /x)
3851+
|| $swash->{'TYPE'} =~ / ^ To (?: Blk | Na | NFKCS?CF ) \z /x)
38523852
{
38533853

38543854
# Get the short name of the input property, in standard form
@@ -3993,7 +3993,7 @@ RETRY:
39933993
$decomps{'TYPE'} = "ToDt";
39943994
$SwashInfo{'ToDt'}{'missing'} = "None";
39953995
$SwashInfo{'ToDt'}{'format'} = "s";
3996-
} # 'dm' is handled below, with 'nfkccf'
3996+
} # 'dm' is handled below, with 'nfkcs?cf'
39973997

39983998
$decomps{'LIST'} = "";
39993999

@@ -4045,11 +4045,11 @@ RETRY:
40454045
}
40464046
$swash = \%decomps;
40474047
}
4048-
elsif ($second_try ne 'nfkccf') { # Don't know this property. Fail.
4048+
elsif ($second_try !~ /^nfkcs?cf\z/) { # Don't know this property. Fail.
40494049
return;
40504050
}
40514051

4052-
if ($second_try eq 'nfkccf' || $second_try eq 'dm') {
4052+
if ($second_try =~ / ^ (?: nfkcs?cf | dm ) \z /x) {
40534053

40544054
# The 'nfkccf' property is stored in the old format for backwards
40554055
# compatibility for any applications that have read its file
@@ -4180,7 +4180,9 @@ RETRY:
41804180
} # End of loop constructing the converted list
41814181

41824182
# Finish up the data structure for our converted swash
4183-
my $type = ($second_try eq 'nfkccf') ? 'ToNFKCCF' : 'ToDm';
4183+
my $type = ($second_try =~ / ^ ( nfkcs?cf ) \z /x)
4184+
? 'To' . uc $1
4185+
: 'ToDm';
41844186
$revised_swash{'LIST'} = $list;
41854187
$revised_swash{'TYPE'} = $type;
41864188
$revised_swash{'SPECIALS'} = $swash->{'SPECIALS'};
@@ -4265,6 +4267,10 @@ RETRY:
42654267
# assumed to be 'Y'.
42664268

42674269
foreach my $range (split "\n", $swash->{'LIST'}) {
4270+
4271+
# No code points matched
4272+
last if $range eq '!Unicode::UCD::All';
4273+
42684274
$range =~ s/ \s* (?: \# .* )? $ //xg; # rmv trailing space, comments
42694275

42704276
# Find the beginning and end of the range on the line

lib/Unicode/UCD.t

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,7 @@ is(@list, 0, "prop_invmap('Is_Is_Any') returns <undef> since two is's");
16021602
# applications use them (though such use is deprecated).
16031603
my @legacy_file_format = (qw( Bidi_Mirroring_Glyph
16041604
NFKC_Casefold
1605+
NFKC_Simple_Casefold
16051606
)
16061607
);
16071608

lib/unicore/uni_keywords.pl

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

regcharclass.h

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

regexp_constants.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#define MAX_FOLD_FROMS 3
3030

3131
/* Generated from:
32-
* 0e8307ab7c654d9c133ea885f5413a4eb5c0123ed2178f7e1cbabed36b67792c lib/Unicode/UCD.pm
32+
* 7229a97216f54f7d47d5cff56fc8dbc185dcfe40db20533f8034a1215af787fe lib/Unicode/UCD.pm
3333
* eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 lib/unicore/ArabicShaping.txt
3434
* 333ae1e99db0504ca8a046a07dc45b5e7aa91869c685e6bf955ebe674804827a lib/unicore/BidiBrackets.txt
3535
* b4b9e1d87d8ea273613880de9d2b2f0b0b696244b42152bfa0a3106e7d983a20 lib/unicore/BidiMirroring.txt

uni_keywords.h

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)