diff --git a/charclass_invlists.inc b/charclass_invlists.inc index 50dfd4b65bf6..ec8f70aca951 100644 --- a/charclass_invlists.inc +++ b/charclass_invlists.inc @@ -436006,7 +436006,7 @@ static const U8 WB_table[23][23] = { #endif /* defined(PERL_IN_REGEXEC_C) */ /* Generated from: - * 0e8307ab7c654d9c133ea885f5413a4eb5c0123ed2178f7e1cbabed36b67792c lib/Unicode/UCD.pm + * 92b3b0b73e402a9efee67f10380c390638c080fdde7430665e57abdac2fa976f lib/Unicode/UCD.pm * eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 lib/unicore/ArabicShaping.txt * 333ae1e99db0504ca8a046a07dc45b5e7aa91869c685e6bf955ebe674804827a lib/unicore/BidiBrackets.txt * b4b9e1d87d8ea273613880de9d2b2f0b0b696244b42152bfa0a3106e7d983a20 lib/unicore/BidiMirroring.txt diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index e8e97455d082..8f649b3c91b4 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -5,7 +5,7 @@ use warnings; no warnings 'surrogate'; # surrogates can be inputs to this use charnames (); -our $VERSION = '0.79'; +our $VERSION = '0.80'; sub DEBUG () { 0 } $|=1 if DEBUG; @@ -3554,8 +3554,8 @@ format is the empty string. is a combination of the C<"al"> type and the C<"ae"> type. Some of the map array elements have the forms given by C<"al">, and -the rest are the empty string. The property C<NFKC_Casefold> has this form. -An example slice is: +the rest are the empty string. The properties C<NFKC_Casefold> and +C<NFKC_Simple_Casefold> have this form. An example slice is: @$ranges_ref @$maps_ref Note ... @@ -3846,9 +3846,9 @@ RETRY: # in the new-style, and this routine is supposed to return old-style block # names. The Name table is valid, but we need to execute the special code # below to add in the algorithmic-defined name entries. - # And NFKCCF needs conversion, so handle that here too. + # And NFKCCF and NFKCSCF need conversion, so handle those here too. 
if (ref $swash eq "" - || $swash->{'TYPE'} =~ / ^ To (?: Blk | Na | NFKCCF ) $ /x) + || $swash->{'TYPE'} =~ / ^ To (?: Blk | Na | NFKCS?CF ) \z /x) { # Get the short name of the input property, in standard form @@ -3993,7 +3993,7 @@ RETRY: $decomps{'TYPE'} = "ToDt"; $SwashInfo{'ToDt'}{'missing'} = "None"; $SwashInfo{'ToDt'}{'format'} = "s"; - } # 'dm' is handled below, with 'nfkccf' + } # 'dm' is handled below, with 'nfkcs?cf' $decomps{'LIST'} = ""; @@ -4045,11 +4045,11 @@ RETRY: } $swash = \%decomps; } - elsif ($second_try ne 'nfkccf') { # Don't know this property. Fail. + elsif ($second_try !~ /^nfkcs?cf\z/) { # Don't know this property. Fail. return; } - if ($second_try eq 'nfkccf' || $second_try eq 'dm') { + if ($second_try =~ / ^ (?: nfkcs?cf | dm ) \z /x) { # The 'nfkccf' property is stored in the old format for backwards # compatibility for any applications that has read its file @@ -4180,7 +4180,9 @@ RETRY: } # End of loop constructing the converted list # Finish up the data structure for our converted swash - my $type = ($second_try eq 'nfkccf') ? 'ToNFKCCF' : 'ToDm'; + my $type = ($second_try =~ / ^ ( nfkcs?cf ) \z /x) + ? 'To' . $1 + : 'ToDm'; $revised_swash{'LIST'} = $list; $revised_swash{'TYPE'} = $type; $revised_swash{'SPECIALS'} = $swash->{'SPECIALS'}; @@ -4265,6 +4267,10 @@ RETRY: # assumed to be 'Y'. foreach my $range (split "\n", $swash->{'LIST'}) { + + # No code points matched + last if $range eq '!Unicode::UCD::All'; + $range =~ s/ \s* (?: \# .* )? $ //xg; # rmv trailing space, comments # Find the beginning and end of the range on the line diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 57bd70286017..d068958ea174 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -1543,13 +1543,29 @@ foreach my $set_of_tables (\%Unicode::UCD::stricter_to_file_of, \%Unicode::UCD:: chomp $official; $/ = $input_record_separator; - # If we are to test against an inverted file, it is easier to invert - # our array than the file. 
if ($invert) { - if (@tested && $tested[0] == 0) { - shift @tested; - } else { - unshift @tested, 0; + + # Special case an inverted empty file + if (@tested == 0) { + if ($official ne 'V0') { + fail_with_diff($mod_table, $official, 'V0', + "prop_invlist"); + } + else { + pass("prop_invlist('$mod_table')"); + } + + next; + } + else { + + # If we are to test against an inverted file, it is easier to + # invert our array than the file. + if ($tested[0] == 0) { + shift @tested; + } else { + unshift @tested, 0; + } } } @@ -1602,6 +1618,7 @@ is(@list, 0, "prop_invmap('Is_Is_Any') returns since two is's"); # applications use them (though such use is deprecated). my @legacy_file_format = (qw( Bidi_Mirroring_Glyph NFKC_Casefold + NFKC_Simple_Casefold ) ); @@ -2078,9 +2095,18 @@ foreach my $prop (sort(keys %props)) { # it's an error my %specials = %$specials_ref if $specials_ref; + # Special case an expected and gotten empty return + if ( @$invlist_ref - $upper_limit_subtract == 1 + && $official =~ / ^ ( V0 | !Unicode::UCD::All ) \z /x) + { + pass("prop_invmap('$display_prop')"); + next PROPERTY; + } + # The extra -$upper_limit_subtract is because the final element may # have been tested above to be for anything above Unicode, in which - # case the file may not go that high. + # case the file may not go that high. The upper bound may be changed + # in the loop, so can't pre-calculate it. 
for (my $i = 0; $i < @$invlist_ref - $upper_limit_subtract; $i++) { # If the map element is a reference, have to stringify it (but diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index fe9034ecda6c..cd9e5c988885 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1282,7 +1282,7 @@ 1; # Generated from: -# 0e8307ab7c654d9c133ea885f5413a4eb5c0123ed2178f7e1cbabed36b67792c lib/Unicode/UCD.pm +# 92b3b0b73e402a9efee67f10380c390638c080fdde7430665e57abdac2fa976f lib/Unicode/UCD.pm # eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 lib/unicore/ArabicShaping.txt # 333ae1e99db0504ca8a046a07dc45b5e7aa91869c685e6bf955ebe674804827a lib/unicore/BidiBrackets.txt # b4b9e1d87d8ea273613880de9d2b2f0b0b696244b42152bfa0a3106e7d983a20 lib/unicore/BidiMirroring.txt diff --git a/regcharclass.h b/regcharclass.h index 0b7b686598ea..99eb30d415e3 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -3801,7 +3801,7 @@ #endif /* PERL_REGCHARCLASS_H_ */ /* Generated from: - * 0e8307ab7c654d9c133ea885f5413a4eb5c0123ed2178f7e1cbabed36b67792c lib/Unicode/UCD.pm + * 92b3b0b73e402a9efee67f10380c390638c080fdde7430665e57abdac2fa976f lib/Unicode/UCD.pm * eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 lib/unicore/ArabicShaping.txt * 333ae1e99db0504ca8a046a07dc45b5e7aa91869c685e6bf955ebe674804827a lib/unicore/BidiBrackets.txt * b4b9e1d87d8ea273613880de9d2b2f0b0b696244b42152bfa0a3106e7d983a20 lib/unicore/BidiMirroring.txt diff --git a/regexp_constants.h b/regexp_constants.h index 38e727c60c90..c548cd523180 100644 --- a/regexp_constants.h +++ b/regexp_constants.h @@ -29,7 +29,7 @@ #define MAX_FOLD_FROMS 3 /* Generated from: - * 0e8307ab7c654d9c133ea885f5413a4eb5c0123ed2178f7e1cbabed36b67792c lib/Unicode/UCD.pm + * 92b3b0b73e402a9efee67f10380c390638c080fdde7430665e57abdac2fa976f lib/Unicode/UCD.pm * eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 lib/unicore/ArabicShaping.txt * 
333ae1e99db0504ca8a046a07dc45b5e7aa91869c685e6bf955ebe674804827a lib/unicore/BidiBrackets.txt * b4b9e1d87d8ea273613880de9d2b2f0b0b696244b42152bfa0a3106e7d983a20 lib/unicore/BidiMirroring.txt diff --git a/uni_keywords.h b/uni_keywords.h index e013651e107a..afa8574ad04f 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7707,7 +7707,7 @@ match_uniprop( const unsigned char * const key, const U16 key_len ) { #endif /* #if defined(PERL_CORE) || defined(PERL_EXT_RE_BUILD) */ /* Generated from: - * 0e8307ab7c654d9c133ea885f5413a4eb5c0123ed2178f7e1cbabed36b67792c lib/Unicode/UCD.pm + * 92b3b0b73e402a9efee67f10380c390638c080fdde7430665e57abdac2fa976f lib/Unicode/UCD.pm * eb840f36e0a7446293578c684a54c6d83d249abde7bdd4dfa89794af1d7fe9e9 lib/unicore/ArabicShaping.txt * 333ae1e99db0504ca8a046a07dc45b5e7aa91869c685e6bf955ebe674804827a lib/unicore/BidiBrackets.txt * b4b9e1d87d8ea273613880de9d2b2f0b0b696244b42152bfa0a3106e7d983a20 lib/unicore/BidiMirroring.txt