diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 623ba2176e..8f691eced5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -267,9 +267,11 @@ set ( "src/phonenumbers/base/strings/string_piece.cc" "src/phonenumbers/default_logger.cc" "src/phonenumbers/logger.cc" + "src/phonenumbers/phonecontextparser.cc" "src/phonenumbers/phonemetadata.pb.cc" # Generated by Protocol Buffers. "src/phonenumbers/phonenumber.cc" "src/phonenumbers/phonenumber.pb.cc" # Generated by Protocol Buffers. + "src/phonenumbers/phonenumbernormalizer.cc" "src/phonenumbers/phonenumberutil.cc" "src/phonenumbers/regex_based_matcher.cc" "src/phonenumbers/regexpsandmappings.cc" @@ -428,7 +430,7 @@ include_directories ("src") # Collate dependencies #---------------------------------------------------------------- -set (LIBRARY_DEPS ${ICU_LIB} ${PROTOBUF_LIB} absl::node_hash_set absl::strings absl::synchronization) +set (LIBRARY_DEPS ${ICU_LIB} ${PROTOBUF_LIB} absl::node_hash_set absl::statusor absl::strings absl::synchronization) if (USE_BOOST) list (APPEND LIBRARY_DEPS ${Boost_LIBRARIES}) diff --git a/cpp/src/phonenumbers/asyoutypeformatter.cc b/cpp/src/phonenumbers/asyoutypeformatter.cc index 3fd323fa02..af56a8058e 100644 --- a/cpp/src/phonenumbers/asyoutypeformatter.cc +++ b/cpp/src/phonenumbers/asyoutypeformatter.cc @@ -711,7 +711,7 @@ char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign( } else { string number; UnicodeString(next_char).toUTF8String(number); - phone_util_.NormalizeDigitsOnly(&number); + phone_util_.phone_number_normalizer_->NormalizeDigitsOnly(&number); accrued_input_without_formatting_.append(next_char); national_number_.append(number); normalized_char = number[0]; diff --git a/cpp/src/phonenumbers/constants.h b/cpp/src/phonenumbers/constants.h index 867968a8c5..814c24eb46 100644 --- a/cpp/src/phonenumbers/constants.h +++ b/cpp/src/phonenumbers/constants.h @@ -21,7 +21,9 @@ namespace i18n { namespace phonenumbers { class Constants { + friend class 
PhoneContextParser; friend class PhoneNumberMatcherRegExps; + friend class PhoneNumberNormalizer; friend class PhoneNumberRegExpsAndMappings; friend class PhoneNumberUtil; @@ -33,6 +35,7 @@ class Constants { static constexpr char kRfc3966ExtnPrefix[] = ";ext="; static constexpr char kRfc3966VisualSeparator[] = "[\\-\\.\\(\\)]?"; + static constexpr char kRfc3966PhoneContext[] = ";phone-context="; static constexpr char kDigits[] = "\\p{Nd}"; @@ -53,6 +56,8 @@ class Constants { // The minimum and maximum length of the national significant number. static constexpr size_t kMinLengthForNsn = 2; + // The maximum length of the country calling code. + static constexpr size_t kMaxLengthCountryCode = 3; static constexpr char kPlusChars[] = "+\xEF\xBC\x8B"; /* "++" */ diff --git a/cpp/src/phonenumbers/normalize_utf8.h b/cpp/src/phonenumbers/normalize_utf8.h index ee1ea2f641..e1fcd77437 100644 --- a/cpp/src/phonenumbers/normalize_utf8.h +++ b/cpp/src/phonenumbers/normalize_utf8.h @@ -14,6 +14,8 @@ #include +#include + #include "phonenumbers/utf/unicodetext.h" namespace i18n { diff --git a/cpp/src/phonenumbers/phonecontextparser.cc b/cpp/src/phonenumbers/phonecontextparser.cc new file mode 100644 index 0000000000..b12b4301e6 --- /dev/null +++ b/cpp/src/phonenumbers/phonecontextparser.cc @@ -0,0 +1,125 @@ +// Copyright (C) 2025 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "phonenumbers/phonecontextparser.h"
+
+#include <algorithm>  // std::find
+
+#include "phonenumbers/constants.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+PhoneContextParser::PhoneContextParser(
+    std::unique_ptr<std::vector<int>> country_calling_codes,
+    std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps,
+    std::shared_ptr<PhoneNumberNormalizer> normalizer)
+    : country_calling_codes_(std::move(country_calling_codes)),
+      reg_exps_(reg_exps),
+      normalizer_(normalizer) {}
+
+std::optional<absl::string_view> PhoneContextParser::ExtractPhoneContext(
+    const absl::string_view phone_number) {
+  size_t index_of_phone_context =
+      phone_number.find(Constants::kRfc3966PhoneContext);
+
+  if (index_of_phone_context == std::string::npos) {
+    return std::nullopt;
+  }
+
+  size_t phone_context_start =
+      index_of_phone_context + strlen(Constants::kRfc3966PhoneContext);
+
+  // The parameter is present but nothing follows ";phone-context=".
+  if (phone_context_start >= phone_number.length()) {
+    return "";
+  }
+
+  size_t phone_context_end = phone_number.find(';', phone_context_start);
+  // No further ';' means phone-context is the last parameter.
+  if (phone_context_end == std::string::npos) {
+    return phone_number.substr(phone_context_start);
+  }
+
+  return phone_number.substr(phone_context_start,
+                             phone_context_end - phone_context_start);
+}
+
+bool PhoneContextParser::isValid(absl::string_view phone_context) {
+  if (phone_context.empty()) {
+    return false;
+  }
+
+  // Does phone-context value match the global number digits pattern or the
+  // domain name pattern?
+  return reg_exps_->rfc3966_global_number_digits_pattern_->FullMatch(
+             std::string{phone_context}) ||
+         reg_exps_->rfc3966_domainname_pattern_->FullMatch(
+             std::string{phone_context});
+}
+
+bool PhoneContextParser::isValidCountryCode(int country_code) {
+  return std::find(country_calling_codes_->begin(),
+                   country_calling_codes_->end(),
+                   country_code) != country_calling_codes_->end();
+}
+
+PhoneContextParser::PhoneContext PhoneContextParser::ParsePhoneContext(
+    absl::string_view phone_context) {
+  PhoneContextParser::PhoneContext phone_context_object;
+  phone_context_object.raw_context = phone_context;
+  phone_context_object.country_code = std::nullopt;
+
+  // Only a value that starts with a plus sign can carry a country code; return
+  // anything else (it could be a domain name) with country_code left unset.
+  if (phone_context.empty() ||
+      phone_context.at(0) != Constants::kPlusSign[0]) {
+    return phone_context_object;
+  }
+
+  // Remove the plus sign from the phone context and normalize the digits.
+  std::string normalized_phone_context = std::string(phone_context.substr(1));
+  normalizer_->NormalizeDigitsOnly(&normalized_phone_context);
+
+  if (normalized_phone_context.empty() ||
+      normalized_phone_context.length() > Constants::kMaxLengthCountryCode) {
+    return phone_context_object;
+  }
+
+  int potential_country_code = std::stoi(normalized_phone_context, nullptr, 10);
+  if (!isValidCountryCode(potential_country_code)) {
+    return phone_context_object;
+  }
+
+  phone_context_object.country_code = potential_country_code;
+  return phone_context_object;
+}
+
+absl::StatusOr<std::optional<PhoneContextParser::PhoneContext>>
+PhoneContextParser::Parse(absl::string_view phone_number) {
+  std::optional<absl::string_view> phone_context =
+      ExtractPhoneContext(phone_number);
+  if (!phone_context.has_value()) {
+    return std::nullopt;
+  }
+
+  if (!isValid(phone_context.value())) {
+    return absl::InvalidArgumentError("Phone context is invalid.");
+  }
+
+  return ParsePhoneContext(phone_context.value());
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
\ No newline at end of file
diff --git
a/cpp/src/phonenumbers/phonecontextparser.h b/cpp/src/phonenumbers/phonecontextparser.h new file mode 100644 index 0000000000..4538867238 --- /dev/null +++ b/cpp/src/phonenumbers/phonecontextparser.h @@ -0,0 +1,81 @@ +// Copyright (C) 2025 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_ +#define I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_ + +#include +#include +#include +#include + +#include "absl/status/statusor.h" +#include "phonenumbers/phonenumbernormalizer.h" +#include "phonenumbers/regexpsandmappings.h" + +namespace i18n { +namespace phonenumbers { + +// Parses the phone-context parameter of a phone number in RFC3966 format. +class PhoneContextParser { + friend class PhoneNumberUtil; + friend class PhoneContextParserTest; + + private: + struct PhoneContext { + // The raw value of the phone-context parameter. + std::string raw_context; + + // The country code of the phone-context parameter if the phone-context is + // exactly and only a + followed by a valid country code. + std::optional country_code; + }; + + PhoneContextParser(std::unique_ptr> country_calling_codes, + std::shared_ptr reg_exps, + std::shared_ptr normalizer); + + // Parses the phone-context parameter of a phone number in RFC3966 format. + // If the phone-context parameter is not present, returns std::nullopt. If it + // is present but invalid, returns an error status. 
If it is present and + // valid, returns a PhoneContext object. This object contains the raw value of + // the phone-context parameter. Additionally, if the phone-context is exactly + // and only a + followed by a valid country code, it also contains the country + // code. + absl::StatusOr> Parse( + absl::string_view phone_number); + + std::unique_ptr> country_calling_codes_; + std::shared_ptr reg_exps_; + std::shared_ptr normalizer_; + + // Extracts the value of the phone-context parameter, following the + // specification of RFC3966. + static std::optional ExtractPhoneContext( + absl::string_view phone_number); + + // Checks whether the phone context value follows the specification of + // RFC3966. + bool isValid(absl::string_view phone_context); + + bool isValidCountryCode(int country_code); + + // Parses the phone context value into a PhoneContext object. + PhoneContext ParsePhoneContext(absl::string_view phone_context); +}; + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_PHONECONTEXTPARSER_H_ \ No newline at end of file diff --git a/cpp/src/phonenumbers/phonenumbermatcher.cc b/cpp/src/phonenumbers/phonenumbermatcher.cc index 0edd29b4f3..3fbc1b545d 100644 --- a/cpp/src/phonenumbers/phonenumbermatcher.cc +++ b/cpp/src/phonenumbers/phonenumbermatcher.cc @@ -79,43 +79,6 @@ bool IsInvalidPunctuationSymbol(char32 character) { return character == '%' || u_charType(character) == U_CURRENCY_SYMBOL; } -bool ContainsOnlyValidXChars(const PhoneNumber& number, const string& candidate, - const PhoneNumberUtil& util) { - // The characters 'x' and 'X' can be (1) a carrier code, in which case they - // always precede the national significant number or (2) an extension sign, - // in which case they always precede the extension number. We assume a - // carrier code is more than 1 digit, so the first case has to have more than - // 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 - // 'x' or 'X'. 
- size_t found; - found = candidate.find_first_of("xX"); - // We ignore the character if 'x' or 'X' appears as the last character of - // the string. - while (found != string::npos && found < candidate.length() - 1) { - // We only look for 'x' or 'X' in ASCII form. - char next_char = candidate[found + 1]; - if (next_char == 'x' || next_char == 'X') { - // This is the carrier code case, in which the 'X's always precede the - // national significant number. - ++found; - if (util.IsNumberMatchWithOneString( - number, candidate.substr(found, candidate.length() - found)) - != PhoneNumberUtil::NSN_MATCH) { - return false; - } - } else { - string normalized_extension(candidate.substr(found, - candidate.length() - found)); - util.NormalizeDigitsOnly(&normalized_extension); - if (normalized_extension != number.extension()) { - return false; - } - } - found = candidate.find_first_of("xX", found + 1); - } - return true; -} - bool AllNumberGroupsRemainGrouped( const PhoneNumberUtil& util, const PhoneNumber& number, @@ -283,7 +246,7 @@ class PhoneNumberMatcherRegExps : public Singleton { lead_limit_(Limit(0, 2)), punctuation_limit_(Limit(0, 4)), digit_block_limit_(PhoneNumberUtil::kMaxLengthForNsn + - PhoneNumberUtil::kMaxLengthCountryCode), + Constants::kMaxLengthCountryCode), block_limit_(Limit(0, digit_block_limit_)), punctuation_(StrCat("[", Constants::kValidPunctuation, "]", punctuation_limit_)), @@ -395,6 +358,46 @@ class AlternateFormats : public Singleton { DISALLOW_COPY_AND_ASSIGN(AlternateFormats); }; +class XCharValidator { + public: + static bool ContainsOnlyValidXChars(const PhoneNumber& number, const string& candidate, + const PhoneNumberUtil& util) { + // The characters 'x' and 'X' can be (1) a carrier code, in which case they + // always precede the national significant number or (2) an extension sign, + // in which case they always precede the extension number. 
We assume a + // carrier code is more than 1 digit, so the first case has to have more than + // 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 + // 'x' or 'X'. + size_t found; + found = candidate.find_first_of("xX"); + // We ignore the character if 'x' or 'X' appears as the last character of + // the string. + while (found != string::npos && found < candidate.length() - 1) { + // We only look for 'x' or 'X' in ASCII form. + char next_char = candidate[found + 1]; + if (next_char == 'x' || next_char == 'X') { + // This is the carrier code case, in which the 'X's always precede the + // national significant number. + ++found; + if (util.IsNumberMatchWithOneString( + number, candidate.substr(found, candidate.length() - found)) + != PhoneNumberUtil::NSN_MATCH) { + return false; + } + } else { + string normalized_extension(candidate.substr(found, + candidate.length() - found)); + util.phone_number_normalizer_->NormalizeDigitsOnly(&normalized_extension); + if (normalized_extension != number.extension()) { + return false; + } + } + found = candidate.find_first_of("xX", found + 1); + } + return true; + } +}; + PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util, const string& text, const string& region_code, @@ -531,13 +534,13 @@ bool PhoneNumberMatcher::VerifyAccordingToLeniency( return phone_util_.IsPossibleNumber(number); case PhoneNumberMatcher::VALID: if (!phone_util_.IsValidNumber(number) || - !ContainsOnlyValidXChars(number, candidate, phone_util_)) { + !XCharValidator::ContainsOnlyValidXChars(number, candidate, phone_util_)) { return false; } return IsNationalPrefixPresentIfRequired(number); case PhoneNumberMatcher::STRICT_GROUPING: { if (!phone_util_.IsValidNumber(number) || - !ContainsOnlyValidXChars(number, candidate, phone_util_) || + !XCharValidator::ContainsOnlyValidXChars(number, candidate, phone_util_) || ContainsMoreThanOneSlashInNationalNumber( number, candidate, phone_util_) || 
!IsNationalPrefixPresentIfRequired(number)) { @@ -552,7 +555,7 @@ bool PhoneNumberMatcher::VerifyAccordingToLeniency( } case PhoneNumberMatcher::EXACT_GROUPING: { if (!phone_util_.IsValidNumber(number) || - !ContainsOnlyValidXChars(number, candidate, phone_util_) || + !XCharValidator::ContainsOnlyValidXChars(number, candidate, phone_util_) || ContainsMoreThanOneSlashInNationalNumber( number, candidate, phone_util_) || !IsNationalPrefixPresentIfRequired(number)) { @@ -815,7 +818,7 @@ bool PhoneNumberMatcher::IsNationalPrefixPresentIfRequired( string raw_input_copy(number.raw_input()); // Check if we found a national prefix and/or carrier code at the start of // the raw input, and return the result. - phone_util_.NormalizeDigitsOnly(&raw_input_copy); + phone_util_.phone_number_normalizer_->NormalizeDigitsOnly(&raw_input_copy); return phone_util_.MaybeStripNationalPrefixAndCarrierCode( *metadata, &raw_input_copy, @@ -898,7 +901,7 @@ bool PhoneNumberMatcher::ContainsMoreThanOneSlashInNationalNumber( PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN) { string normalized_country_code = candidate.substr(0, first_slash_in_body); - util.NormalizeDigitsOnly(&normalized_country_code); + util.phone_number_normalizer_->NormalizeDigitsOnly(&normalized_country_code); if (normalized_country_code == SimpleItoa(number.country_code())) { // Any more slashes and this is illegal. return candidate.find('/', second_slash_in_body + 1) != string::npos; diff --git a/cpp/src/phonenumbers/phonenumbernormalizer.cc b/cpp/src/phonenumbers/phonenumbernormalizer.cc new file mode 100644 index 0000000000..4bbfc82a68 --- /dev/null +++ b/cpp/src/phonenumbers/phonenumbernormalizer.cc @@ -0,0 +1,39 @@ +// Copyright (C) 2025 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "phonenumbers/phonenumbernormalizer.h"
+
+#include "phonenumbers/base/logging.h"
+#include "phonenumbers/constants.h"
+#include "phonenumbers/normalize_utf8.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+PhoneNumberNormalizer::PhoneNumberNormalizer(
+    std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps)
+    : reg_exps_(reg_exps) {}
+
+void PhoneNumberNormalizer::NormalizeDigitsOnly(std::string* number) const {
+  DCHECK(number);  // The string is edited in place, so it must be non-null.
+  const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp(
+      absl::StrCat("[^", Constants::kDigits, "]"));
+  // Strip every character that is not matched by Constants::kDigits.
+  non_digits_pattern.GlobalReplace(number, "");
+  // Convert the decimal digits that remain to ASCII digits.
+  number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/src/phonenumbers/phonenumbernormalizer.h b/cpp/src/phonenumbers/phonenumbernormalizer.h
new file mode 100644
index 0000000000..0c2c14201d
--- /dev/null
+++ b/cpp/src/phonenumbers/phonenumbernormalizer.h
@@ -0,0 +1,51 @@
+// Copyright (C) 2025 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef I18N_PHONENUMBERS_PHONENUMBERNORMALIZER_H_ +#define I18N_PHONENUMBERS_PHONENUMBERNORMALIZER_H_ + +#include +#include + +#include "phonenumbers/regexpsandmappings.h" + +namespace i18n { +namespace phonenumbers { + +// Util class to normalize phone numbers. +class PhoneNumberNormalizer { + friend class AsYouTypeFormatter; + friend class PhoneContextParser; + friend class PhoneNumberMatcher; + friend class PhoneNumberUtil; + friend class XCharValidator; + friend class PhoneContextParserTest; + friend class PhoneNumberNormalizerTest; + + private: + std::shared_ptr reg_exps_; + + explicit PhoneNumberNormalizer( + std::shared_ptr reg_exps); + + // Normalizes a string of characters representing a phone number. This + // converts wide-ascii and arabic-indic numerals to European numerals, and + // strips punctuation and alpha characters. + void NormalizeDigitsOnly(std::string* number) const; +}; + +} // namespace phonenumbers +} // namespace i18n + +#endif // I18N_PHONENUMBERS_PHONENUMBERNORMALIZER_H_ \ No newline at end of file diff --git a/cpp/src/phonenumbers/phonenumberutil.cc b/cpp/src/phonenumbers/phonenumberutil.cc index 92b8c9548e..c18666f3bd 100644 --- a/cpp/src/phonenumbers/phonenumberutil.cc +++ b/cpp/src/phonenumbers/phonenumberutil.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -33,10 +34,10 @@ #include "phonenumbers/default_logger.h" #include "phonenumbers/matcher_api.h" #include "phonenumbers/metadata.h" -#include "phonenumbers/normalize_utf8.h" #include "phonenumbers/phonemetadata.pb.h" #include "phonenumbers/phonenumber.h" #include "phonenumbers/phonenumber.pb.h" +#include "phonenumbers/phonenumbernormalizer.h" #include "phonenumbers/regex_based_matcher.h" #include "phonenumbers/regexp_adapter.h" #include "phonenumbers/regexp_cache.h" @@ -56,7 +57,6 @@ using gtl::OrderByFirst; // static constants const size_t 
PhoneNumberUtil::kMaxLengthForNsn; -const size_t PhoneNumberUtil::kMaxLengthCountryCode; const int PhoneNumberUtil::kNanpaCountryCode; // static @@ -65,7 +65,6 @@ const char PhoneNumberUtil::kRegionCodeForNonGeoEntity[] = "001"; namespace { const char kRfc3966Prefix[] = "tel:"; -const char kRfc3966PhoneContext[] = ";phone-context="; const char kRfc3966IsdnSubaddress[] = ";isub="; // Default extension prefix to use when formatting. This will be put in front of @@ -346,7 +345,8 @@ PhoneNumberUtil::PhoneNumberUtil() nanpa_regions_(new absl::node_hash_set()), region_to_metadata_map_(new absl::node_hash_map()), country_code_to_non_geographical_metadata_map_( - new absl::node_hash_map) { + new absl::node_hash_map), + phone_number_normalizer_(new PhoneNumberNormalizer(reg_exps_)) { Logger::set_logger_impl(logger_.get()); // TODO: Update the java version to put the contents of the init // method inside the constructor as well to keep both in sync. @@ -392,6 +392,19 @@ PhoneNumberUtil::PhoneNumberUtil() if (country_calling_code == kNanpaCountryCode) { nanpa_regions_->insert(region_code); } + + // Create a vector of country calling codes to be used by the phone context + // parser. 
+ auto country_calling_codes_ = std::make_unique>(); + for (std::vector::const_iterator it = + country_calling_code_to_region_code_map_->begin(); + it != country_calling_code_to_region_code_map_->end(); ++it) { + country_calling_codes_->push_back(it->first); + } + + phone_context_parser_ = std::unique_ptr( + new PhoneContextParser(std::move(country_calling_codes_), reg_exps_, + phone_number_normalizer_)); } country_calling_code_to_region_code_map_->insert( @@ -1049,7 +1062,7 @@ void PhoneNumberUtil::FormatInOriginalFormat(const PhoneNumber& number, break; } candidate_national_prefix_rule.erase(index_of_first_group); - NormalizeDigitsOnly(&candidate_national_prefix_rule); + phone_number_normalizer_->NormalizeDigitsOnly(&candidate_national_prefix_rule); } if (candidate_national_prefix_rule.empty()) { // National prefix not used when formatting this number. @@ -1085,7 +1098,7 @@ bool PhoneNumberUtil::RawInputContainsNationalPrefix( const string& national_prefix, const string& region_code) const { string normalized_national_number(raw_input); - NormalizeDigitsOnly(&normalized_national_number); + phone_number_normalizer_->NormalizeDigitsOnly(&normalized_national_number); if (HasPrefixString(normalized_national_number, national_prefix)) { // Some Japanese numbers (e.g. 00777123) might be mistaken to contain // the national prefix when written without it (e.g. 0777123) if we just @@ -1671,78 +1684,29 @@ bool PhoneNumberUtil::CheckRegionForParsing( return true; } -// Extracts the value of the phone-context parameter of number_to_extract_from -// where the index of ";phone-context=" is parameter index_of_phone_context, -// following the syntax defined in RFC3966. -// Returns the extracted string_view (possibly empty), or a nullopt if no -// phone-context parameter is found. 
-absl::optional PhoneNumberUtil::ExtractPhoneContext( - const string& number_to_extract_from, - const size_t index_of_phone_context) const { - // If no phone-context parameter is present - if (index_of_phone_context == std::string::npos) { - return absl::nullopt; - } - - size_t phone_context_start = - index_of_phone_context + strlen(kRfc3966PhoneContext); - // If phone-context parameter is empty - if (phone_context_start >= number_to_extract_from.length()) { - return ""; - } - - size_t phone_context_end = - number_to_extract_from.find(';', phone_context_start); - // If phone-context is not the last parameter - if (phone_context_end != std::string::npos) { - return number_to_extract_from.substr( - phone_context_start, phone_context_end - phone_context_start); - } else { - return number_to_extract_from.substr(phone_context_start); - } -} - -// Returns whether the value of phoneContext follows the syntax defined in -// RFC3966. -bool PhoneNumberUtil::IsPhoneContextValid( - const absl::optional phone_context) const { - if (!phone_context.has_value()) { - return true; - } - if (phone_context.value().empty()) { - return false; - } - - // Does phone-context value match pattern of global-number-digits or - // domainname - return reg_exps_->rfc3966_global_number_digits_pattern_->FullMatch( - std::string{phone_context.value()}) || - reg_exps_->rfc3966_domainname_pattern_->FullMatch( - std::string{phone_context.value()}); -} - // Converts number_to_parse to a form that we can parse and write it to -// national_number if it is written in RFC3966; otherwise extract a possible -// number out of it and write to national_number. +// output_number if it is written in RFC3966; otherwise extract a possible +// number out of it and write to output_number. 
PhoneNumberUtil::ErrorType PhoneNumberUtil::BuildNationalNumberForParsing( - const string& number_to_parse, string* national_number) const { - size_t index_of_phone_context = number_to_parse.find(kRfc3966PhoneContext); + const string& number_to_parse, string* output_number) const { + size_t index_of_phone_context = number_to_parse.find(Constants::kRfc3966PhoneContext); - absl::optional phone_context = - ExtractPhoneContext(number_to_parse, index_of_phone_context); - if (!IsPhoneContextValid(phone_context)) { + absl::StatusOr> + phone_context = phone_context_parser_->Parse(number_to_parse); + + if (!phone_context.ok()) { VLOG(2) << "The phone-context value is invalid."; return NOT_A_NUMBER; } - if (phone_context.has_value()) { + if (phone_context->has_value()) { // If the phone context contains a phone number prefix, we need to capture // it, whereas domains will be ignored. - if (phone_context.value().at(0) == Constants::kPlusSign[0]) { + if (phone_context->value().raw_context.at(0) == Constants::kPlusSign[0]) { // Additional parameters might follow the phone context. If so, we will // remove them here because the parameters after phone context are not // important for parsing the phone number. - StrAppend(national_number, phone_context.value()); + StrAppend(output_number, phone_context->value().raw_context); } // Now append everything between the "tel:" prefix and the phone-context. @@ -1751,25 +1715,25 @@ PhoneNumberUtil::ErrorType PhoneNumberUtil::BuildNationalNumberForParsing( // missing, as we have seen in some of the phone number inputs. In that // case, we append everything from the beginning. size_t index_of_rfc_prefix = number_to_parse.find(kRfc3966Prefix); - int index_of_national_number = (index_of_rfc_prefix != string::npos) ? + int index_of_number = (index_of_rfc_prefix != string::npos) ? 
static_cast(index_of_rfc_prefix + strlen(kRfc3966Prefix)) : 0; StrAppend( - national_number, + output_number, number_to_parse.substr( - index_of_national_number, - index_of_phone_context - index_of_national_number)); + index_of_number, + index_of_phone_context - index_of_number)); } else { // Extract a possible number from the string passed in (this strips leading // characters that could not be the start of a phone number.) - ExtractPossibleNumber(number_to_parse, national_number); + ExtractPossibleNumber(number_to_parse, output_number); } // Delete the isdn-subaddress and everything after it if it is present. Note // extension won't appear at the same time with isdn-subaddress according to // paragraph 5.3 of the RFC3966 spec. - size_t index_of_isdn = national_number->find(kRfc3966IsdnSubaddress); + size_t index_of_isdn = output_number->find(kRfc3966IsdnSubaddress); if (index_of_isdn != string::npos) { - national_number->erase(index_of_isdn); + output_number->erase(index_of_isdn); } // If both phone context and isdn-subaddress are absent but other parameters // are present, the parameters are left in nationalNumber. This is because @@ -2285,13 +2249,7 @@ void PhoneNumberUtil::GetCountryMobileToken(int country_calling_code, } void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const { - DCHECK(number); - const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp( - StrCat("[^", Constants::kDigits, "]")); - // Delete everything that isn't valid digits. - non_digits_pattern.GlobalReplace(number, ""); - // Normalize all decimal digits to ASCII digits. 
- number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number)); + phone_number_normalizer_->NormalizeDigitsOnly(number); } void PhoneNumberUtil::NormalizeDiallableCharsOnly(string* number) const { @@ -2334,7 +2292,7 @@ void PhoneNumberUtil::Normalize(string* number) const { if (reg_exps_->valid_alpha_phone_pattern_->PartialMatch(*number)) { NormalizeHelper(reg_exps_->alpha_phone_mappings_, true, number); } - NormalizeDigitsOnly(number); + phone_number_normalizer_->NormalizeDigitsOnly(number); } // Checks to see if the string of characters could possibly be a phone number at @@ -2365,7 +2323,7 @@ bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern, string extracted_digit; if (reg_exps_->capturing_digit_pattern_->PartialMatch( number_copy->ToString(), &extracted_digit)) { - NormalizeDigitsOnly(&extracted_digit); + phone_number_normalizer_->NormalizeDigitsOnly(&extracted_digit); if (extracted_digit == "0") { return false; } @@ -2553,7 +2511,7 @@ int PhoneNumberUtil::ExtractCountryCode(string* national_number) const { // Country codes do not begin with a '0'. 
return 0; } - for (size_t i = 1; i <= kMaxLengthCountryCode; ++i) { + for (size_t i = 1; i <= Constants::kMaxLengthCountryCode; ++i) { safe_strto32(national_number->substr(0, i), &potential_country_code); string region_code; GetRegionCodeForCountryCode(potential_country_code, ®ion_code); diff --git a/cpp/src/phonenumbers/phonenumberutil.h b/cpp/src/phonenumbers/phonenumberutil.h index 8f35b19c7c..4d6f7fa9a4 100644 --- a/cpp/src/phonenumbers/phonenumberutil.h +++ b/cpp/src/phonenumbers/phonenumberutil.h @@ -29,7 +29,9 @@ #include "phonenumbers/base/basictypes.h" #include "phonenumbers/base/memory/scoped_ptr.h" #include "phonenumbers/base/memory/singleton.h" +#include "phonenumbers/phonecontextparser.h" #include "phonenumbers/phonenumber.pb.h" +#include "phonenumbers/phonenumbernormalizer.h" #include "phonenumbers/regexpsandmappings.h" class TelephoneNumber; @@ -64,6 +66,7 @@ class PhoneNumberUtil : public Singleton { friend class ShortNumberInfo; friend class ShortNumberInfoTest; friend class Singleton; + friend class XCharValidator; public: // This type is neither copyable nor movable. @@ -789,8 +792,6 @@ class PhoneNumberUtil : public Singleton { // The ITU says the maximum length should be 15, but we have found longer // numbers in Germany. static const size_t kMaxLengthForNsn = 17; - // The maximum length of the country calling code. - static const size_t kMaxLengthCountryCode = 3; // Regular expression of characters typically used to start a second phone // number for the purposes of parsing. This allows us to strip off parts of @@ -806,7 +807,7 @@ class PhoneNumberUtil : public Singleton { scoped_ptr matcher_api_; // Helper class holding useful regular expressions and character mappings. - scoped_ptr reg_exps_; + std::shared_ptr reg_exps_; // A mapping from a country calling code to a RegionCode object which denotes // the region represented by that country calling code. 
Note regions under @@ -831,6 +832,12 @@ class PhoneNumberUtil : public Singleton { scoped_ptr > country_code_to_non_geographical_metadata_map_; + // An instance of PhoneContextParser. + std::unique_ptr phone_context_parser_; + + // An instance of PhoneNumberNormalizer. + std::shared_ptr phone_number_normalizer_; + PhoneNumberUtil(); // Returns a regular expression for the possible extensions that may be found @@ -968,14 +975,8 @@ class PhoneNumberUtil : public Singleton { bool check_region, PhoneNumber* phone_number) const; - absl::optional ExtractPhoneContext( - const string& number_to_extract_from, - size_t index_of_phone_context) const; - - bool IsPhoneContextValid(absl::optional phone_context) const; - ErrorType BuildNationalNumberForParsing(const string& number_to_parse, - string* national_number) const; + string* output_number) const; bool IsShorterThanPossibleNormalNumber(const PhoneMetadata* country_metadata, const string& number) const; diff --git a/cpp/src/phonenumbers/regexpsandmappings.h b/cpp/src/phonenumbers/regexpsandmappings.h index ccefc5bd64..28ae7d5bd8 100644 --- a/cpp/src/phonenumbers/regexpsandmappings.h +++ b/cpp/src/phonenumbers/regexpsandmappings.h @@ -30,7 +30,11 @@ namespace i18n { namespace phonenumbers { class PhoneNumberRegExpsAndMappings { + friend class PhoneContextParser; + friend class PhoneNumberNormalizer; friend class PhoneNumberUtil; + friend class PhoneContextParserTest; + friend class PhoneNumberNormalizerTest; private: void InitializeMapsAndSets(); diff --git a/cpp/test/phonenumbers/phonecontextparser_test.cc b/cpp/test/phonenumbers/phonecontextparser_test.cc new file mode 100644 index 0000000000..eb49b02599 --- /dev/null +++ b/cpp/test/phonenumbers/phonecontextparser_test.cc @@ -0,0 +1,112 @@ +// Copyright (C) 2025 The Libphonenumber Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "phonenumbers/phonecontextparser.h"
+
+#include <gtest/gtest.h>
+
+#include "phonenumbers/phonenumbernormalizer.h"
+#include "phonenumbers/regexpsandmappings.h"
+
+namespace i18n {
+namespace phonenumbers {
+using testing::Eq;
+
+// Test fixture that builds a PhoneContextParser from a single country calling
+// code (64), a PhoneNumberRegExpsAndMappings instance and a
+// PhoneNumberNormalizer constructed from that same instance.
+class PhoneContextParserTest : public testing::Test {
+ public:
+  // This type is neither copyable nor movable.
+  PhoneContextParserTest(const PhoneContextParserTest&) = delete;
+  PhoneContextParserTest& operator=(const PhoneContextParserTest&) = delete;
+
+ protected:
+  // Members are initialised in declaration order, so each collaborator exists
+  // before it is handed to the parser.
+  PhoneContextParserTest()
+      : country_calling_codes_(new std::vector<int>{64}),
+        reg_exps_(new PhoneNumberRegExpsAndMappings()),
+        normalizer_(new PhoneNumberNormalizer(reg_exps_)),
+        context_parser_(new PhoneContextParser(
+            std::move(country_calling_codes_), reg_exps_, normalizer_)) {}
+
+  // NOTE(review): the pointee types were missing from these declarations and
+  // are restored from the constructor's `new` expressions (`int` from the
+  // `{64}` initialiser); confirm against PhoneContextParser's constructor.
+  // Moved into the parser by the constructor, so this member is null afterwards.
+  std::unique_ptr<std::vector<int>> country_calling_codes_;
+  std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
+  std::shared_ptr<PhoneNumberNormalizer> normalizer_;
+  std::unique_ptr<PhoneContextParser> context_parser_;
+
+  // Forwards to the parser under test. The return type is deduced; the tests
+  // use it as a StatusOr wrapping an optional context that exposes
+  // `raw_context` and `country_code`.
+  auto Parse(absl::string_view phone_number) {
+    return context_parser_->Parse(phone_number);
+  }
+};
+
+TEST_F(PhoneContextParserTest, ParsePhoneContext) {
+  // `parse_result` is declared once and reassigned for each scenario; declaring
+  // it again in the same scope would be a redefinition.
+
+  // A "+64" context is returned verbatim together with country code 64.
+  auto parse_result = Parse("tel:03-331-6005;phone-context=+64");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_TRUE(parse_result->has_value());
+  EXPECT_EQ("+64", parse_result.value()->raw_context);
+  EXPECT_EQ(64, parse_result.value()->country_code);
+
+  // A domain-name context is returned verbatim and carries no country code.
+  parse_result = Parse("tel:03-331-6005;phone-context=example.com");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_TRUE(parse_result->has_value());
+  EXPECT_EQ("example.com", parse_result.value()->raw_context);
+  EXPECT_EQ(std::nullopt, parse_result.value()->country_code);
+
+  // No "tel:" prefix, and a trailing ';' after the context.
+  parse_result = Parse("03-331-6005;phone-context=+64;");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_TRUE(parse_result->has_value());
+  EXPECT_EQ("+64", parse_result.value()->raw_context);
+  EXPECT_EQ(64, parse_result.value()->country_code);
+
+  // The number itself starts with '+'; the context is still extracted.
+  parse_result = Parse("+64-3-331-6005;phone-context=+64;");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_TRUE(parse_result->has_value());
+  EXPECT_EQ("+64", parse_result.value()->raw_context);
+  EXPECT_EQ(64, parse_result.value()->country_code);
+
+  // The context sits between other parameters.
+  parse_result =
+      Parse("tel:03-331-6005;foo=bar;phone-context=+64;baz=qux");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_TRUE(parse_result->has_value());
+  EXPECT_EQ("+64", parse_result.value()->raw_context);
+  EXPECT_EQ(64, parse_result.value()->country_code);
+
+  // Without a phone-context parameter parsing succeeds with an empty optional.
+  // The check is made on the wrapped optional, not on the StatusOr itself.
+  parse_result = Parse("tel:03-331-6005");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_FALSE(parse_result->has_value());
+
+  // "+0" is returned as the raw context, but no country code is expected.
+  parse_result = Parse("tel:03-331-6005;phone-context=+0");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_TRUE(parse_result->has_value());
+  EXPECT_EQ("+0", parse_result.value()->raw_context);
+  EXPECT_EQ(std::nullopt, parse_result.value()->country_code);
+
+  // "+1234" is returned as the raw context, but no country code is expected.
+  parse_result = Parse("tel:03-331-6005;phone-context=+1234");
+  ASSERT_TRUE(parse_result.ok());
+  ASSERT_TRUE(parse_result->has_value());
+  EXPECT_EQ("+1234", parse_result.value()->raw_context);
+  EXPECT_EQ(std::nullopt, parse_result.value()->country_code);
+}
+
+TEST_F(PhoneContextParserTest, ParsePhoneContextInvalid) {
+  // An empty context value is rejected.
+  auto parse_result = Parse("tel:03-331-6005;phone-context=");
+  EXPECT_EQ(absl::StatusCode::kInvalidArgument, parse_result.status().code());
+
+  // An empty context value followed by ';' is rejected.
+  parse_result = Parse("tel:03-331-6005;phone-context=;");
+  EXPECT_EQ(absl::StatusCode::kInvalidArgument, parse_result.status().code());
+
+  // A bare "0" context is rejected.
+  parse_result = Parse("tel:03-331-6005;phone-context=0");
+  EXPECT_EQ(absl::StatusCode::kInvalidArgument, parse_result.status().code());
+}
+
+}  // 
namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/test/phonenumbers/phonenumbernormalizer_test.cc b/cpp/test/phonenumbers/phonenumbernormalizer_test.cc
new file mode 100644
index 0000000000..ab413ffb62
--- /dev/null
+++ b/cpp/test/phonenumbers/phonenumbernormalizer_test.cc
@@ -0,0 +1,61 @@
+// Copyright (C) 2025 The Libphonenumber Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "phonenumbers/phonenumbernormalizer.h"
+
+#include <gtest/gtest.h>
+
+#include "phonenumbers/regexpsandmappings.h"
+
+namespace i18n {
+namespace phonenumbers {
+using testing::Eq;
+
+// Test fixture owning a PhoneNumberNormalizer that is constructed from a
+// freshly created PhoneNumberRegExpsAndMappings instance.
+class PhoneNumberNormalizerTest : public testing::Test {
+ public:
+  // This type is neither copyable nor movable.
+  PhoneNumberNormalizerTest(const PhoneNumberNormalizerTest&) = delete;
+  PhoneNumberNormalizerTest& operator=(const PhoneNumberNormalizerTest&) =
+      delete;
+
+ protected:
+  PhoneNumberNormalizerTest()
+      : reg_exps_(new PhoneNumberRegExpsAndMappings()),
+        normalizer_(new PhoneNumberNormalizer(reg_exps_)) {}
+
+  // NOTE(review): the pointee types were missing from these declarations and
+  // are restored from the constructor's `new` expressions; confirm constness.
+  std::shared_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
+  std::shared_ptr<PhoneNumberNormalizer> normalizer_;
+
+  // Forwards to the normalizer under test; `number` is rewritten in place.
+  void NormalizeDigitsOnly(std::string* number) {
+    normalizer_->NormalizeDigitsOnly(number);
+  }
+};
+
+// Everything that is not a digit (punctuation, '+', letters, '#') must be
+// dropped, leaving only the digits in their original order.
+TEST_F(PhoneNumberNormalizerTest, NormaliseStripAlphaCharacters) {
+  string input_number("034-56&+a#234");
+  NormalizeDigitsOnly(&input_number);
+  static const string kExpectedOutput("03456234");
+  EXPECT_EQ(kExpectedOutput, input_number)
+      << "Conversion did not correctly remove alpha characters";
+}
+
+}  // namespace phonenumbers
+}  // namespace i18n
diff --git a/cpp/test/phonenumbers/phonenumberutil_test.cc b/cpp/test/phonenumbers/phonenumberutil_test.cc
index f63f00df4a..a76150f20c 100644
--- a/cpp/test/phonenumbers/phonenumberutil_test.cc
+++ b/cpp/test/phonenumbers/phonenumberutil_test.cc
@@ -2968,14 +2968,6 @@ TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) {
       << "Conversion did not correctly replace non-latin digits";
 }
 
-TEST_F(PhoneNumberUtilTest, NormaliseStripAlphaCharacters) {
-  string input_number("034-56&+a#234");
-  phone_util_.NormalizeDigitsOnly(&input_number);
-  static const string kExpectedOutput("03456234");
-  EXPECT_EQ(kExpectedOutput, input_number)
-      << "Conversion did not correctly remove alpha characters";
-}
-
 TEST_F(PhoneNumberUtilTest, NormaliseStripNonDiallableCharacters) {
   string input_number("03*4-56&+1a#234");
   phone_util_.NormalizeDiallableCharsOnly(&input_number);