From e5f7b70873ee0ba45111d402f2ddc5c9df30bc86 Mon Sep 17 00:00:00 2001 From: Jelle De Loecker Date: Wed, 14 Feb 2024 11:37:32 +0100 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Cleanup=20string=20inflect?= =?UTF-8?q?ion=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/inflections.js | 381 +++++++++++++++++++++++---------------------- 1 file changed, 199 insertions(+), 182 deletions(-) diff --git a/lib/inflections.js b/lib/inflections.js index 7084873..aef3f4e 100644 --- a/lib/inflections.js +++ b/lib/inflections.js @@ -21,187 +21,206 @@ * THE SOFTWARE. */ const Rxi = (pattern) => RegExp(pattern, 'gi'), + Rxg = (pattern) => RegExp(pattern, 'g'), Rxia = (pattern) => [Rxi(pattern)], Rxiar = (pattern, replacement) => [Rxi(pattern), '$1' + (replacement || '')]; -let S; - -let InflectionJS = { - - // This is a list of nouns that use the same form for both singular and plural. - // This list should remain entirely in lower case to correctly match Strings. - uncountable_words: [ - 'equipment', 'information', 'rice', 'money', 'species', 'series', - 'fish', 'sheep', 'moose', 'deer', 'news' - ], - - // These rules translate from the singular form of a noun to its plural form. - plural_rules: [ - // do not replace if its already a plural word - Rxia('(m)en$'), - Rxia('(pe)ople$'), - Rxia('(child)ren$'), - Rxia('([ti])a$'), - Rxia('((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$'), - Rxia('(hive)s$'), - Rxia('(tive)s$'), - Rxia('(curve)s$'), - Rxia('([lr])ves$'), - Rxia('([^fo])ves$'), - Rxia('([^aeiouy]|qu)ies$'), - Rxia('(s)eries$'), - Rxia('(m)ovies$'), - Rxia('(x|ch|ss|sh)es$'), - Rxia('([m|l])ice$'), - Rxia('(bus)es$'), - Rxia('(o)es$'), - Rxia('(shoe)s$'), - Rxia('(cris|ax|test)es$'), - Rxia('(octop|vir)i$'), - Rxia('(alias|status)es$'), - Rxia('^(ox)en'), - Rxia('(vert|ind)ices$'), - Rxia('(matr)ices$'), - Rxia('(quiz)zes$'), - - Rxiar('(m)an$', 'en'), - Rxiar('(pe)rson$', 'ople'), - Rxiar('(child)$', 'ren'), - Rxiar('^(ox)$', 'en'), - Rxiar('(ax|test)is$', 'es'), - Rxiar('(octop|vir)us$', 'i'), - Rxiar('(alias|status)$', 'es'), - Rxiar('(bu)s$', 'ses'), - Rxiar('(buffal|tomat|potat)o$', 'oes'), - Rxiar('([ti])um$', 'a'), - [Rxi('sis$'), 'ses'], - Rxiar('(?:([^f])fe|([lr])f)$', '$2ves'), - Rxiar('(hive)$', 's'), - Rxiar('([^aeiouy]|qu)y$', 'ies'), - Rxiar('(x|ch|ss|sh)$', 'es'), - Rxiar('(matr|vert|ind)ix|ex$', 'ices'), - Rxiar('([m|l])ouse$', 'ice'), - Rxiar('(quiz)$', 'zes'), - Rxiar('(criter)ion$', 'ia'), - [Rxi('s$'), 's'], - [Rxi('$'), 's'] - ], - - // These rules translate from the plural form of a noun to its singular form. - singular_rules: [ - // do not replace if its already a singular word - Rxia('(m)an$'), - Rxia('(pe)rson$'), - Rxia('(child)$'), - Rxia('^(ox)$'), - Rxia('(ax|test)is$'), - Rxia('(octop|vir)us$'), - Rxia('(alias|status)$'), - Rxia('(bu)s$'), - Rxia('(buffal|tomat|potat)o$'), - Rxia('([ti])um$'), - Rxia('sis$'), - Rxia('(?:([^f])fe|([lr])f)$'), - Rxia('(hive)$'), - Rxia('([^aeiouy]|qu)y$'), - Rxia('(x|ch|ss|sh)$'), - Rxia('(matr|vert|ind)ix|ex$'), - Rxia('([m|l])ouse$'), - Rxia('(quiz)$'), - - // original rule - Rxiar('(m)en$', 'an'), - Rxiar('(pe)ople$', 'rson'), - Rxiar('(child)ren$'), - Rxiar('(criteri)a$', 'on'), - Rxiar('([ti])a$', 'um'), - Rxiar('((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$', '$2sis'), - Rxiar('(hive)s$'), - Rxiar('(tive)s$'), - Rxiar('(curve)s$'), - Rxiar('([lr])ves$', 'f'), - Rxiar('([^fo])ves$', 'fe'), - Rxiar('(m)ovies$', 'ovie'), - Rxiar('([^aeiouy]|qu)ies$', 'y'), - Rxiar('(s)eries$', 'eries'), - Rxiar('(x|ch|ss|sh)es$'), - Rxiar('([m|l])ice$', 'ouse'), - Rxiar('(bus)es$'), - Rxiar('(o)es$'), - Rxiar('(shoe)s$'), - Rxiar('(cris|ax|test)es$', 'is'), - Rxiar('(octop|vir)i$', 'us'), - Rxiar('(alias|status)es$'), - Rxiar('^(ox)en'), - Rxiar('(vert|ind)ices$', 'ex'), - Rxiar('(matr)ices$', 'ix'), - Rxiar('(quiz)zes$'), - [Rxi('ss$'), 'ss'], - [Rxi('s$'), ''], - ], - - // This is a list of words that should not be capitalized for title case - non_titlecased_words: [ - 'and', 'or', 'nor', 'a', 'an', 'the', 'so', 'but', 'to', 'of', 'at', - 'by', 'from', 'into', 'on', 'onto', 'off', 'out', 'in', 'over', - 'with', 'for' - ], - - // These are regular expressions used for converting between String formats - id_suffix : RegExp('(_ids|_id)$', 'g'), - underbar : RegExp('_', 'g'), - space_or_underbar : RegExp('[\ _]', 'g'), - uppercase : RegExp('([A-Z])', 'g'), - underbar_prefix : RegExp('^_'), - spaces_or_underscores : /[\s_]+/g, - underscores : /_+/g, - - /* - This is a helper method that applies rules based replacement to a String - Signature: - InflectionJS.apply_rules(str, rules, skip, override) == String - Arguments: - str - String - String to modify and return based on the passed rules - rules - Array: [RegExp, String] - Regexp to match paired with String to use for replacement - skip - Array: [String] - Strings to skip if they match - override - String (optional) - String to return as though this method succeeded (used to conform to APIs) - Returns: - String - passed String modified by passed rules - Examples: - InflectionJS.apply_rules("cows", InflectionJs.singular_rules) === 'cow' - */ - apply_rules: function(str, rules, skip, override) { - - var ignore, - i, - j; - - if (override) { - str = override; - } else { +// This is a list of nouns that use the same form for both singular and plural. +// This list should remain entirely in lower case to correctly match Strings. +const UNCOUNTABLE_WORDS = [ + 'equipment', + 'information', + 'rice', + 'money', + 'species', + 'series', + 'fish', + 'sheep', + 'moose', + 'deer', + 'news', +]; + +// These rules translate from the singular form of a noun to its plural form. +const PLURAL_RULES = [ + // do not replace if its already a plural word + Rxia('(m)en$'), + Rxia('(pe)ople$'), + Rxia('(child)ren$'), + Rxia('([ti])a$'), + Rxia('((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$'), + Rxia('(hive)s$'), + Rxia('(tive)s$'), + Rxia('(curve)s$'), + Rxia('([lr])ves$'), + Rxia('([^fo])ves$'), + Rxia('([^aeiouy]|qu)ies$'), + Rxia('(s)eries$'), + Rxia('(m)ovies$'), + Rxia('(x|ch|ss|sh)es$'), + Rxia('([m|l])ice$'), + Rxia('(bus)es$'), + Rxia('(o)es$'), + Rxia('(shoe)s$'), + Rxia('(cris|ax|test)es$'), + Rxia('(octop|vir)i$'), + Rxia('(alias|status)es$'), + Rxia('^(ox)en'), + Rxia('(vert|ind)ices$'), + Rxia('(matr)ices$'), + Rxia('(quiz)zes$'), + + Rxiar('(m)an$', 'en'), + Rxiar('(pe)rson$', 'ople'), + Rxiar('(child)$', 'ren'), + Rxiar('^(ox)$', 'en'), + Rxiar('(ax|test)is$', 'es'), + Rxiar('(octop|vir)us$', 'i'), + Rxiar('(alias|status)$', 'es'), + Rxiar('(bu)s$', 'ses'), + Rxiar('(buffal|tomat|potat)o$', 'oes'), + Rxiar('([ti])um$', 'a'), + [Rxi('sis$'), 'ses'], + Rxiar('(?:([^f])fe|([lr])f)$', '$2ves'), + Rxiar('(hive)$', 's'), + Rxiar('([^aeiouy]|qu)y$', 'ies'), + Rxiar('(x|ch|ss|sh)$', 'es'), + Rxiar('(matr|vert|ind)ix|ex$', 'ices'), + Rxiar('([m|l])ouse$', 'ice'), + Rxiar('(quiz)$', 'zes'), + Rxiar('(criter)ion$', 'ia'), + [Rxi('s$'), 's'], + [Rxi('$'), 's'] +]; + +// These rules translate from the plural form of a noun to its singular form. +const SINGULAR_RULES = [ + // do not replace if its already a singular word + Rxia('(m)an$'), + Rxia('(pe)rson$'), + Rxia('(child)$'), + Rxia('^(ox)$'), + Rxia('(ax|test)is$'), + Rxia('(octop|vir)us$'), + Rxia('(alias|status)$'), + Rxia('(bu)s$'), + Rxia('(buffal|tomat|potat)o$'), + Rxia('([ti])um$'), + Rxia('sis$'), + Rxia('(?:([^f])fe|([lr])f)$'), + Rxia('(hive)$'), + Rxia('([^aeiouy]|qu)y$'), + Rxia('(x|ch|ss|sh)$'), + Rxia('(matr|vert|ind)ix|ex$'), + Rxia('([m|l])ouse$'), + Rxia('(quiz)$'), + + // original rule + Rxiar('(m)en$', 'an'), + Rxiar('(pe)ople$', 'rson'), + Rxiar('(child)ren$'), + Rxiar('(criteri)a$', 'on'), + Rxiar('([ti])a$', 'um'), + Rxiar('((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$', '$2sis'), + Rxiar('(hive)s$'), + Rxiar('(tive)s$'), + Rxiar('(curve)s$'), + Rxiar('([lr])ves$', 'f'), + Rxiar('([^fo])ves$', 'fe'), + Rxiar('(m)ovies$', 'ovie'), + Rxiar('([^aeiouy]|qu)ies$', 'y'), + Rxiar('(s)eries$', 'eries'), + Rxiar('(x|ch|ss|sh)es$'), + Rxiar('([m|l])ice$', 'ouse'), + Rxiar('(bus)es$'), + Rxiar('(o)es$'), + Rxiar('(shoe)s$'), + Rxiar('(cris|ax|test)es$', 'is'), + Rxiar('(octop|vir)i$', 'us'), + Rxiar('(alias|status)es$'), + Rxiar('^(ox)en'), + Rxiar('(vert|ind)ices$', 'ex'), + Rxiar('(matr)ices$', 'ix'), + Rxiar('(quiz)zes$'), + [Rxi('ss$'), 'ss'], + [Rxi('s$'), ''], +]; + +// This is a list of words that should not be capitalized for title case +const NON_TITLECASED_WORDS = [ + 'and', + 'or', + 'nor', + 'a', + 'an', + 'the', + 'so', + 'but', + 'to', + 'of', + 'at', + 'by', + 'from', + 'into', + 'on', + 'onto', + 'off', + 'out', + 'in', + 'over', + 'with', + 'for', +]; + +// These are regular expressions used for converting between String formats +const ID_SUFFIX = Rxg('(_ids|_id)$'), + UNDERBAR = Rxg('_'), + SPACE_OR_UNDERBAR = Rxg('[\ _]'); + +/* + This is a helper method that applies rules based replacement to a String + Signature: + applyRules(str, rules, skip, override) == String + Arguments: + str - String - String to modify and return based on the passed rules + rules - Array: [RegExp, String] - Regexp to match paired with String to use for replacement + skip - Array: [String] - Strings to skip if they match + override - String (optional) - String to return as though this method succeeded (used to conform to APIs) + Returns: + String - passed String modified by passed rules + Examples: + applyRules("cows", SINGULAR_RULES) === 'cow' +*/ +const applyRules = (str, rules, skip, override) => { + + if (override) { + str = override; + } else { - ignore = (skip.indexOf(str.toLowerCase()) > -1); + let ignore = skip.includes(str.toLowerCase()); - if (!ignore) { + if (!ignore) { - j = rules.length; + let i = 0, + j = rules.length; - for (i = 0; i < j; i++) { - if (str.match(rules[i][0])){ - if (rules[i][1] !== undefined) { - str = str.replace(rules[i][0], rules[i][1]); - } - break; + for (; i < j; i++) { + if (str.match(rules[i][0])){ + if (rules[i][1] !== undefined) { + str = str.replace(rules[i][0], rules[i][1]); } + break; } } } - - // Make sure we return a useable string - return '' + str; } + + // Make sure we return a useable string + return '' + str; }; +let S; + Blast.once('pre-extra-files', function getBoundString() { S = Blast.Bound.String; }); @@ -291,13 +310,11 @@ function toLower(char_code) { * * @author Jelle De Loecker * @since 0.7.26 - * @version 0.7.26 + * @version 0.9.0 * * @param {Function} fnc */ -function defString(fnc) { - return Blast.definePrototype('String', fnc); -} +const defString = Blast.createProtoDefiner('String'); /** * Pluralize a string @@ -311,10 +328,10 @@ function defString(fnc) { * @return {String} Singular English language nouns are returned in plural form */ defString(function pluralize(plural) { - return InflectionJS.apply_rules( + return applyRules( this, - InflectionJS.plural_rules, - InflectionJS.uncountable_words, + PLURAL_RULES, + UNCOUNTABLE_WORDS, plural ); }); @@ -331,10 +348,10 @@ defString(function pluralize(plural) { * @return {String} Plural English language nouns are returned in singular form */ defString(function singularize(singular) { - return InflectionJS.apply_rules( + return applyRules( this, - InflectionJS.singular_rules, - InflectionJS.uncountable_words, + SINGULAR_RULES, + UNCOUNTABLE_WORDS, singular ); }); @@ -599,14 +616,14 @@ defString(function humanize(lowFirstLetter) { ori = str; // Remove the trailing _id suffix - str = str.replace(InflectionJS.id_suffix, ''); + str = str.replace(ID_SUFFIX, ''); // If the string is empty now, put it back if (!str) { str = ori; } - str = str.replace(InflectionJS.underbar, ' ').trim(); + str = str.replace(UNDERBAR, ' ').trim(); if (!lowFirstLetter) { str = S.capitalize(str); @@ -646,7 +663,7 @@ defString(function capitalize() { */ defString(function dasherize() { var str = this; - str = str.replace(InflectionJS.space_or_underbar, '-'); + str = str.replace(SPACE_OR_UNDERBAR, '-'); return str; }); @@ -671,7 +688,7 @@ defString(function titleize(alwaysCapitalize) { x; // Turn the underscores into spaces - str = str.replace(InflectionJS.underbar, ' '); + str = str.replace(UNDERBAR, ' '); // Split the string str_arr = str.split(' '); @@ -681,7 +698,7 @@ defString(function titleize(alwaysCapitalize) { d = str_arr[x].split('-'); for (i = 0; i < d.length; i++) { - if (alwaysCapitalize === true || InflectionJS.non_titlecased_words.indexOf(d[i].toLowerCase()) < 0) { + if (alwaysCapitalize === true || NON_TITLECASED_WORDS.indexOf(d[i].toLowerCase()) < 0) { d[i] = S.capitalize(d[i]); } }