Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"author": "Transcend Inc.",
"name": "@transcend-io/internationalization",
"description": "Internationalization configuration for the monorepo",
"version": "2.2.0",
"version": "2.3.0",
"homepage": "https://github.com/transcend-io/internationalization",
"repository": {
"type": "git",
Expand Down
12 changes: 12 additions & 0 deletions src/enums.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,18 @@ export const LOCALE_BROWSER_MAP = {
/**
 * Union of the browser language tags that appear as keys of LOCALE_BROWSER_MAP.
 */
export type BrowserLocaleKey = keyof typeof LOCALE_BROWSER_MAP;

/**
 * Lookup table from lowercased browser tag to LocaleValue.
 * Built once at module load by lowercasing every key of LOCALE_BROWSER_MAP,
 * so callers can resolve tags without caring about their casing.
 */
export const LOCALE_BROWSER_MAP_LOWERCASE = ((): Record<string, LocaleValue> => {
  const index: Record<string, LocaleValue> = {};
  Object.entries(LOCALE_BROWSER_MAP).forEach(([browserTag, locale]) => {
    index[browserTag.toLowerCase()] = locale;
  });
  return index;
})();

/**
* Native language names, used to render options to users
* Language options for end-users should be written in own language
Expand Down
243 changes: 243 additions & 0 deletions src/getUserLocalesFromBrowserLanguages.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
import {
LocaleValue,
BrowserLocaleKey,
LOCALE_BROWSER_MAP_LOWERCASE,
} from './enums';

/**
 * Canonicalize a browser language tag for case-insensitive comparison:
 * surrounding whitespace is removed and the result is lowercased.
 *
 * @param tag - Raw browser language tag (e.g., ' en-US ')
 * @returns Trimmed, lowercased tag (e.g., 'en-us')
 */
function normalizeBrowserTag(tag: string): string {
  const trimmed = tag.trim();
  return trimmed.toLowerCase();
}

/**
 * Return everything before the first '-' of a tag or LocaleValue,
 * i.e. its base language sub-tag. Input without a '-' is returned unchanged.
 *
 * @param code - A tag or LocaleValue (e.g., 'fr-CA' or 'fr')
 * @returns Base language (e.g., 'fr')
 */
function baseOf(code: string): string {
  const dashAt = code.indexOf('-');
  return dashAt === -1 ? code : code.slice(0, dashAt);
}

/**
 * Drop repeated items, keeping each item at the position of its first occurrence.
 * Relies on Set remembering insertion order.
 *
 * @param items - Input items
 * @returns New array with the unique items in original order
 */
function uniqOrdered<T>(items: T[]): T[] {
  return Array.from(new Set<T>(items));
}

/**
 * Detect user-preferred languages from the navigator.
 * Tags are only trimmed, not lowercased; keys in LOCALE_BROWSER_MAP can be mixed case
 * and resolution downstream is case-insensitive.
 *
 * `languages` is used when it is non-empty, otherwise the single `language` is used.
 * Empty, whitespace-only, and non-string entries are dropped, so the result may be empty.
 * When no global `navigator` exists, the defaults resolve to "no languages"
 * instead of throwing.
 *
 * @param languages - navigator.languages
 * @param language - navigator.language
 * @returns Ordered list of BCP-47 tags (strings)
 */
export function getLanguagesFromNavigator(
  languages: readonly string[] = typeof navigator === 'undefined'
    ? []
    : navigator.languages,
  language: string = typeof navigator === 'undefined' ? '' : navigator.language,
): BrowserLocaleKey[] {
  const tags = languages?.length ? languages : [language];
  return tags
    // guard against null/undefined entries so .trim() cannot throw
    .filter((t): t is string => typeof t === 'string')
    .map((t) => t.trim())
    .filter((t) => t.length > 0) as BrowserLocaleKey[];
}

/**
 * Case-insensitive lookup of a browser tag in LOCALE_BROWSER_MAP,
 * with a fallback to its base tag.
 *
 * Only own keys of the lookup table are considered, so tags that happen to
 * collide with Object.prototype members (e.g., 'constructor') resolve to undefined.
 *
 * NOTE(review): resolveSupportedLocaleForBrowserTag performs its own base-language
 * fallback as well; whether both fallbacks are needed was raised in review and is
 * still an open question.
 *
 * @param tag - Browser tag (any case, e.g., 'Es-Mx')
 * @returns LocaleValue if found, otherwise undefined
 */
export function mapBrowserTagToLocale(tag: string): LocaleValue | undefined {
  // `in` would also match inherited properties; restrict to own keys
  const isKnown = (key: string): boolean =>
    Object.prototype.hasOwnProperty.call(LOCALE_BROWSER_MAP_LOWERCASE, key);

  // normalize language
  const lc = normalizeBrowserTag(tag);

  // direct match if exists
  if (isKnown(lc)) {
    return LOCALE_BROWSER_MAP_LOWERCASE[lc];
  }

  // otherwise try base prefix
  const baseLc = baseOf(lc);
  if (isKnown(baseLc)) {
    return LOCALE_BROWSER_MAP_LOWERCASE[baseLc];
  }

  // no direct match
  return undefined;
}

/**
 * Resolve the best supported LocaleValue for a single browser tag.
 * Rule:
 * 1) Map via LOCALE_BROWSER_MAP (case-insensitive); if the mapped value is supported, use it.
 * 2) Otherwise, use the browser tag's base prefix:
 *    a) if the short code (e.g., 'ar') is supported, use it
 *    b) else pick the first 'ar-*' in supportedLocales (preserves customer order)
 *
 * NOTE(review): step 2 is a deliberate "fuzzy" fallback (e.g., 'fr-CA' resolves to 'fr'
 * rather than to the default). Reviewers flagged two consequences to revisit:
 * - a customer cannot fully "disable" a regional locale while a sibling with the
 *   same base language stays in supportedLocales;
 * - callers that resolve tags one at a time in browser order will rank a fuzzy match
 *   on an earlier tag above an exact match on a later tag.
 *
 * @param browserTag - Browser tag (e.g., 'ar-EG')
 * @param supportedLocales - Allowed locales (customer-ordered)
 * @returns Supported LocaleValue or undefined if no base match exists
 */
export function resolveSupportedLocaleForBrowserTag(
  browserTag: string,
  supportedLocales: LocaleValue[],
): LocaleValue | undefined {
  // look for direct match and accept that if in list
  const mapped = mapBrowserTagToLocale(browserTag);
  if (mapped && supportedLocales.includes(mapped)) {
    return mapped;
  }

  // if no direct match, look for base prefix matches e.g. "ar-EG" -> "ar"
  const prefixLc = baseOf(normalizeBrowserTag(browserTag));
  const shortMatch = supportedLocales.find((l) => l.toLowerCase() === prefixLc);
  if (shortMatch) {
    return shortMatch;
  }

  // then first variant with same base (undefined when there is none)
  return supportedLocales.find(
    (l) => l.includes('-') && baseOf(l).toLowerCase() === prefixLc,
  );
}

/**
 * Translate the browser's ordered language tags into supported LocaleValues.
 * Each tag is resolved independently with resolveSupportedLocaleForBrowserTag;
 * tags that resolve to nothing are skipped, repeated results are collapsed to
 * their first occurrence, and browser order is otherwise kept.
 *
 * @param browserLocales - Browser tags (ordered by user preference)
 * @param supportedLocales - Allowed locales (customer-ordered)
 * @param defaultLocale - Returned as the only element when no tag resolves
 * @returns Ordered, unique supported LocaleValues (never empty)
 */
export function getUserLocalesFromBrowserLanguages(
  browserLocales: string[],
  supportedLocales: LocaleValue[],
  defaultLocale: LocaleValue,
): LocaleValue[] {
  const matches: LocaleValue[] = [];
  browserLocales.forEach((browserTag) => {
    const match = resolveSupportedLocaleForBrowserTag(
      browserTag,
      supportedLocales,
    );
    if (match) {
      matches.push(match);
    }
  });

  const deduped = uniqOrdered(matches);
  return deduped.length > 0 ? deduped : [defaultLocale];
}

/**
 * Pick the earliest entry of `preferred` that also appears in `supported`.
 * Matching is strict membership only; no base-language or case folding is applied.
 *
 * @param preferred - Candidate locales in descending preference
 * @param supported - Allowed locales
 * @returns First supported locale or undefined
 */
export function getNearestSupportedLocale(
  preferred: LocaleValue[],
  supported: LocaleValue[],
): LocaleValue | undefined {
  const allowed = new Set(supported);
  return preferred.find((candidate) => allowed.has(candidate));
}

/**
 * Sort a provided list of locales by the user's preferences.
 *
 * Ranking, best first:
 * 1) locales that appear in userPreferredLocales, in that list's order
 *    (first occurrence wins when the list contains duplicates);
 * 2) locales that only share a base language with a preferred locale, ordered by
 *    the first appearance of that base in userPreferredLocales;
 * 3) everything else, in original order.
 *
 * "Exact" here means "present in userPreferredLocales". If that list was produced by
 * getUserLocalesFromBrowserLanguages it can already contain base-prefix (fuzzy)
 * resolutions, so tier 1 is not limited to exact browser-tag matches.
 *
 * @param languages - Locales to sort (subset of supported)
 * @param userPreferredLocales - Preferred locales (e.g., output of getUserLocalesFromBrowserLanguages)
 * @returns New array with languages sorted by preference (stable); the input is not mutated
 */
export function sortSupportedLocalesByPreference<T extends LocaleValue>(
  languages: T[],
  userPreferredLocales: LocaleValue[],
): T[] {
  // rank of each preferred locale = index of its first occurrence
  const exactRank = new Map<LocaleValue, number>();
  userPreferredLocales.forEach((locale, i) => {
    if (!exactRank.has(locale)) {
      exactRank.set(locale, i);
    }
  });

  // rank of each base language = order of first appearance among the preferences
  const baseRank = new Map<string, number>();
  uniqOrdered(
    userPreferredLocales.map((locale) => baseOf(locale).toLowerCase()),
  ).forEach((base, i) => baseRank.set(base, i));

  // Tier offsets derived from the input sizes keep every exact rank below every
  // base rank, and every base rank below the shared "no match" rank. All ranks are
  // finite, so the comparator never evaluates Infinity - Infinity (NaN).
  const baseTierStart = userPreferredLocales.length;
  const unmatchedRank = baseTierStart + baseRank.size;

  const rankOf = (locale: T): number => {
    const exact = exactRank.get(locale);
    if (exact !== undefined) return exact;
    const byBase = baseRank.get(baseOf(locale).toLowerCase());
    if (byBase !== undefined) return baseTierStart + byBase;
    return unmatchedRank;
  };

  // copy before sorting so the caller's array is left untouched
  return [...languages].sort((a, b) => rankOf(a) - rankOf(b));
}

/**
 * Choose the one locale to show the user by default.
 * Browser tags are resolved in order via getUserLocalesFromBrowserLanguages, which
 * includes base prefix matches (e.g., 'zh' or any 'zh-*') among the supported locales;
 * the first resolved locale that is supported wins, otherwise `fallback` is returned.
 *
 * @param supportedLocales - Allowed locales (customer-ordered)
 * @param browserLocales - Browser tags (ordered by user preference)
 * @param fallback - Locale returned when nothing supported is found
 * @returns Chosen LocaleValue
 */
export function pickDefaultLanguage(
  supportedLocales: LocaleValue[],
  browserLocales: string[],
  fallback: LocaleValue,
): LocaleValue {
  const candidates = getUserLocalesFromBrowserLanguages(
    browserLocales,
    supportedLocales,
    fallback,
  );
  const chosen = getNearestSupportedLocale(candidates, supportedLocales);
  return chosen ?? fallback;
}

/**
 * Re-order the customer's configured locale list so the locales the user is most
 * likely to want come first. Preferences are derived from the browser tags with the
 * prefix rule and then applied to the list with sortSupportedLocalesByPreference.
 *
 * @param customerLocales - Allowed locales in display/config order
 * @param browserLocales - Browser tags (e.g., from getLanguagesFromNavigator())
 * @param fallback - Fallback when no signal matches
 * @returns customerLocales sorted by user preference
 */
export function orderCustomerLocalesForDisplay(
  customerLocales: LocaleValue[],
  browserLocales: string[],
  fallback: LocaleValue,
): LocaleValue[] {
  return sortSupportedLocalesByPreference(
    customerLocales,
    getUserLocalesFromBrowserLanguages(
      browserLocales,
      customerLocales,
      fallback,
    ),
  );
}
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ export * from './enums';
export * from './types';
export * from './typeGuards';
export * from './defineMessages';
export * from './getUserLocalesFromBrowserLanguages';
Loading