X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b331163bffd790ced0e88b73f44f86d49ccc48a5..6be67b064733ad8f9e904623c29984bb874c1e0c:/icuSources/common/ualoc.cpp diff --git a/icuSources/common/ualoc.cpp b/icuSources/common/ualoc.cpp index 721846e0..1458531a 100644 --- a/icuSources/common/ualoc.cpp +++ b/icuSources/common/ualoc.cpp @@ -1,14 +1,20 @@ /* ***************************************************************************************** -* Copyright (C) 2014-2015 Apple Inc. All Rights Reserved. +* Copyright (C) 2014-2016 Apple Inc. All Rights Reserved. ***************************************************************************************** */ +#define DEBUG_UALOC 0 +#if DEBUG_UALOC +#include +#endif +#include #include "unicode/utypes.h" #include "unicode/ualoc.h" #include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/putil.h" +#include "unicode/ustring.h" #include "cstring.h" #include "cmemory.h" #include "uhash.h" @@ -46,6 +52,48 @@ static int compareLangEntries(const void * entry1, const void * entry2) { return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode); } +// language codes to version with default script +// must be sorted by language code +static const char * langToDefaultScript[] = { + "az", "az_Latn", + "bs", "bs_Latn", + "iu", "iu_Cans", + "kk", "kk_Arab", + "ks", "ks_Arab", + "ku", "ku_Latn", + "ky", "ky_Cyrl", + "mn", "mn_Cyrl", + "ms", "ms_Latn", + "pa", "pa_Guru", + "rif", "rif_Tfng", + "shi", "shi_Tfng", + "sr", "sr_Cyrl", + "tg", "tg_Cyrl", + "tk", "tk_Latn", + "ug", "ug_Arab", + "uz", "uz_Latn", + "vai", "vai_Vaii", + "yue", "yue_Hant", + "zh", "zh_Hans", + NULL +}; + +static const char * langCodeWithScriptIfAmbig(const char * langCode) { + const char ** langToDefScriptPtr = langToDefaultScript; + const char * testCurLoc; + while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) { + int cmp = uprv_strcmp(langCode, testCurLoc); + if (cmp <= 0) { + if (cmp == 0) { + return *langToDefScriptPtr; + } + break; + } + langToDefScriptPtr++; + } + return langCode; +} + static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official" static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official" static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional" @@ -144,7 +192,7 @@ ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction, } langEntriesMax = newMax; } - uprv_strcpy(langEntries[entryCount].languageCode, langCode); + uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode)); langEntries[entryCount].userFraction = userFraction; langEntries[entryCount].status = langStatus; } @@ -173,12 +221,35 @@ ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction, } static const char * forceParent[] = { + "en_150", "en_GB", // en for Europe "en_AU", "en_GB", - "en_BD", "en_GB", // en for Bangladesh - "en_HK", "en_GB", // en for Hong Kong + "en_BD", "en_GB", // en for Bangladesh + "en_BE", "en_150", // en for Belgium goes to en for Europe + "en_DG", "en_GB", + "en_FK", "en_GB", + "en_GG", "en_GB", + "en_GI", "en_GB", + "en_HK", "en_GB", // en for Hong Kong + "en_IE", "en_GB", + "en_IM", "en_GB", "en_IN", "en_GB", - "en_MY", "en_GB", // en for Malaysia - "en_PK", "en_GB", // en for Pakistan + "en_IO", "en_GB", + "en_JE", "en_GB", + "en_JM", "en_GB", + "en_MO", "en_GB", + "en_MT", "en_GB", + "en_MV", "en_GB", // for Maldives + "en_MY", "en_GB", // en for Malaysia + "en_NZ", "en_AU", + "en_PK", "en_GB", // en for Pakistan + "en_SG", "en_GB", + "en_SH", "en_GB", + "en_VG", "en_GB", + "yue", "yue_CN", // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M) + "yue_CN", "root", + "yue_HK", "root", + "yue_Hans","yue_CN", + "yue_Hant","yue_HK", "zh", "zh_CN", "zh_CN", "root", "zh_Hant", "zh_TW", @@ -186,6 +257,8 @@ static const char * forceParent[] = { NULL }; +enum { kLocBaseNameMax = 16 }; + U_CAPI int32_t U_EXPORT2 ualoc_getAppleParent(const char* localeID, char * parent, @@ -242,6 +315,7 @@ ualoc_getAppleParent(const char* localeID, rb = ures_openDirect(NULL, locbuf, &tempStatus); if (U_SUCCESS(tempStatus)) { const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus); + ures_close(rb); if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) { // we have followed an alias len = uprv_strlen(actualLocale); @@ -250,22 +324,58 @@ ualoc_getAppleParent(const char* localeID, } else { *err = U_BUFFER_OVERFLOW_ERROR; } - ures_close(rb); return len; } - tempStatus = U_ZERO_ERROR; - const UChar * parentUName = ures_getStringByKey(rb, "%%Parent", &len, &tempStatus); - if (U_SUCCESS(tempStatus) && tempStatus != U_USING_FALLBACK_WARNING) { + } + tempStatus = U_ZERO_ERROR; + rb = ures_openDirect(NULL, "supplementalData", &tempStatus); + rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus); + if (U_SUCCESS(tempStatus)) { + UResourceBundle * parentMapBundle = NULL; + int32_t childLen = 0; + while (childLen == 0) { + tempStatus = U_ZERO_ERROR; + parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus); + if (U_FAILURE(tempStatus)) { + break; // no more parent bundles, normal exit + } + char childName[kLocBaseNameMax + 1]; + childName[kLocBaseNameMax] = 0; + const char * childPtr = NULL; + if (ures_getType(parentMapBundle) == URES_STRING) { + childLen = kLocBaseNameMax; + childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus); + if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { + childLen = 0; + } + } else { // should be URES_ARRAY + int32_t childCur, childCount = ures_getSize(parentMapBundle); + for (childCur = 0; childCur < childCount && childLen == 0; childCur++) { + tempStatus = U_ZERO_ERROR; + childLen = kLocBaseNameMax; + childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus); + if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { + childLen = 0; + } + } + } + } + ures_close(rb); + if (childLen > 0) { + // parentMapBundle key is the parent we are looking for + const char * keyStr = ures_getKey(parentMapBundle); + len = uprv_strlen(keyStr); if (len < parentCapacity) { - u_UCharsToChars(parentUName, parent, len + 1); + uprv_strcpy(parent, keyStr); } else { *err = U_BUFFER_OVERFLOW_ERROR; } - ures_close(rb); + ures_close(parentMapBundle); return len; } - ures_close(rb); + ures_close(parentMapBundle); } + len = uloc_getParent(locbuf, parent, parentCapacity, err); if (U_SUCCESS(*err) && len == 0) { len = 4; @@ -295,6 +405,7 @@ static const char * appleAliasMap[][2] = { { "italian", "it" }, // T1, still in use { "japanese", "ja" }, // T0, still in use { "korean", "ko" }, // T1 + { "no_NO", "nb_NO" }, // special { "norwegian", "nb" }, // T2 { "polish", "pl" }, // T2 { "portuguese", "pt" }, // T2 @@ -303,67 +414,151 @@ static const char * appleAliasMap[][2] = { { "swedish", "sv" }, // T2 { "thai", "th" }, // T2 { "turkish", "tr" }, // T2 + { "yue", "yue_Hans"}, // special { "zh", "zh_Hans" }, // special }; -enum { kAppleAliasMapCount = sizeof(appleAliasMap)/sizeof(appleAliasMap[0]) }; +enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) }; static const char * appleParentMap[][2] = { { "en_150", "en_GB" }, // Apple custom parent { "en_AD", "en_150" }, // Apple locale addition + { "en_AG", "en_GB" }, // Antigua & Barbuda + { "en_AI", "en_GB" }, // Anguilla { "en_AL", "en_150" }, // Apple locale addition { "en_AT", "en_150" }, // Apple locale addition { "en_AU", "en_GB" }, // Apple custom parent { "en_BA", "en_150" }, // Apple locale addition + { "en_BB", "en_GB" }, // Barbados { "en_BD", "en_GB" }, // Apple custom parent + { "en_BE", "en_150" }, // Apple custom parent + { "en_BM", "en_GB" }, // Bermuda + { "en_BS", "en_GB" }, // Bahamas + { "en_BW", "en_GB" }, // Botswana + { "en_BZ", "en_GB" }, // Belize + { "en_CC", "en_AU" }, // Cocos (Keeling) Islands { "en_CH", "en_150" }, // Apple locale addition + { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?) + { "en_CX", "en_AU" }, // Christmas Island { "en_CY", "en_150" }, // Apple locale addition { "en_CZ", "en_150" }, // Apple locale addition { "en_DE", "en_150" }, // Apple locale addition + { "en_DG", "en_GB" }, { "en_DK", "en_150" }, // Apple locale addition + { "en_DM", "en_GB" }, // Dominica { "en_EE", "en_150" }, // Apple locale addition { "en_ES", "en_150" }, // Apple locale addition { "en_FI", "en_150" }, // Apple locale addition + { "en_FJ", "en_GB" }, // Fiji + { "en_FK", "en_GB" }, { "en_FR", "en_150" }, // Apple locale addition + { "en_GD", "en_GB" }, // Grenada + { "en_GG", "en_GB" }, + { "en_GH", "en_GB" }, // Ghana + { "en_GI", "en_GB" }, + { "en_GM", "en_GB" }, // Gambia { "en_GR", "en_150" }, // Apple locale addition + { "en_GY", "en_GB" }, // Guyana { "en_HK", "en_GB" }, // Apple custom parent { "en_HR", "en_150" }, // Apple locale addition { "en_HU", "en_150" }, // Apple locale addition + { "en_IE", "en_GB" }, { "en_IL", "en_001" }, // Apple locale addition + { "en_IM", "en_GB" }, { "en_IN", "en_GB" }, // Apple custom parent + { "en_IO", "en_GB" }, { "en_IS", "en_150" }, // Apple locale addition { "en_IT", "en_150" }, // Apple locale addition + { "en_JE", "en_GB" }, + { "en_JM", "en_GB" }, + { "en_KE", "en_GB" }, // Kenya + { "en_KI", "en_GB" }, // Kiribati + { "en_KN", "en_GB" }, // St. Kitts & Nevis + { "en_KY", "en_GB" }, // Cayman Islands + { "en_LC", "en_GB" }, // St. Lucia + { "en_LS", "en_GB" }, // Lesotho { "en_LT", "en_150" }, // Apple locale addition { "en_LU", "en_150" }, // Apple locale addition { "en_LV", "en_150" }, // Apple locale addition { "en_ME", "en_150" }, // Apple locale addition + { "en_MO", "en_GB" }, + { "en_MS", "en_GB" }, // Montserrat + { "en_MT", "en_GB" }, + { "en_MU", "en_GB" }, // Mauritius + { "en_MV", "en_GB" }, + { "en_MW", "en_GB" }, // Malawi { "en_MY", "en_GB" }, // Apple custom parent + { "en_NA", "en_GB" }, // Namibia + { "en_NF", "en_AU" }, // Norfolk Island + { "en_NG", "en_GB" }, // Nigeria { "en_NL", "en_150" }, // Apple locale addition { "en_NO", "en_150" }, // Apple locale addition + { "en_NR", "en_AU" }, // Nauru + { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?) + { "en_NZ", "en_AU" }, + { "en_PG", "en_AU" }, // Papua New Guinea { "en_PK", "en_GB" }, // Apple custom parent { "en_PL", "en_150" }, // Apple locale addition + { "en_PN", "en_GB" }, // Pitcairn Islands { "en_PT", "en_150" }, // Apple locale addition { "en_RO", "en_150" }, // Apple locale addition { "en_RU", "en_150" }, // Apple locale addition + { "en_SB", "en_GB" }, // Solomon Islands + { "en_SC", "en_GB" }, // Seychelles + { "en_SD", "en_GB" }, // Sudan { "en_SE", "en_150" }, // Apple locale addition + { "en_SG", "en_GB" }, + { "en_SH", "en_GB" }, { "en_SI", "en_150" }, // Apple locale addition { "en_SK", "en_150" }, // Apple locale addition - { "en_TR", "en_150" }, // Apple locale addition + { "en_SL", "en_GB" }, // Sierra Leone + { "en_SS", "en_GB" }, // South Sudan + { "en_SZ", "en_GB" }, // Swaziland + { "en_TC", "en_GB" }, // Tristan da Cunha + { "en_TO", "en_GB" }, // Tonga + { "en_TT", "en_GB" }, // Trinidad & Tobago + { "en_TV", "en_GB" }, // Tuvalu + { "en_TZ", "en_GB" }, // Tanzania + { "en_UG", "en_GB" }, // Uganda + { "en_VC", "en_GB" }, // St. Vincent & Grenadines + { "en_VG", "en_GB" }, + { "en_VU", "en_GB" }, // Vanuatu + { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?) + { "en_ZA", "en_GB" }, // South Africa + { "en_ZM", "en_GB" }, // Zambia + { "en_ZW", "en_GB" }, // Zimbabwe }; -enum { kAppleParentMapCount = sizeof(appleParentMap)/sizeof(appleParentMap[0]) }; - -// Might do something better for this, perhaps maximizing locales then stripping. -// Selected parents of available localizations, add as necessary. -static const char * locParentMap[][2] = { - { "pt_BR", "pt" }, - { "pt_PT", "pt" }, - { "zh_Hans_CN", "zh_Hans" }, - { "zh_Hant_TW", "zh_Hant" }, +enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) }; + +typedef struct { + const char * locale; + const char * parent; + int8_t distance; +} LocParentAndDistance; + +static LocParentAndDistance locParentMap[] = { + // The localizations listed in the first column are in + // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.). + // The distance is a rough measure of distance from + // the localization to its parent, used as a weight. + { "en_001", "en", 2 }, + { "en_150", "en_GB", 1 }, + { "en_AU", "en_GB", 1 }, + { "en_GB", "en_001", 0 }, + { "en_US", "en", 0 }, + { "es_419", "es", 2 }, + { "es_MX", "es_419", 0 }, + { "pt_PT", "pt", 2 }, + { "yue_Hans_CN","yue_Hans",0 }, + { "yue_Hant_HK","yue_Hant",0 }, + { "zh_Hans_CN", "zh_Hans", 0 }, + { "zh_Hant_HK", "zh_Hant", 1 }, + { "zh_Hant_TW", "zh_Hant", 0 }, }; -enum { kLocParentMapCount = sizeof(locParentMap)/sizeof(locParentMap[0]) }; +enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 }; enum { - kStringsAllocSize = 4096, // cannot expand; current actual usage 3610 - kParentMapInitCount = 161 // can expand; current actual usage 161 + kStringsAllocSize = 4480, // cannot expand; current actual usage 4150 + kParentMapInitCount = 205 // can expand; current actual usage 205 }; U_CDECL_BEGIN @@ -534,8 +729,10 @@ static void initializeMapData() { return; } - //printf("# gStrings size %ld\n", stringsPtr - gStrings); - //printf("# gParentMap count %d\n", uhash_count(gParentMap)); +#if DEBUG_UALOC + printf("# gStrings size %ld\n", stringsPtr - gStrings); + printf("# gParentMap count %d\n", uhash_count(gParentMap)); +#endif gMapDataState = 1; } @@ -556,7 +753,7 @@ static void ualoc_normalize(const char *locale, char *normalized, int32_t normal if (replacement == NULL) { replacement = locale; } - int32_t len = uprv_strlen(replacement); + int32_t len = strnlen(replacement, normalizedCapacity); if (len < normalizedCapacity) { // allow for 0 termination uprv_strcpy(normalized, replacement); } else { @@ -586,12 +783,20 @@ static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapa } // Might do something better for this, perhaps maximizing locales then stripping -const char * getLocParent(const char *locale) +static const char * getLocParent(const char *locale, int32_t* distance) { int32_t locParentIndex; for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) { - if (uprv_strcmp(locale, locParentMap[locParentIndex][0]) == 0) { - return locParentMap[locParentIndex][1]; + if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) { + *distance = locParentMap[locParentIndex].distance; + return locParentMap[locParentIndex].parent; + } + } + if (gMapDataState > 0) { + const char *replacement = (const char *)uhash_get(gParentMap, locale); + if (replacement) { + *distance = 1; + return replacement; } } return NULL; @@ -633,10 +838,22 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, } int32_t locsToUseCount = 0; int32_t prefLangIndex, availLocIndex = 0; + int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match + int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0; char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL; char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL; - UBool checkAvailLocParents = FALSE; UBool foundMatch = FALSE; + UBool backupMatchPrefLang_pt_PT = FALSE; + +#if DEBUG_UALOC + if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) { + printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n", + preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]); + } else { + printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n", + preferredLanguagesCount, availableLocalizationsCount); + } +#endif // Part 1, find the best matching localization, if any for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) { @@ -655,7 +872,9 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, continue; // can't handle this preferredLanguages entry or it is invalid, go to next one } prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING - //printf(" # prefLangBaseName %s\n", prefLangBaseName); +#if DEBUG_UALOC + printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName); +#endif // if we have not already allocated and filled the array of // base availableLocalizations, do so now. @@ -664,25 +883,38 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, if (availLocBase == NULL) { continue; // cannot further check this preferredLanguages entry, go to next one } +#if DEBUG_UALOC + printf(" # allocate & fill availLocBase\n"); +#endif for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { tmpStatus = U_ZERO_ERROR; + if (availableLocalizations[availLocIndex] == NULL) { + availLocBase[availLocIndex][0] = 0; // effectively remove this entry + continue; + } uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus); if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') { availLocBase[availLocIndex][0] = 0; // effectively remove this entry - } else { - availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING + continue; } + availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING +#if DEBUG_UALOC + printf(" # add availLocBase %s\n", availLocBase[availLocIndex]); +#endif } } // first compare base preferredLanguage to base versions of availableLocalizations names for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) { foundMatch = TRUE; // availLocIndex records where + foundMatchPrefLangIndex = prefLangIndex; +#if DEBUG_UALOC + printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]); +#endif break; } } if (foundMatch) { - //printf(" # matched actualLocName\n"); break; // found a loc for this preferredLanguages entry } @@ -692,7 +924,9 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, if (U_FAILURE(tmpStatus)) { continue; // can't handle this preferredLanguages entry, go to next one } - //printf(" # prefLangNormName %s\n", prefLangNormName); +#if DEBUG_UALOC + printf(" # prefLangNormName %s\n", prefLangNormName); +#endif // if we have not already allocated and filled the array of // normalized availableLocalizations, do so now. // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK", @@ -702,15 +936,19 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, if (availLocNorm == NULL) { continue; // cannot further check this preferredLanguages entry, go to next one } +#if DEBUG_UALOC + printf(" # allocate & fill availLocNorm\n"); +#endif for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { tmpStatus = U_ZERO_ERROR; ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); if (U_FAILURE(tmpStatus)) { availLocNorm[availLocIndex][0] = 0; // effectively remove this entry - } else if (getLocParent(availLocNorm[availLocIndex]) != NULL) { - checkAvailLocParents = TRUE; +#if DEBUG_UALOC + } else { + printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]); +#endif } - //printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]); } } // now compare normalized preferredLanguage to normalized localization names @@ -718,11 +956,14 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) { foundMatch = TRUE; // availLocIndex records where + foundMatchPrefLangIndex = prefLangIndex; +#if DEBUG_UALOC + printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); +#endif break; } } if (foundMatch) { - //printf(" # matched actualLocNormName\n"); break; // found a loc for this preferredLanguages entry } @@ -735,13 +976,19 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { break; // reached root or cannot proceed further } - //printf(" # prefLangParentName %s\n", prefLangParentName); +#if DEBUG_UALOC + printf(" # prefLangParentName %s\n", prefLangParentName); +#endif // now compare this preferredLanguage parent to normalized localization names // if matches, copy *original* localization name for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) { foundMatch = TRUE; // availLocIndex records where + foundMatchPrefLangIndex = prefLangIndex; +#if DEBUG_UALOC + printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); +#endif break; } } @@ -751,26 +998,102 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, break; // found a loc for this preferredLanguages entry } - // last try, use parents of selected - if (checkAvailLocParents) { + // last try, use parents of selected language to try for backup match + // if we have not already found one + if (availLocIndexBackup < 0) { // now walk up the parent chain for preferredLanguage again // checking against parents of selected availLocNorm entries // but this time start with current prefLangNormName uprv_strcpy(prefLangBaseName, prefLangNormName); + int32_t minDistance = kMaxParentDistance; while (TRUE) { - tmpStatus = U_ZERO_ERROR; // now compare this preferredLanguage to normalized localization names // parent if have one for this; if matches, copy *original* localization name +#if DEBUG_UALOC + printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName); +#endif for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { - const char *availLocParent = getLocParent(availLocNorm[availLocIndex]); - if (availLocParent && uprv_strcmp(prefLangBaseName, availLocParent) == 0) { - foundMatch = TRUE; // availLocIndex records where - break; + char availLocMinOrParent[kLangScriptRegMaxLen + 1]; + int32_t distance; + // first check for special Apple parents of availLocNorm - + // - the number of locales with such parents is small - + // or if not such parent, then try stripping region. + const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance); + if (availLocParent) { +#if DEBUG_UALOC + printf(" # availLocAppleParentName %s\n", availLocParent); +#endif + if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) { + availLocIndexBackup = availLocIndex; // records where the match occurred + backupMatchPrefLangIndex = prefLangIndex; + minDistance = distance; +#if DEBUG_UALOC + printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance); +#endif + continue; + } + } + if (minDistance <= 1) { + continue; // we can't get any closer in the rest of this iteration + } + if (availLocParent == NULL) { + tmpStatus = U_ZERO_ERROR; + int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus) && regLen > 1) { + uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING +#if DEBUG_UALOC + printf(" # availLocRegMaxName %s\n", availLocMinOrParent); +#endif + char availLocTemp[kLangScriptRegMaxLen + 1]; + uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + availLocTemp[kLangScriptRegMaxLen] = 0; + uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + availLocMinOrParent[kLangScriptRegMaxLen] = 0; +#if DEBUG_UALOC + printf(" # availLocNoRegParentName %s\n", availLocMinOrParent); +#endif + if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { + availLocIndexBackup = availLocIndex; // records where the match occurred + backupMatchPrefLangIndex = prefLangIndex; + minDistance = 1; + backupMatchPrefLang_pt_PT = (uprv_strcmp(prefLangNormName, "pt_PT") == 0); +#if DEBUG_UALOC + printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n"); +#endif + continue; + } + } + } + } + } + } + // then check against minimized version of availLocNorm + tmpStatus = U_ZERO_ERROR; + uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_FAILURE(tmpStatus)) { + continue; + } + availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING +#if DEBUG_UALOC + printf(" # availLocMinimized %s\n", availLocMinOrParent); +#endif + if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { + availLocIndexBackup = availLocIndex; // records where the match occurred + backupMatchPrefLangIndex = prefLangIndex; + minDistance = 1; +#if DEBUG_UALOC + printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n"); +#endif } } - if (foundMatch) { + if (availLocIndexBackup >= 0) { break; } + tmpStatus = U_ZERO_ERROR; ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { break; // reached root or cannot proceed further @@ -778,8 +1101,60 @@ ualoc_localizationsToUse( const char* const *preferredLanguages, uprv_strcpy(prefLangBaseName, prefLangParentName); } } - if (foundMatch) { - break; // found a loc for this preferredLanguages entry + } + // If we have a backup match, decide what to do + if (availLocIndexBackup >= 0) { + if (!foundMatch) { + // no main match, just use the backup + availLocIndex = availLocIndexBackup; + foundMatch = TRUE; +#if DEBUG_UALOC + printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup); +#endif + } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && (!backupMatchPrefLang_pt_PT || uprv_strcmp(availLocNorm[availLocIndexBackup], "pt_BR") != 0)) { + // have a main match but backup match was higher in the prefs, use it if for a different language +#if DEBUG_UALOC + printf(" # have backup match higher in prefs, comparing its language and script to main match\n"); +#endif + char mainLang[ULOC_LANG_CAPACITY + 1]; + char backupLang[ULOC_LANG_CAPACITY + 1]; + UErrorCode tmpStatus = U_ZERO_ERROR; + uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus); + mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination + uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus); + backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination + if (U_SUCCESS(tmpStatus)) { + if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { + // backup match has different language than main match + availLocIndex = availLocIndexBackup; + // foundMatch is already TRUE +#if DEBUG_UALOC + printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); +#endif + } else { + // backup match has same language as main match, check scripts too + char availLocMaximized[kLangScriptRegMaxLen + 1]; + + uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus); + availLocMaximized[kLangScriptRegMaxLen] = 0; + uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus); + mainLang[ULOC_LANG_CAPACITY] = 0; + + uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus); + availLocMaximized[kLangScriptRegMaxLen] = 0; + uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus); + backupLang[ULOC_LANG_CAPACITY] = 0; + + if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { + // backup match has different script than main match + availLocIndex = availLocIndexBackup; + // foundMatch is already TRUE +#if DEBUG_UALOC + printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); +#endif + } + } + } } }