X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/57a6839dcb3bba09e8228b822b290604668416fe..1a147d096ae81f4c8262f7bfc56bd19fc2dee932:/icuSources/common/ualoc.cpp diff --git a/icuSources/common/ualoc.cpp b/icuSources/common/ualoc.cpp index 21a0ec9a..e22121f0 100644 --- a/icuSources/common/ualoc.cpp +++ b/icuSources/common/ualoc.cpp @@ -1,16 +1,26 @@ /* ***************************************************************************************** -* Copyright (C) 2014 Apple Inc. All Rights Reserved. +* Copyright (C) 2014-2019 Apple Inc. All Rights Reserved. ***************************************************************************************** */ +#define DEBUG_UALOC 0 +#if DEBUG_UALOC +#include +#endif +#include +#include #include "unicode/utypes.h" #include "unicode/ualoc.h" #include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/putil.h" +#include "unicode/ustring.h" #include "cstring.h" #include "cmemory.h" +#include "uhash.h" +#include "umutex.h" +#include "ucln_cmn.h" // the following has replacements for some math.h funcs etc #include "putilimp.h" @@ -43,6 +53,54 @@ static int compareLangEntries(const void * entry1, const void * entry2) { return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode); } +// language codes to version with default script +// must be sorted by language code +static const char * langToDefaultScript[] = { + "az", "az_Latn", + "bm", "bm_Latn", // added + "bs", "bs_Latn", + "byn", "byn_Ethi", // added + "cu", "cu_Cyrl", // added + "ff", "ff_Latn", // added + "ha", "ha_Latn", // added + "iu", "iu_Cans", + "kk", "kk_Cyrl", // changed from _Arab + "ks", "ks_Arab", // unnecessary? + "ku", "ku_Latn", + "ky", "ky_Cyrl", + "mn", "mn_Cyrl", + "ms", "ms_Latn", + "pa", "pa_Guru", + "rif", "rif_Tfng", // unnecessary? no locale support anyway + "sd", "sd_Arab", // added + "shi", "shi_Tfng", + "sr", "sr_Cyrl", + "tg", "tg_Cyrl", + "tk", "tk_Latn", // unnecessary? 
+ "ug", "ug_Arab", + "uz", "uz_Latn", + "vai", "vai_Vaii", + "yue", "yue_Hant", // to match CLDR data, not Apple default + "zh", "zh_Hans", + NULL +}; + +static const char * langCodeWithScriptIfAmbig(const char * langCode) { + const char ** langToDefScriptPtr = langToDefaultScript; + const char * testCurLoc; + while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) { + int cmp = uprv_strcmp(langCode, testCurLoc); + if (cmp <= 0) { + if (cmp == 0) { + return *langToDefScriptPtr; + } + break; + } + langToDefScriptPtr++; + } + return langCode; +} + static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official" static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official" static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional" @@ -141,7 +199,7 @@ ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction, } langEntriesMax = newMax; } - uprv_strcpy(langEntries[entryCount].languageCode, langCode); + uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode)); langEntries[entryCount].userFraction = userFraction; langEntries[entryCount].status = langStatus; } @@ -169,8 +227,37 @@ ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction, return entryCount; } - static const char * forceParent[] = { + "en_150", "en_GB", // en for Europe + "en_AU", "en_GB", + "en_BD", "en_GB", // en for Bangladesh + "en_BE", "en_150", // en for Belgium goes to en for Europe + "en_DG", "en_GB", + "en_FK", "en_GB", + "en_GG", "en_GB", + "en_GI", "en_GB", + "en_HK", "en_GB", // en for Hong Kong + "en_IE", "en_GB", + "en_IM", "en_GB", + "en_IN", "en_GB", + "en_IO", "en_GB", + "en_JE", "en_GB", + "en_JM", "en_GB", + "en_LK", "en_GB", + "en_MO", "en_GB", + "en_MT", "en_GB", + "en_MV", "en_GB", // for Maldives + 
"en_MY", "en_GB", // en for Malaysia + "en_NZ", "en_AU", + "en_PK", "en_GB", // en for Pakistan + "en_SG", "en_GB", + "en_SH", "en_GB", + "en_VG", "en_GB", + "yue", "yue_CN", // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M) + "yue_CN", "root", + "yue_HK", "root", + "yue_Hans","yue_CN", + "yue_Hant","yue_HK", "zh", "zh_CN", "zh_CN", "root", "zh_Hant", "zh_TW", @@ -178,6 +265,8 @@ static const char * forceParent[] = { NULL }; +enum { kLocBaseNameMax = 16 }; + U_CAPI int32_t U_EXPORT2 ualoc_getAppleParent(const char* localeID, char * parent, @@ -188,6 +277,7 @@ ualoc_getAppleParent(const char* localeID, int32_t len; UErrorCode tempStatus; char locbuf[ULOC_FULLNAME_CAPACITY+1]; + char * foundDoubleUnderscore; if (U_FAILURE(*err)) { return 0; @@ -196,7 +286,7 @@ ualoc_getAppleParent(const char* localeID, *err = U_ILLEGAL_ARGUMENT_ERROR; return 0; } - len = uloc_canonicalize(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); + len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */ if (U_FAILURE(*err)) { return 0; } @@ -204,7 +294,12 @@ ualoc_getAppleParent(const char* localeID, locbuf[ULOC_FULLNAME_CAPACITY] = 0; *err = U_ZERO_ERROR; } - if (len >= 2 && uprv_strncmp(locbuf, "zh", 2) == 0) { + foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */ + if (foundDoubleUnderscore != NULL) { + *foundDoubleUnderscore = 0; /* terminate at the __ */ + len = uprv_strlen(locbuf); + } + if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) { const char ** forceParentPtr = forceParent; const char * testCurLoc; while ( (testCurLoc = *forceParentPtr++) != NULL ) { @@ -228,6 +323,7 @@ ualoc_getAppleParent(const char* localeID, rb = ures_openDirect(NULL, locbuf, &tempStatus); if (U_SUCCESS(tempStatus)) { const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus); + ures_close(rb); if (U_SUCCESS(tempStatus) 
&& uprv_strcmp(locbuf, actualLocale) != 0) { // we have followed an alias len = uprv_strlen(actualLocale); @@ -236,22 +332,58 @@ ualoc_getAppleParent(const char* localeID, } else { *err = U_BUFFER_OVERFLOW_ERROR; } - ures_close(rb); return len; } - tempStatus = U_ZERO_ERROR; - const UChar * parentUName = ures_getStringByKey(rb, "%%Parent", &len, &tempStatus); - if (U_SUCCESS(tempStatus) && tempStatus != U_USING_FALLBACK_WARNING) { + } + tempStatus = U_ZERO_ERROR; + rb = ures_openDirect(NULL, "supplementalData", &tempStatus); + rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus); + if (U_SUCCESS(tempStatus)) { + UResourceBundle * parentMapBundle = NULL; + int32_t childLen = 0; + while (childLen == 0) { + tempStatus = U_ZERO_ERROR; + parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus); + if (U_FAILURE(tempStatus)) { + break; // no more parent bundles, normal exit + } + char childName[kLocBaseNameMax + 1]; + childName[kLocBaseNameMax] = 0; + const char * childPtr = NULL; + if (ures_getType(parentMapBundle) == URES_STRING) { + childLen = kLocBaseNameMax; + childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus); + if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { + childLen = 0; + } + } else { // should be URES_ARRAY + int32_t childCur, childCount = ures_getSize(parentMapBundle); + for (childCur = 0; childCur < childCount && childLen == 0; childCur++) { + tempStatus = U_ZERO_ERROR; + childLen = kLocBaseNameMax; + childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus); + if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { + childLen = 0; + } + } + } + } + ures_close(rb); + if (childLen > 0) { + // parentMapBundle key is the parent we are looking for + const char * keyStr = ures_getKey(parentMapBundle); + len = uprv_strlen(keyStr); if (len < parentCapacity) { - u_UCharsToChars(parentUName, 
parent, len + 1); + uprv_strcpy(parent, keyStr); } else { *err = U_BUFFER_OVERFLOW_ERROR; } - ures_close(rb); + ures_close(parentMapBundle); return len; } - ures_close(rb); + ures_close(parentMapBundle); } + len = uloc_getParent(locbuf, parent, parentCapacity, err); if (U_SUCCESS(*err) && len == 0) { len = 4; @@ -264,3 +396,899 @@ ualoc_getAppleParent(const char* localeID, return len; } +// ================= +// Data and related functions for ualoc_localizationsToUse +// ================= + +static const char * appleAliasMap[][2] = { + // names are lowercase here because they are looked up after being processed by uloc_getBaseName + { "arabic", "ar" }, // T2 + { "chinese", "zh_Hans" }, // T0 + { "danish", "da" }, // T2 + { "dutch", "nl" }, // T1, still in use + { "english", "en" }, // T0, still in use + { "finnish", "fi" }, // T2 + { "french", "fr" }, // T0, still in use + { "german", "de" }, // T0, still in use + { "italian", "it" }, // T1, still in use + { "japanese", "ja" }, // T0, still in use + { "korean", "ko" }, // T1 + { "no_NO", "nb_NO" }, // special + { "norwegian", "nb" }, // T2 + { "polish", "pl" }, // T2 + { "portuguese", "pt" }, // T2 + { "russian", "ru" }, // T2 + { "spanish", "es" }, // T1, still in use + { "swedish", "sv" }, // T2 + { "thai", "th" }, // T2 + { "turkish", "tr" }, // T2 + { "yue", "yue_Hans"}, // special + { "zh", "zh_Hans" }, // special +}; +enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) }; + +static const char * appleParentMap[][2] = { + { "en_150", "en_GB" }, // Apple custom parent + { "en_AD", "en_150" }, // Apple locale addition + { "en_AG", "en_GB" }, // Antigua & Barbuda + { "en_AI", "en_GB" }, // Anguilla + { "en_AL", "en_150" }, // Apple locale addition + { "en_AT", "en_150" }, // Apple locale addition + { "en_AU", "en_GB" }, // Apple custom parent + { "en_BA", "en_150" }, // Apple locale addition + { "en_BB", "en_GB" }, // Barbados + { "en_BD", "en_GB" }, // Apple custom parent + { "en_BE", "en_150" }, // Apple 
custom parent + { "en_BM", "en_GB" }, // Bermuda + { "en_BS", "en_GB" }, // Bahamas + { "en_BW", "en_GB" }, // Botswana + { "en_BZ", "en_GB" }, // Belize + { "en_CC", "en_AU" }, // Cocos (Keeling) Islands + { "en_CH", "en_150" }, // Apple locale addition + { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?) + { "en_CX", "en_AU" }, // Christmas Island + { "en_CY", "en_150" }, // Apple locale addition + { "en_CZ", "en_150" }, // Apple locale addition + { "en_DE", "en_150" }, // Apple locale addition + { "en_DG", "en_GB" }, + { "en_DK", "en_150" }, // Apple locale addition + { "en_DM", "en_GB" }, // Dominica + { "en_EE", "en_150" }, // Apple locale addition + { "en_ES", "en_150" }, // Apple locale addition + { "en_FI", "en_150" }, // Apple locale addition + { "en_FJ", "en_GB" }, // Fiji + { "en_FK", "en_GB" }, + { "en_FR", "en_150" }, // Apple locale addition + { "en_GD", "en_GB" }, // Grenada + { "en_GG", "en_GB" }, + { "en_GH", "en_GB" }, // Ghana + { "en_GI", "en_GB" }, + { "en_GM", "en_GB" }, // Gambia + { "en_GR", "en_150" }, // Apple locale addition + { "en_GY", "en_GB" }, // Guyana + { "en_HK", "en_GB" }, // Apple custom parent + { "en_HR", "en_150" }, // Apple locale addition + { "en_HU", "en_150" }, // Apple locale addition + { "en_IE", "en_GB" }, + { "en_IL", "en_001" }, // Apple locale addition + { "en_IM", "en_GB" }, + { "en_IN", "en_GB" }, // Apple custom parent + { "en_IO", "en_GB" }, + { "en_IS", "en_150" }, // Apple locale addition + { "en_IT", "en_150" }, // Apple locale addition + { "en_JE", "en_GB" }, + { "en_JM", "en_GB" }, + { "en_KE", "en_GB" }, // Kenya + { "en_KI", "en_GB" }, // Kiribati + { "en_KN", "en_GB" }, // St. Kitts & Nevis + { "en_KY", "en_GB" }, // Cayman Islands + { "en_LC", "en_GB" }, // St. 
Lucia + { "en_LK", "en_GB" }, // Apple custom parent + { "en_LS", "en_GB" }, // Lesotho + { "en_LT", "en_150" }, // Apple locale addition + { "en_LU", "en_150" }, // Apple locale addition + { "en_LV", "en_150" }, // Apple locale addition + { "en_ME", "en_150" }, // Apple locale addition + { "en_MO", "en_GB" }, + { "en_MS", "en_GB" }, // Montserrat + { "en_MT", "en_GB" }, + { "en_MU", "en_GB" }, // Mauritius + { "en_MV", "en_GB" }, + { "en_MW", "en_GB" }, // Malawi + { "en_MY", "en_GB" }, // Apple custom parent + { "en_NA", "en_GB" }, // Namibia + { "en_NF", "en_AU" }, // Norfolk Island + { "en_NG", "en_GB" }, // Nigeria + { "en_NL", "en_150" }, // Apple locale addition + { "en_NO", "en_150" }, // Apple locale addition + { "en_NR", "en_AU" }, // Nauru + { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?) + { "en_NZ", "en_AU" }, + { "en_PG", "en_AU" }, // Papua New Guinea + { "en_PK", "en_GB" }, // Apple custom parent + { "en_PL", "en_150" }, // Apple locale addition + { "en_PN", "en_GB" }, // Pitcairn Islands + { "en_PT", "en_150" }, // Apple locale addition + { "en_RO", "en_150" }, // Apple locale addition + { "en_RS", "en_150" }, // Apple locale addition + { "en_RU", "en_150" }, // Apple locale addition + { "en_SB", "en_GB" }, // Solomon Islands + { "en_SC", "en_GB" }, // Seychelles + { "en_SD", "en_GB" }, // Sudan + { "en_SE", "en_150" }, // Apple locale addition + { "en_SG", "en_GB" }, + { "en_SH", "en_GB" }, + { "en_SI", "en_150" }, // Apple locale addition + { "en_SK", "en_150" }, // Apple locale addition + { "en_SL", "en_GB" }, // Sierra Leone + { "en_SS", "en_GB" }, // South Sudan + { "en_SZ", "en_GB" }, // Swaziland + { "en_TC", "en_GB" }, // Tristan da Cunha + { "en_TO", "en_GB" }, // Tonga + { "en_TT", "en_GB" }, // Trinidad & Tobago + { "en_TV", "en_GB" }, // Tuvalu + { "en_TZ", "en_GB" }, // Tanzania + { "en_UA", "en_150" }, // Apple locale addition + { "en_UG", "en_GB" }, // Uganda + { "en_VC", "en_GB" }, // St. 
Vincent & Grenadines + { "en_VG", "en_GB" }, + { "en_VU", "en_GB" }, // Vanuatu + { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?) + { "en_ZA", "en_GB" }, // South Africa + { "en_ZM", "en_GB" }, // Zambia + { "en_ZW", "en_GB" }, // Zimbabwe +}; +enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) }; + +typedef struct { + const char * locale; + const char * parent; + int8_t distance; +} LocParentAndDistance; + +static LocParentAndDistance locParentMap[] = { + // The localizations listed in the first column are in + // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.). + // The distance is a rough measure of distance from + // the localization to its parent, used as a weight. + { "de_DE", "de", 0 }, + { "en_001", "en", 2 }, + { "en_150", "en_GB", 1 }, + { "en_AU", "en_GB", 1 }, + { "en_GB", "en_001", 0 }, + { "en_US", "en", 0 }, + { "es_419", "es", 2 }, + { "es_MX", "es_419", 0 }, + { "fr_FR", "fr", 0 }, + { "it_IT", "it", 0 }, + { "pt_PT", "pt", 2 }, + { "yue_Hans_CN","yue_Hans",0 }, + { "yue_Hant_HK","yue_Hant",0 }, + { "zh_Hans_CN", "zh_Hans", 0 }, + { "zh_Hant_HK", "zh_Hant", 1 }, + { "zh_Hant_TW", "zh_Hant", 0 }, +}; +enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 }; + +enum { + kStringsAllocSize = 5280, // cannot expand; current actual usage 5259 + kParentMapInitCount = 272 // can expand; current actual usage 254 +}; + +U_CDECL_BEGIN +static UBool U_CALLCONV ualocale_cleanup(void); +U_CDECL_END + +U_NAMESPACE_BEGIN + +static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER; + +static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure +static char* gStrings = NULL; +static UHashtable* gAliasMap = NULL; +static UHashtable* gParentMap = NULL; + +U_NAMESPACE_END + +U_CDECL_BEGIN + +static UBool U_CALLCONV ualocale_cleanup(void) +{ + U_NAMESPACE_USE + + gUALocaleCacheInitOnce.reset(); + + if (gMapDataState > 0) { + uhash_close(gParentMap); + gParentMap = NULL; + 
uhash_close(gAliasMap); + gAliasMap = NULL; + uprv_free(gStrings); + gStrings = NULL; + } + gMapDataState = 0; + return TRUE; +} + +static void initializeMapData() { + U_NAMESPACE_USE + + UResourceBundle * curBundle; + char* stringsPtr; + char* stringsEnd; + UErrorCode status; + int32_t entryIndex, icuEntryCount; + + ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup); + + gStrings = (char*)uprv_malloc(kStringsAllocSize); + if (gStrings) { + stringsPtr = gStrings; + stringsEnd = gStrings + kStringsAllocSize; + } + + status = U_ZERO_ERROR; + curBundle = NULL; + icuEntryCount = 0; + if (gStrings) { + curBundle = ures_openDirect(NULL, "metadata", &status); + curBundle = ures_getByKey(curBundle, "alias", curBundle, &status); + curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE + if (U_SUCCESS(status)) { + icuEntryCount = ures_getSize(curBundle); // currently 331 + } + } + status = U_ZERO_ERROR; + gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, + kAppleAliasMapCount + icuEntryCount, &status); + // defaults to keyDeleter NULL + if (U_SUCCESS(status)) { + for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) { + uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status); +#if DEBUG_UALOC + if (U_FAILURE(status)) { + printf("# uhash_put 1 fails %s\n", u_errorName(status)); + } +#endif + } + status = U_ZERO_ERROR; + UResourceBundle * aliasMapBundle = NULL; + for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { + aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status); + if (U_FAILURE(status)) { + break; // error + } + const char * keyStr = ures_getKey(aliasMapBundle); + int32_t len = uprv_strlen(keyStr); + if (len >= stringsEnd - stringsPtr) { + break; // error + } + uprv_strcpy(stringsPtr, keyStr); + char * inLocStr = stringsPtr; + 
stringsPtr += len + 1; + + len = stringsEnd - stringsPtr - 1; + ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status); + if (U_FAILURE(status)) { + break; // error + } + stringsPtr[len] = 0; + uhash_put(gAliasMap, inLocStr, stringsPtr, &status); +#if DEBUG_UALOC + if (U_FAILURE(status)) { + printf("# uhash_put 2 fails %s\n", u_errorName(status)); + } +#endif + stringsPtr += len + 1; + } + ures_close(aliasMapBundle); + } else { + ures_close(curBundle); + uprv_free(gStrings); + gMapDataState = -1; // failure + return; + } + ures_close(curBundle); + + status = U_ZERO_ERROR; + gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, + kParentMapInitCount, &status); + // defaults to keyDeleter NULL + if (U_SUCCESS(status)) { + curBundle = ures_openDirect(NULL, "supplementalData", &status); + curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE + if (U_SUCCESS(status)) { + UResourceBundle * parentMapBundle = NULL; + while (TRUE) { + parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status); + if (U_FAILURE(status)) { + break; // no more parent bundles, normal exit + } + const char * keyStr = ures_getKey(parentMapBundle); + int32_t len = uprv_strlen(keyStr); + if (len >= stringsEnd - stringsPtr) { + break; // error + } + uprv_strcpy(stringsPtr, keyStr); + char * parentStr = stringsPtr; + stringsPtr += len + 1; + + if (ures_getType(parentMapBundle) == URES_STRING) { + len = stringsEnd - stringsPtr - 1; + ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status); + if (U_FAILURE(status)) { + break; // error + } + stringsPtr[len] = 0; + uhash_put(gParentMap, stringsPtr, parentStr, &status); + stringsPtr += len + 1; + } else { + // should be URES_ARRAY + icuEntryCount = ures_getSize(parentMapBundle); + for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { + len = stringsEnd - stringsPtr - 1; 
+ ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status); + if (U_FAILURE(status)) { + break; + } + stringsPtr[len] = 0; + uhash_put(gParentMap, stringsPtr, parentStr, &status); + stringsPtr += len + 1; + } + } + } + ures_close(parentMapBundle); + } + ures_close(curBundle); + + status = U_ZERO_ERROR; + for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) { + uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status); + } + } else { + uhash_close(gAliasMap); + gAliasMap = NULL; + uprv_free(gStrings); + gMapDataState = -1; // failure + return; + } + +#if DEBUG_UALOC + printf("# gStrings size %ld\n", stringsPtr - gStrings); + printf("# gParentMap count %d\n", uhash_count(gParentMap)); +#endif + gMapDataState = 1; +} + +U_CDECL_END + +// The following maps aliases, etc. Ensures 0-termination if no error. +static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return; + } + // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status); + + const char *replacement = NULL; + if (icu::gMapDataState > 0) { + replacement = (const char *)uhash_get(icu::gAliasMap, locale); + } + if (replacement == NULL) { + replacement = locale; + } + int32_t len = strnlen(replacement, normalizedCapacity); + if (len < normalizedCapacity) { // allow for 0 termination + uprv_strcpy(normalized, replacement); + } else { + *status = U_BUFFER_OVERFLOW_ERROR; + } +} + +static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return; + } + if (icu::gMapDataState > 0) { + const char *replacement = (const char *)uhash_get(icu::gParentMap, locale); + if (replacement) { + int32_t len = uprv_strlen(replacement); + if (len < parentCapacity) { // allow for 0 termination + uprv_strcpy(parent, replacement); +#if 
DEBUG_UALOC + printf(" # ualoc_getParent 1: locale %s -> parent %s\n", locale, parent); +#endif + } else { + *status = U_BUFFER_OVERFLOW_ERROR; + } + return; + } + } + uloc_getParent(locale, parent, parentCapacity - 1, status); +#if DEBUG_UALOC + printf(" # ualoc_getParent 2: locale %s -> parent %s\n", locale, parent); +#endif + parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING +} + +// Might do something better for this, perhaps maximizing locales then stripping +static const char * getLocParent(const char *locale, int32_t* distance) +{ + int32_t locParentIndex; + for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) { + if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) { + *distance = locParentMap[locParentIndex].distance; + return locParentMap[locParentIndex].parent; + } + } + if (icu::gMapDataState > 0) { + const char *replacement = (const char *)uhash_get(icu::gParentMap, locale); + if (replacement) { + *distance = 1; + return replacement; + } + } + return NULL; +} + +// this just checks if the *pointer* value is already in the array +static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck) +{ + int32_t locIndex; + for (locIndex = 0; locIndex < locsToUseCount; locIndex++) { + if (locToCheck == localizationsToUse[locIndex]) { + return TRUE; + } + } + return FALSE; +} + +enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22 + +int32_t +ualoc_localizationsToUse( const char* const *preferredLanguages, + int32_t preferredLanguagesCount, + const char* const *availableLocalizations, + int32_t availableLocalizationsCount, + const char* *localizationsToUse, + int32_t localizationsToUseCapacity, + UErrorCode *status ) +{ + if (U_FAILURE(*status)) { + return -1; + } + if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) { + *status 
= U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + // get resource data, need to protect with mutex + if (icu::gMapDataState == 0) { + umtx_initOnce(icu::gUALocaleCacheInitOnce, initializeMapData); + } + int32_t locsToUseCount = 0; + int32_t prefLangIndex, availLocIndex = 0; + int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match + int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0; + char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL; + char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL; + UBool foundMatch = FALSE; + UBool backupMatchPrefLang_pt_PT = FALSE; + +#if DEBUG_UALOC + if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) { + printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n", + preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]); + } else { + printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n", + preferredLanguagesCount, availableLocalizationsCount); + } +#endif + + // Part 1, find the best matching localization, if any + for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) { + char prefLangBaseName[kLangScriptRegMaxLen + 1]; + char prefLangNormName[kLangScriptRegMaxLen + 1]; + char prefLangParentName[kLangScriptRegMaxLen + 1]; + UErrorCode tmpStatus = U_ZERO_ERROR; + + if (preferredLanguages[prefLangIndex] == NULL) { + continue; // skip NULL preferredLanguages entry, go to next one + } + // use underscores, fix bad capitalization, delete any keywords + uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus); + if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 || + uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') { + continue; // can't handle this preferredLanguages entry or it is invalid, go to next one + } + 
prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING +#if DEBUG_UALOC + printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName); +#endif + + // if we have not already allocated and filled the array of + // base availableLocalizations, do so now. + if (availLocBase == NULL) { + availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); + if (availLocBase == NULL) { + continue; // cannot further check this preferredLanguages entry, go to next one + } +#if DEBUG_UALOC + printf(" # allocate & fill availLocBase\n"); +#endif + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + tmpStatus = U_ZERO_ERROR; + if (availableLocalizations[availLocIndex] == NULL) { + availLocBase[availLocIndex][0] = 0; // effectively remove this entry + continue; + } + uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus); + if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') { + availLocBase[availLocIndex][0] = 0; // effectively remove this entry + continue; + } + availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING +#if DEBUG_UALOC + printf(" # add availLocBase %s\n", availLocBase[availLocIndex]); +#endif + } + } + // first compare base preferredLanguage to base versions of availableLocalizations names + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) { + foundMatch = TRUE; // availLocIndex records where + foundMatchPrefLangIndex = prefLangIndex; +#if DEBUG_UALOC + printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]); +#endif + break; + } + } 
+ if (foundMatch) { + break; // found a loc for this preferredLanguages entry + } + + // get normalized preferredLanguage + tmpStatus = U_ZERO_ERROR; + ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus); + if (U_FAILURE(tmpStatus)) { + continue; // can't handle this preferredLanguages entry, go to next one + } +#if DEBUG_UALOC + printf(" # prefLangNormName %s\n", prefLangNormName); +#endif + // if we have not already allocated and filled the array of + // normalized availableLocalizations, do so now. + // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK", + // and fixes deprecated codes "iw" > "he", "in" > "id" etc. + if (availLocNorm == NULL) { + availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); + if (availLocNorm == NULL) { + continue; // cannot further check this preferredLanguages entry, go to next one + } +#if DEBUG_UALOC + printf(" # allocate & fill availLocNorm\n"); +#endif + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + tmpStatus = U_ZERO_ERROR; + ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); + if (U_FAILURE(tmpStatus)) { + availLocNorm[availLocIndex][0] = 0; // effectively remove this entry +#if DEBUG_UALOC + } else { + printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]); +#endif + } + } + } + // now compare normalized preferredLanguage to normalized localization names + // if matches, copy *original* localization name + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) { + foundMatch = TRUE; // availLocIndex records where + foundMatchPrefLangIndex = prefLangIndex; +#if DEBUG_UALOC + printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", 
availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); +#endif + break; + } + } + if (foundMatch) { + break; // found a loc for this preferredLanguages entry + } + + // now walk up the parent chain for preferredLanguage + // until we find a match or hit root + uprv_strcpy(prefLangBaseName, prefLangNormName); + while (!foundMatch) { + tmpStatus = U_ZERO_ERROR; + ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); + if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { + break; // reached root or cannot proceed further + } +#if DEBUG_UALOC + printf(" # prefLangParentName %s\n", prefLangParentName); +#endif + + // now compare this preferredLanguage parent to normalized localization names + // if matches, copy *original* localization name + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) { + foundMatch = TRUE; // availLocIndex records where + foundMatchPrefLangIndex = prefLangIndex; +#if DEBUG_UALOC + printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); +#endif + break; + } + } + uprv_strcpy(prefLangBaseName, prefLangParentName); + } + if (foundMatch) { + break; // found a loc for this preferredLanguages entry + } + + // last try, use parents of selected language to try for backup match + // if we have not already found one + if (availLocIndexBackup < 0) { + // now walk up the parent chain for preferredLanguage again + // checking against parents of selected availLocNorm entries + // but this time start with current prefLangNormName + uprv_strcpy(prefLangBaseName, prefLangNormName); + int32_t minDistance = kMaxParentDistance; + while (TRUE) { + // now compare this preferredLanguage to normalized localization names + // parent if have one for this; if matches, copy *original* 
localization name +#if DEBUG_UALOC + printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName); +#endif + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + char availLocMinOrParent[kLangScriptRegMaxLen + 1]; + int32_t distance; + // first check for special Apple parents of availLocNorm; the number + // of locales with such parents is small. + // If no such parent, or if parent has an intermediate numeric region, + // then try stripping the original region. + int32_t availLocParentLen = 0; + const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance); + if (availLocParent) { +#if DEBUG_UALOC + printf(" # availLocAppleParentName %s\n", availLocParent); +#endif + if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) { + availLocIndexBackup = availLocIndex; // records where the match occurred + backupMatchPrefLangIndex = prefLangIndex; + minDistance = distance; +#if DEBUG_UALOC + printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance); +#endif + continue; + } + availLocParentLen = uprv_strlen(availLocParent); + } + if (minDistance <= 1) { + continue; // we can't get any closer in the rest of this iteration + } + if (availLocParent == NULL || (availLocParentLen >= 6 && isdigit(availLocParent[availLocParentLen-1]))) { + tmpStatus = U_ZERO_ERROR; + int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus) && regLen > 1) { + uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING +#if DEBUG_UALOC + printf(" # availLocRegMaxName %s\n", availLocMinOrParent); +#endif + char availLocTemp[kLangScriptRegMaxLen + 1]; + uloc_getParent(availLocMinOrParent, availLocTemp, 
kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + availLocTemp[kLangScriptRegMaxLen] = 0; + uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_SUCCESS(tmpStatus)) { + availLocMinOrParent[kLangScriptRegMaxLen] = 0; +#if DEBUG_UALOC + printf(" # availLocNoRegParentName %s\n", availLocMinOrParent); +#endif + if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { + availLocIndexBackup = availLocIndex; // records where the match occurred + backupMatchPrefLangIndex = prefLangIndex; + minDistance = 1; + backupMatchPrefLang_pt_PT = (uprv_strcmp(prefLangNormName, "pt_PT") == 0); +#if DEBUG_UALOC + printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n"); +#endif + continue; + } + } + } + } + } + } + // then check against minimized version of availLocNorm + tmpStatus = U_ZERO_ERROR; + uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); + if (U_FAILURE(tmpStatus)) { + continue; + } + availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING +#if DEBUG_UALOC + printf(" # availLocMinimized %s\n", availLocMinOrParent); +#endif + if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { + availLocIndexBackup = availLocIndex; // records where the match occurred + backupMatchPrefLangIndex = prefLangIndex; + minDistance = 1; +#if DEBUG_UALOC + printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n"); +#endif + } + } + if (availLocIndexBackup >= 0) { + break; + } + tmpStatus = U_ZERO_ERROR; + ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); + if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { + break; // reached root or cannot proceed further + } + uprv_strcpy(prefLangBaseName, prefLangParentName); + } + } + } + // If we have a backup match, decide what to do 
+ if (availLocIndexBackup >= 0) { + if (!foundMatch) { + // no main match, just use the backup + availLocIndex = availLocIndexBackup; + foundMatch = TRUE; +#if DEBUG_UALOC + printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup); +#endif + } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && (!backupMatchPrefLang_pt_PT || uprv_strcmp(availLocNorm[availLocIndexBackup], "pt_BR") != 0)) { + // have a main match but backup match was higher in the prefs, use it if for a different language +#if DEBUG_UALOC + printf(" # have backup match higher in prefs, comparing its language and script to main match\n"); +#endif + char mainLang[ULOC_LANG_CAPACITY + 1]; + char backupLang[ULOC_LANG_CAPACITY + 1]; + UErrorCode tmpStatus = U_ZERO_ERROR; + uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus); + mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination + uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus); + backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination + if (U_SUCCESS(tmpStatus)) { + if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { + // backup match has different language than main match + availLocIndex = availLocIndexBackup; + // foundMatch is already TRUE +#if DEBUG_UALOC + printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); +#endif + } else { + // backup match has same language as main match, check scripts too + char availLocMaximized[kLangScriptRegMaxLen + 1]; + + uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus); + availLocMaximized[kLangScriptRegMaxLen] = 0; + uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus); + mainLang[ULOC_LANG_CAPACITY] = 0; + + uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, 
&tmpStatus); + availLocMaximized[kLangScriptRegMaxLen] = 0; + uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus); + backupLang[ULOC_LANG_CAPACITY] = 0; + + if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { + // backup match has different script than main match + availLocIndex = availLocIndexBackup; + // foundMatch is already TRUE +#if DEBUG_UALOC + printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); +#endif + } + } + } + } + } + + // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations + if (foundMatch) { + // Here availLocIndex corresponds to the first matched localization + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t availLocMatchIndex = availLocIndex; + if (locsToUseCount < localizationsToUseCapacity) { + localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex]; + } + // at this point we must have availLocBase, and minimally matched against that. 
+ // if we have not already allocated and filled the array of + // normalized availableLocalizations, do so now, but don't require it + if (availLocNorm == NULL) { + availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); + if (availLocNorm != NULL) { + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + tmpStatus = U_ZERO_ERROR; + ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); + if (U_FAILURE(tmpStatus)) { + availLocNorm[availLocIndex][0] = 0; // effectively remove this entry + } + } + } + } + + // add normalized form of matching loc, if different and in availLocBase + if (locsToUseCount < localizationsToUseCapacity) { + tmpStatus = U_ZERO_ERROR; + char matchedLocNormName[kLangScriptRegMaxLen + 1]; + char matchedLocParentName[kLangScriptRegMaxLen + 1]; + // get normalized form of matching loc + if (availLocNorm != NULL) { + uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]); + } else { + ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus); + } + if (U_SUCCESS(tmpStatus)) { + // add normalized form of matching loc, if different and in availLocBase + if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) { + // normalization of matched localization is different, see if we have the normalization in availableLocalizations + // from this point on, availLocIndex no longer corresponds to the matched localization. 
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0 + || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0)) + && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { + localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; + break; + } + } + } + + // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase + while (locsToUseCount < localizationsToUseCapacity) { + ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus); + if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) { + break; // reached root or cannot proceed further + } + + // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them) + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0 + || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0)) + && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { + localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; + break; + } + } + uprv_strcpy(matchedLocNormName, matchedLocParentName); + } + + // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization + // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from + // matchedLocNormName again, comparing against parents of selected availLocNorm entries. + // But this picks up too many matches that are not parents of the matched localization. So + // we just handle these specially. 
+ if ( locsToUseCount < localizationsToUseCapacity + && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0 + || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) { + int32_t zhTW_matchIndex = -1; + UBool zhHant_found = FALSE; + for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { + if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) { + zhTW_matchIndex = availLocIndex; + } + if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) { + zhHant_found = TRUE; + } + } + if (zhTW_matchIndex >= 0 && !zhHant_found + && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) { + localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex]; + } + } + } + } + } + + uprv_free(availLocNorm); + uprv_free(availLocBase); + return locsToUseCount; +} +