/*
*****************************************************************************************
-* Copyright (C) 2014 Apple Inc. All Rights Reserved.
+* Copyright (C) 2014-2019 Apple Inc. All Rights Reserved.
*****************************************************************************************
*/
+#define DEBUG_UALOC 0
+#if DEBUG_UALOC
+#include <stdio.h>
+#endif
+#include <string.h>
+#include <ctype.h>
#include "unicode/utypes.h"
#include "unicode/ualoc.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/putil.h"
+#include "unicode/ustring.h"
#include "cstring.h"
#include "cmemory.h"
+#include "uhash.h"
+#include "umutex.h"
+#include "ucln_cmn.h"
// the following has replacements for some math.h funcs etc
#include "putilimp.h"
return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
}
+// language codes to version with default script
+// must be sorted by language code
+static const char * langToDefaultScript[] = {
+ "az", "az_Latn",
+ "bm", "bm_Latn", // <rdar://problem/47494729> added
+ "bs", "bs_Latn",
+ "byn", "byn_Ethi", // <rdar://problem/47494729> added
+ "cu", "cu_Cyrl", // <rdar://problem/47494729> added
+ "ff", "ff_Latn", // <rdar://problem/47494729> added
+ "ha", "ha_Latn", // <rdar://problem/47494729> added
+ "iu", "iu_Cans",
+ "kk", "kk_Cyrl", // <rdar://problem/47494729> changed from _Arab
+ "ks", "ks_Arab", // unnecessary?
+ "ku", "ku_Latn",
+ "ky", "ky_Cyrl",
+ "mn", "mn_Cyrl",
+ "ms", "ms_Latn",
+ "pa", "pa_Guru",
+ "rif", "rif_Tfng", // unnecessary? no locale support anyway
+ "sd", "sd_Arab", // <rdar://problem/47494729> added
+ "shi", "shi_Tfng",
+ "sr", "sr_Cyrl",
+ "tg", "tg_Cyrl",
+ "tk", "tk_Latn", // unnecessary?
+ "ug", "ug_Arab",
+ "uz", "uz_Latn",
+ "vai", "vai_Vaii",
+ "yue", "yue_Hant", // to match CLDR data, not Apple default
+ "zh", "zh_Hans",
+ NULL
+};
+
+static const char * langCodeWithScriptIfAmbig(const char * langCode) {
+ const char ** langToDefScriptPtr = langToDefaultScript;
+ const char * testCurLoc;
+ while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) {
+ int cmp = uprv_strcmp(langCode, testCurLoc);
+ if (cmp <= 0) {
+ if (cmp == 0) {
+ return *langToDefScriptPtr;
+ }
+ break;
+ }
+ langToDefScriptPtr++;
+ }
+ return langCode;
+}
+
static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
}
langEntriesMax = newMax;
}
- uprv_strcpy(langEntries[entryCount].languageCode, langCode);
+ uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode));
langEntries[entryCount].userFraction = userFraction;
langEntries[entryCount].status = langStatus;
}
return entryCount;
}
-
static const char * forceParent[] = {
+ "en_150", "en_GB", // en for Europe
+ "en_AU", "en_GB",
+ "en_BD", "en_GB", // en for Bangladesh
+ "en_BE", "en_150", // en for Belgium goes to en for Europe
+ "en_DG", "en_GB",
+ "en_FK", "en_GB",
+ "en_GG", "en_GB",
+ "en_GI", "en_GB",
+ "en_HK", "en_GB", // en for Hong Kong
+ "en_IE", "en_GB",
+ "en_IM", "en_GB",
+ "en_IN", "en_GB",
+ "en_IO", "en_GB",
+ "en_JE", "en_GB",
+ "en_JM", "en_GB",
+ "en_LK", "en_GB",
+ "en_MO", "en_GB",
+ "en_MT", "en_GB",
+ "en_MV", "en_GB", // for Maldives
+ "en_MY", "en_GB", // en for Malaysia
+ "en_NZ", "en_AU",
+ "en_PK", "en_GB", // en for Pakistan
+ "en_SG", "en_GB",
+ "en_SH", "en_GB",
+ "en_VG", "en_GB",
+ "yue", "yue_CN", // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M)
+ "yue_CN", "root",
+ "yue_HK", "root",
+ "yue_Hans","yue_CN",
+ "yue_Hant","yue_HK",
"zh", "zh_CN",
"zh_CN", "root",
"zh_Hant", "zh_TW",
NULL
};
+enum { kLocBaseNameMax = 16 };
+
U_CAPI int32_t U_EXPORT2
ualoc_getAppleParent(const char* localeID,
char * parent,
int32_t len;
UErrorCode tempStatus;
char locbuf[ULOC_FULLNAME_CAPACITY+1];
+ char * foundDoubleUnderscore;
if (U_FAILURE(*err)) {
return 0;
*err = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
- len = uloc_canonicalize(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err);
+ len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
if (U_FAILURE(*err)) {
return 0;
}
locbuf[ULOC_FULLNAME_CAPACITY] = 0;
*err = U_ZERO_ERROR;
}
- if (len >= 2 && uprv_strncmp(locbuf, "zh", 2) == 0) {
+ foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
+ if (foundDoubleUnderscore != NULL) {
+ *foundDoubleUnderscore = 0; /* terminate at the __ */
+ len = uprv_strlen(locbuf);
+ }
+ if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
const char ** forceParentPtr = forceParent;
const char * testCurLoc;
while ( (testCurLoc = *forceParentPtr++) != NULL ) {
rb = ures_openDirect(NULL, locbuf, &tempStatus);
if (U_SUCCESS(tempStatus)) {
const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
+ ures_close(rb);
if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
// we have followed an alias
len = uprv_strlen(actualLocale);
} else {
*err = U_BUFFER_OVERFLOW_ERROR;
}
- ures_close(rb);
return len;
}
- tempStatus = U_ZERO_ERROR;
- const UChar * parentUName = ures_getStringByKey(rb, "%%Parent", &len, &tempStatus);
- if (U_SUCCESS(tempStatus) && tempStatus != U_USING_FALLBACK_WARNING) {
+ }
+ tempStatus = U_ZERO_ERROR;
+ rb = ures_openDirect(NULL, "supplementalData", &tempStatus);
+ rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus);
+ if (U_SUCCESS(tempStatus)) {
+ UResourceBundle * parentMapBundle = NULL;
+ int32_t childLen = 0;
+ while (childLen == 0) {
+ tempStatus = U_ZERO_ERROR;
+ parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus);
+ if (U_FAILURE(tempStatus)) {
+ break; // no more parent bundles, normal exit
+ }
+ char childName[kLocBaseNameMax + 1];
+ childName[kLocBaseNameMax] = 0;
+ const char * childPtr = NULL;
+ if (ures_getType(parentMapBundle) == URES_STRING) {
+ childLen = kLocBaseNameMax;
+ childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus);
+ if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
+ childLen = 0;
+ }
+ } else { // should be URES_ARRAY
+ int32_t childCur, childCount = ures_getSize(parentMapBundle);
+ for (childCur = 0; childCur < childCount && childLen == 0; childCur++) {
+ tempStatus = U_ZERO_ERROR;
+ childLen = kLocBaseNameMax;
+ childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus);
+ if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
+ childLen = 0;
+ }
+ }
+ }
+ }
+ ures_close(rb);
+ if (childLen > 0) {
+ // parentMapBundle key is the parent we are looking for
+ const char * keyStr = ures_getKey(parentMapBundle);
+ len = uprv_strlen(keyStr);
if (len < parentCapacity) {
- u_UCharsToChars(parentUName, parent, len + 1);
+ uprv_strcpy(parent, keyStr);
} else {
*err = U_BUFFER_OVERFLOW_ERROR;
}
- ures_close(rb);
+ ures_close(parentMapBundle);
return len;
}
- ures_close(rb);
+ ures_close(parentMapBundle);
}
+
len = uloc_getParent(locbuf, parent, parentCapacity, err);
if (U_SUCCESS(*err) && len == 0) {
len = 4;
return len;
}
+// =================
+// Data and related functions for ualoc_localizationsToUse
+// =================
+
+static const char * appleAliasMap[][2] = {
+ // names are lowercase here because they are looked up after being processed by uloc_getBaseName
+ { "arabic", "ar" }, // T2
+ { "chinese", "zh_Hans" }, // T0
+ { "danish", "da" }, // T2
+ { "dutch", "nl" }, // T1, still in use
+ { "english", "en" }, // T0, still in use
+ { "finnish", "fi" }, // T2
+ { "french", "fr" }, // T0, still in use
+ { "german", "de" }, // T0, still in use
+ { "italian", "it" }, // T1, still in use
+ { "japanese", "ja" }, // T0, still in use
+ { "korean", "ko" }, // T1
+ { "no_NO", "nb_NO" }, // special
+ { "norwegian", "nb" }, // T2
+ { "polish", "pl" }, // T2
+ { "portuguese", "pt" }, // T2
+ { "russian", "ru" }, // T2
+ { "spanish", "es" }, // T1, still in use
+ { "swedish", "sv" }, // T2
+ { "thai", "th" }, // T2
+ { "turkish", "tr" }, // T2
+ { "yue", "yue_Hans"}, // special
+ { "zh", "zh_Hans" }, // special
+};
+enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) };
+
+static const char * appleParentMap[][2] = {
+ { "en_150", "en_GB" }, // Apple custom parent
+ { "en_AD", "en_150" }, // Apple locale addition
+ { "en_AG", "en_GB" }, // Antigua & Barbuda
+ { "en_AI", "en_GB" }, // Anguilla
+ { "en_AL", "en_150" }, // Apple locale addition
+ { "en_AT", "en_150" }, // Apple locale addition
+ { "en_AU", "en_GB" }, // Apple custom parent
+ { "en_BA", "en_150" }, // Apple locale addition
+ { "en_BB", "en_GB" }, // Barbados
+ { "en_BD", "en_GB" }, // Apple custom parent
+ { "en_BE", "en_150" }, // Apple custom parent
+ { "en_BM", "en_GB" }, // Bermuda
+ { "en_BS", "en_GB" }, // Bahamas
+ { "en_BW", "en_GB" }, // Botswana
+ { "en_BZ", "en_GB" }, // Belize
+ { "en_CC", "en_AU" }, // Cocos (Keeling) Islands
+ { "en_CH", "en_150" }, // Apple locale addition
+ { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?)
+ { "en_CX", "en_AU" }, // Christmas Island
+ { "en_CY", "en_150" }, // Apple locale addition
+ { "en_CZ", "en_150" }, // Apple locale addition
+ { "en_DE", "en_150" }, // Apple locale addition
+ { "en_DG", "en_GB" },
+ { "en_DK", "en_150" }, // Apple locale addition
+ { "en_DM", "en_GB" }, // Dominica
+ { "en_EE", "en_150" }, // Apple locale addition
+ { "en_ES", "en_150" }, // Apple locale addition
+ { "en_FI", "en_150" }, // Apple locale addition
+ { "en_FJ", "en_GB" }, // Fiji
+ { "en_FK", "en_GB" },
+ { "en_FR", "en_150" }, // Apple locale addition
+ { "en_GD", "en_GB" }, // Grenada
+ { "en_GG", "en_GB" },
+ { "en_GH", "en_GB" }, // Ghana
+ { "en_GI", "en_GB" },
+ { "en_GM", "en_GB" }, // Gambia
+ { "en_GR", "en_150" }, // Apple locale addition
+ { "en_GY", "en_GB" }, // Guyana
+ { "en_HK", "en_GB" }, // Apple custom parent
+ { "en_HR", "en_150" }, // Apple locale addition
+ { "en_HU", "en_150" }, // Apple locale addition
+ { "en_IE", "en_GB" },
+ { "en_IL", "en_001" }, // Apple locale addition
+ { "en_IM", "en_GB" },
+ { "en_IN", "en_GB" }, // Apple custom parent
+ { "en_IO", "en_GB" },
+ { "en_IS", "en_150" }, // Apple locale addition
+ { "en_IT", "en_150" }, // Apple locale addition
+ { "en_JE", "en_GB" },
+ { "en_JM", "en_GB" },
+ { "en_KE", "en_GB" }, // Kenya
+ { "en_KI", "en_GB" }, // Kiribati
+ { "en_KN", "en_GB" }, // St. Kitts & Nevis
+ { "en_KY", "en_GB" }, // Cayman Islands
+ { "en_LC", "en_GB" }, // St. Lucia
+ { "en_LK", "en_GB" }, // Apple custom parent
+ { "en_LS", "en_GB" }, // Lesotho
+ { "en_LT", "en_150" }, // Apple locale addition
+ { "en_LU", "en_150" }, // Apple locale addition
+ { "en_LV", "en_150" }, // Apple locale addition
+ { "en_ME", "en_150" }, // Apple locale addition
+ { "en_MO", "en_GB" },
+ { "en_MS", "en_GB" }, // Montserrat
+ { "en_MT", "en_GB" },
+ { "en_MU", "en_GB" }, // Mauritius
+ { "en_MV", "en_GB" },
+ { "en_MW", "en_GB" }, // Malawi
+ { "en_MY", "en_GB" }, // Apple custom parent
+ { "en_NA", "en_GB" }, // Namibia
+ { "en_NF", "en_AU" }, // Norfolk Island
+ { "en_NG", "en_GB" }, // Nigeria
+ { "en_NL", "en_150" }, // Apple locale addition
+ { "en_NO", "en_150" }, // Apple locale addition
+ { "en_NR", "en_AU" }, // Nauru
+ { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?)
+ { "en_NZ", "en_AU" },
+ { "en_PG", "en_AU" }, // Papua New Guinea
+ { "en_PK", "en_GB" }, // Apple custom parent
+ { "en_PL", "en_150" }, // Apple locale addition
+ { "en_PN", "en_GB" }, // Pitcairn Islands
+ { "en_PT", "en_150" }, // Apple locale addition
+ { "en_RO", "en_150" }, // Apple locale addition
+ { "en_RS", "en_150" }, // Apple locale addition
+ { "en_RU", "en_150" }, // Apple locale addition
+ { "en_SB", "en_GB" }, // Solomon Islands
+ { "en_SC", "en_GB" }, // Seychelles
+ { "en_SD", "en_GB" }, // Sudan
+ { "en_SE", "en_150" }, // Apple locale addition
+ { "en_SG", "en_GB" },
+ { "en_SH", "en_GB" },
+ { "en_SI", "en_150" }, // Apple locale addition
+ { "en_SK", "en_150" }, // Apple locale addition
+ { "en_SL", "en_GB" }, // Sierra Leone
+ { "en_SS", "en_GB" }, // South Sudan
+ { "en_SZ", "en_GB" }, // Swaziland
+ { "en_TC", "en_GB" }, // Tristan da Cunha
+ { "en_TO", "en_GB" }, // Tonga
+ { "en_TT", "en_GB" }, // Trinidad & Tobago
+ { "en_TV", "en_GB" }, // Tuvalu
+ { "en_TZ", "en_GB" }, // Tanzania
+ { "en_UA", "en_150" }, // Apple locale addition
+ { "en_UG", "en_GB" }, // Uganda
+ { "en_VC", "en_GB" }, // St. Vincent & Grenadines
+ { "en_VG", "en_GB" },
+ { "en_VU", "en_GB" }, // Vanuatu
+ { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?)
+ { "en_ZA", "en_GB" }, // South Africa
+ { "en_ZM", "en_GB" }, // Zambia
+ { "en_ZW", "en_GB" }, // Zimbabwe
+};
+enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) };
+
+typedef struct {
+ const char * locale;
+ const char * parent;
+ int8_t distance;
+} LocParentAndDistance;
+
+static LocParentAndDistance locParentMap[] = {
+ // The localizations listed in the first column are in
+ // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.).
+ // The distance is a rough measure of distance from
+ // the localization to its parent, used as a weight.
+ { "de_DE", "de", 0 },
+ { "en_001", "en", 2 },
+ { "en_150", "en_GB", 1 },
+ { "en_AU", "en_GB", 1 },
+ { "en_GB", "en_001", 0 },
+ { "en_US", "en", 0 },
+ { "es_419", "es", 2 },
+ { "es_MX", "es_419", 0 },
+ { "fr_FR", "fr", 0 },
+ { "it_IT", "it", 0 },
+ { "pt_PT", "pt", 2 },
+ { "yue_Hans_CN","yue_Hans",0 },
+ { "yue_Hant_HK","yue_Hant",0 },
+ { "zh_Hans_CN", "zh_Hans", 0 },
+ { "zh_Hant_HK", "zh_Hant", 1 },
+ { "zh_Hant_TW", "zh_Hant", 0 },
+};
+enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 };
+
+enum {
+ kStringsAllocSize = 5280, // cannot expand; current actual usage 5259
+ kParentMapInitCount = 272 // can expand; current actual usage 254
+};
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV ualocale_cleanup(void);
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
+
+static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
+static char* gStrings = NULL;
+static UHashtable* gAliasMap = NULL;
+static UHashtable* gParentMap = NULL;
+
+U_NAMESPACE_END
+
+U_CDECL_BEGIN
+
+static UBool U_CALLCONV ualocale_cleanup(void)
+{
+ U_NAMESPACE_USE
+
+ gUALocaleCacheInitOnce.reset();
+
+ if (gMapDataState > 0) {
+ uhash_close(gParentMap);
+ gParentMap = NULL;
+ uhash_close(gAliasMap);
+ gAliasMap = NULL;
+ uprv_free(gStrings);
+ gStrings = NULL;
+ }
+ gMapDataState = 0;
+ return TRUE;
+}
+
+static void initializeMapData() {
+ U_NAMESPACE_USE
+
+ UResourceBundle * curBundle;
+ char* stringsPtr;
+ char* stringsEnd;
+ UErrorCode status;
+ int32_t entryIndex, icuEntryCount;
+
+ ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
+
+ gStrings = (char*)uprv_malloc(kStringsAllocSize);
+ if (gStrings) {
+ stringsPtr = gStrings;
+ stringsEnd = gStrings + kStringsAllocSize;
+ }
+
+ status = U_ZERO_ERROR;
+ curBundle = NULL;
+ icuEntryCount = 0;
+ if (gStrings) {
+ curBundle = ures_openDirect(NULL, "metadata", &status);
+ curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
+ curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
+ if (U_SUCCESS(status)) {
+ icuEntryCount = ures_getSize(curBundle); // currently 331
+ }
+ }
+ status = U_ZERO_ERROR;
+ gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
+ kAppleAliasMapCount + icuEntryCount, &status);
+ // defaults to keyDeleter NULL
+ if (U_SUCCESS(status)) {
+ for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
+ uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
+#if DEBUG_UALOC
+ if (U_FAILURE(status)) {
+ printf("# uhash_put 1 fails %s\n", u_errorName(status));
+ }
+#endif
+ }
+ status = U_ZERO_ERROR;
+ UResourceBundle * aliasMapBundle = NULL;
+ for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
+ aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
+ if (U_FAILURE(status)) {
+ break; // error
+ }
+ const char * keyStr = ures_getKey(aliasMapBundle);
+ int32_t len = uprv_strlen(keyStr);
+ if (len >= stringsEnd - stringsPtr) {
+ break; // error
+ }
+ uprv_strcpy(stringsPtr, keyStr);
+ char * inLocStr = stringsPtr;
+ stringsPtr += len + 1;
+
+ len = stringsEnd - stringsPtr - 1;
+ ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
+ if (U_FAILURE(status)) {
+ break; // error
+ }
+ stringsPtr[len] = 0;
+ uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
+#if DEBUG_UALOC
+ if (U_FAILURE(status)) {
+ printf("# uhash_put 2 fails %s\n", u_errorName(status));
+ }
+#endif
+ stringsPtr += len + 1;
+ }
+ ures_close(aliasMapBundle);
+ } else {
+ ures_close(curBundle);
+ uprv_free(gStrings);
+ gMapDataState = -1; // failure
+ return;
+ }
+ ures_close(curBundle);
+
+ status = U_ZERO_ERROR;
+ gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
+ kParentMapInitCount, &status);
+ // defaults to keyDeleter NULL
+ if (U_SUCCESS(status)) {
+ curBundle = ures_openDirect(NULL, "supplementalData", &status);
+ curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
+ if (U_SUCCESS(status)) {
+ UResourceBundle * parentMapBundle = NULL;
+ while (TRUE) {
+ parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
+ if (U_FAILURE(status)) {
+ break; // no more parent bundles, normal exit
+ }
+ const char * keyStr = ures_getKey(parentMapBundle);
+ int32_t len = uprv_strlen(keyStr);
+ if (len >= stringsEnd - stringsPtr) {
+ break; // error
+ }
+ uprv_strcpy(stringsPtr, keyStr);
+ char * parentStr = stringsPtr;
+ stringsPtr += len + 1;
+
+ if (ures_getType(parentMapBundle) == URES_STRING) {
+ len = stringsEnd - stringsPtr - 1;
+ ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
+ if (U_FAILURE(status)) {
+ break; // error
+ }
+ stringsPtr[len] = 0;
+ uhash_put(gParentMap, stringsPtr, parentStr, &status);
+ stringsPtr += len + 1;
+ } else {
+ // should be URES_ARRAY
+ icuEntryCount = ures_getSize(parentMapBundle);
+ for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
+ len = stringsEnd - stringsPtr - 1;
+ ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+ stringsPtr[len] = 0;
+ uhash_put(gParentMap, stringsPtr, parentStr, &status);
+ stringsPtr += len + 1;
+ }
+ }
+ }
+ ures_close(parentMapBundle);
+ }
+ ures_close(curBundle);
+
+ status = U_ZERO_ERROR;
+ for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
+ uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
+ }
+ } else {
+ uhash_close(gAliasMap);
+ gAliasMap = NULL;
+ uprv_free(gStrings);
+ gMapDataState = -1; // failure
+ return;
+ }
+
+#if DEBUG_UALOC
+ printf("# gStrings size %ld\n", stringsPtr - gStrings);
+ printf("# gParentMap count %d\n", uhash_count(gParentMap));
+#endif
+ gMapDataState = 1;
+}
+
+U_CDECL_END
+
+// The following maps aliases, etc. Ensures 0-termination if no error.
+static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
+{
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
+
+ const char *replacement = NULL;
+ if (icu::gMapDataState > 0) {
+ replacement = (const char *)uhash_get(icu::gAliasMap, locale);
+ }
+ if (replacement == NULL) {
+ replacement = locale;
+ }
+ int32_t len = strnlen(replacement, normalizedCapacity);
+ if (len < normalizedCapacity) { // allow for 0 termination
+ uprv_strcpy(normalized, replacement);
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+}
+
+static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
+{
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ if (icu::gMapDataState > 0) {
+ const char *replacement = (const char *)uhash_get(icu::gParentMap, locale);
+ if (replacement) {
+ int32_t len = uprv_strlen(replacement);
+ if (len < parentCapacity) { // allow for 0 termination
+ uprv_strcpy(parent, replacement);
+#if DEBUG_UALOC
+ printf(" # ualoc_getParent 1: locale %s -> parent %s\n", locale, parent);
+#endif
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ return;
+ }
+ }
+ uloc_getParent(locale, parent, parentCapacity - 1, status);
+#if DEBUG_UALOC
+ printf(" # ualoc_getParent 2: locale %s -> parent %s\n", locale, parent);
+#endif
+ parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
+}
+
+// Might do something better for this, perhaps maximizing locales then stripping
+static const char * getLocParent(const char *locale, int32_t* distance)
+{
+ int32_t locParentIndex;
+ for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
+ if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) {
+ *distance = locParentMap[locParentIndex].distance;
+ return locParentMap[locParentIndex].parent;
+ }
+ }
+ if (icu::gMapDataState > 0) {
+ const char *replacement = (const char *)uhash_get(icu::gParentMap, locale);
+ if (replacement) {
+ *distance = 1;
+ return replacement;
+ }
+ }
+ return NULL;
+}
+
+// this just checks if the *pointer* value is already in the array
+static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
+{
+ int32_t locIndex;
+ for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
+ if (locToCheck == localizationsToUse[locIndex]) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
+
+int32_t
+ualoc_localizationsToUse( const char* const *preferredLanguages,
+ int32_t preferredLanguagesCount,
+ const char* const *availableLocalizations,
+ int32_t availableLocalizationsCount,
+ const char* *localizationsToUse,
+ int32_t localizationsToUseCapacity,
+ UErrorCode *status )
+{
+ if (U_FAILURE(*status)) {
+ return -1;
+ }
+ if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return -1;
+ }
+ // get resource data, need to protect with mutex
+ if (icu::gMapDataState == 0) {
+ umtx_initOnce(icu::gUALocaleCacheInitOnce, initializeMapData);
+ }
+ int32_t locsToUseCount = 0;
+ int32_t prefLangIndex, availLocIndex = 0;
+ int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match
+ int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0;
+ char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
+ char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
+ UBool foundMatch = FALSE;
+ UBool backupMatchPrefLang_pt_PT = FALSE;
+
+#if DEBUG_UALOC
+ if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) {
+ printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n",
+ preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]);
+ } else {
+ printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n",
+ preferredLanguagesCount, availableLocalizationsCount);
+ }
+#endif
+
+ // Part 1, find the best matching localization, if any
+ for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
+ char prefLangBaseName[kLangScriptRegMaxLen + 1];
+ char prefLangNormName[kLangScriptRegMaxLen + 1];
+ char prefLangParentName[kLangScriptRegMaxLen + 1];
+ UErrorCode tmpStatus = U_ZERO_ERROR;
+
+ if (preferredLanguages[prefLangIndex] == NULL) {
+ continue; // skip NULL preferredLanguages entry, go to next one
+ }
+ // use underscores, fix bad capitalization, delete any keywords
+ uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
+ if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
+ uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
+ continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
+ }
+ prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
+#if DEBUG_UALOC
+ printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName);
+#endif
+
+ // if we have not already allocated and filled the array of
+ // base availableLocalizations, do so now.
+ if (availLocBase == NULL) {
+ availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
+ if (availLocBase == NULL) {
+ continue; // cannot further check this preferredLanguages entry, go to next one
+ }
+#if DEBUG_UALOC
+ printf(" # allocate & fill availLocBase\n");
+#endif
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ tmpStatus = U_ZERO_ERROR;
+ if (availableLocalizations[availLocIndex] == NULL) {
+ availLocBase[availLocIndex][0] = 0; // effectively remove this entry
+ continue;
+ }
+ uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
+ if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
+ availLocBase[availLocIndex][0] = 0; // effectively remove this entry
+ continue;
+ }
+ availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
+#if DEBUG_UALOC
+ printf(" # add availLocBase %s\n", availLocBase[availLocIndex]);
+#endif
+ }
+ }
+ // first compare base preferredLanguage to base versions of availableLocalizations names
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
+ foundMatch = TRUE; // availLocIndex records where
+ foundMatchPrefLangIndex = prefLangIndex;
+#if DEBUG_UALOC
+ printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]);
+#endif
+ break;
+ }
+ }
+ if (foundMatch) {
+ break; // found a loc for this preferredLanguages entry
+ }
+
+ // get normalized preferredLanguage
+ tmpStatus = U_ZERO_ERROR;
+ ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ continue; // can't handle this preferredLanguages entry, go to next one
+ }
+#if DEBUG_UALOC
+ printf(" # prefLangNormName %s\n", prefLangNormName);
+#endif
+ // if we have not already allocated and filled the array of
+ // normalized availableLocalizations, do so now.
+ // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK",
+ // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
+ if (availLocNorm == NULL) {
+ availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
+ if (availLocNorm == NULL) {
+ continue; // cannot further check this preferredLanguages entry, go to next one
+ }
+#if DEBUG_UALOC
+ printf(" # allocate & fill availLocNorm\n");
+#endif
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ tmpStatus = U_ZERO_ERROR;
+ ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
+#if DEBUG_UALOC
+ } else {
+ printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
+#endif
+ }
+ }
+ }
+ // now compare normalized preferredLanguage to normalized localization names
+ // if matches, copy *original* localization name
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
+ foundMatch = TRUE; // availLocIndex records where
+ foundMatchPrefLangIndex = prefLangIndex;
+#if DEBUG_UALOC
+ printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
+#endif
+ break;
+ }
+ }
+ if (foundMatch) {
+ break; // found a loc for this preferredLanguages entry
+ }
+
+ // now walk up the parent chain for preferredLanguage
+ // until we find a match or hit root
+ uprv_strcpy(prefLangBaseName, prefLangNormName);
+ while (!foundMatch) {
+ tmpStatus = U_ZERO_ERROR;
+ ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
+ if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
+ break; // reached root or cannot proceed further
+ }
+#if DEBUG_UALOC
+ printf(" # prefLangParentName %s\n", prefLangParentName);
+#endif
+
+ // now compare this preferredLanguage parent to normalized localization names
+ // if matches, copy *original* localization name
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
+ foundMatch = TRUE; // availLocIndex records where
+ foundMatchPrefLangIndex = prefLangIndex;
+#if DEBUG_UALOC
+ printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
+#endif
+ break;
+ }
+ }
+ uprv_strcpy(prefLangBaseName, prefLangParentName);
+ }
+ if (foundMatch) {
+ break; // found a loc for this preferredLanguages entry
+ }
+
+ // last try, use parents of selected language to try for backup match
+ // if we have not already found one
+ if (availLocIndexBackup < 0) {
+ // now walk up the parent chain for preferredLanguage again
+ // checking against parents of selected availLocNorm entries
+ // but this time start with current prefLangNormName
+ uprv_strcpy(prefLangBaseName, prefLangNormName);
+ int32_t minDistance = kMaxParentDistance;
+ while (TRUE) {
+ // now compare this preferredLanguage to normalized localization names
+ // parent if have one for this; if matches, copy *original* localization name
+#if DEBUG_UALOC
+ printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName);
+#endif
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ char availLocMinOrParent[kLangScriptRegMaxLen + 1];
+ int32_t distance;
+ // first check for special Apple parents of availLocNorm; the number
+ // of locales with such parents is small.
+ // If no such parent, or if parent has an intermediate numeric region,
+ // then try stripping the original region.
+ int32_t availLocParentLen = 0;
+ const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance);
+ if (availLocParent) {
+#if DEBUG_UALOC
+ printf(" # availLocAppleParentName %s\n", availLocParent);
+#endif
+ if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) {
+ availLocIndexBackup = availLocIndex; // records where the match occurred
+ backupMatchPrefLangIndex = prefLangIndex;
+ minDistance = distance;
+#if DEBUG_UALOC
+ printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance);
+#endif
+ continue;
+ }
+ availLocParentLen = uprv_strlen(availLocParent);
+ }
+ if (minDistance <= 1) {
+ continue; // we can't get any closer in the rest of this iteration
+ }
+ if (availLocParent == NULL || (availLocParentLen >= 6 && isdigit(availLocParent[availLocParentLen-1]))) {
+ tmpStatus = U_ZERO_ERROR;
+ int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
+ if (U_SUCCESS(tmpStatus) && regLen > 1) {
+ uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
+ if (U_SUCCESS(tmpStatus)) {
+ availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
+#if DEBUG_UALOC
+ printf(" # availLocRegMaxName %s\n", availLocMinOrParent);
+#endif
+ char availLocTemp[kLangScriptRegMaxLen + 1];
+ uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus);
+ if (U_SUCCESS(tmpStatus)) {
+ availLocTemp[kLangScriptRegMaxLen] = 0;
+ uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
+ if (U_SUCCESS(tmpStatus)) {
+ availLocMinOrParent[kLangScriptRegMaxLen] = 0;
+#if DEBUG_UALOC
+ printf(" # availLocNoRegParentName %s\n", availLocMinOrParent);
+#endif
+ if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
+ availLocIndexBackup = availLocIndex; // records where the match occurred
+ backupMatchPrefLangIndex = prefLangIndex;
+ minDistance = 1;
+ backupMatchPrefLang_pt_PT = (uprv_strcmp(prefLangNormName, "pt_PT") == 0);
+#if DEBUG_UALOC
+ printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n");
+#endif
+ continue;
+ }
+ }
+ }
+ }
+ }
+ }
+ // then check against minimized version of availLocNorm
+ tmpStatus = U_ZERO_ERROR;
+ uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ continue;
+ }
+ availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
+#if DEBUG_UALOC
+ printf(" # availLocMinimized %s\n", availLocMinOrParent);
+#endif
+ if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
+ availLocIndexBackup = availLocIndex; // records where the match occurred
+ backupMatchPrefLangIndex = prefLangIndex;
+ minDistance = 1;
+#if DEBUG_UALOC
+ printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n");
+#endif
+ }
+ }
+ if (availLocIndexBackup >= 0) {
+ break;
+ }
+ tmpStatus = U_ZERO_ERROR;
+ ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
+ if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
+ break; // reached root or cannot proceed further
+ }
+ uprv_strcpy(prefLangBaseName, prefLangParentName);
+ }
+ }
+ }
+ // If we have a backup match, decide what to do
+ if (availLocIndexBackup >= 0) {
+ if (!foundMatch) {
+ // no main match, just use the backup
+ availLocIndex = availLocIndexBackup;
+ foundMatch = TRUE;
+#if DEBUG_UALOC
+ printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup);
+#endif
+ } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && (!backupMatchPrefLang_pt_PT || uprv_strcmp(availLocNorm[availLocIndexBackup], "pt_BR") != 0)) {
+ // have a main match but backup match was higher in the prefs, use it if for a different language
+#if DEBUG_UALOC
+ printf(" # have backup match higher in prefs, comparing its language and script to main match\n");
+#endif
+ char mainLang[ULOC_LANG_CAPACITY + 1];
+ char backupLang[ULOC_LANG_CAPACITY + 1];
+ UErrorCode tmpStatus = U_ZERO_ERROR;
+ uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
+ mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
+ uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
+ backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
+ if (U_SUCCESS(tmpStatus)) {
+ if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
+ // backup match has different language than main match
+ availLocIndex = availLocIndexBackup;
+ // foundMatch is already TRUE
+#if DEBUG_UALOC
+ printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
+#endif
+ } else {
+ // backup match has same language as main match, check scripts too
+ char availLocMaximized[kLangScriptRegMaxLen + 1];
+
+ uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
+ availLocMaximized[kLangScriptRegMaxLen] = 0;
+ uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
+ mainLang[ULOC_LANG_CAPACITY] = 0;
+
+ uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
+ availLocMaximized[kLangScriptRegMaxLen] = 0;
+ uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
+ backupLang[ULOC_LANG_CAPACITY] = 0;
+
+ if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
+ // backup match has different script than main match
+ availLocIndex = availLocIndexBackup;
+ // foundMatch is already TRUE
+#if DEBUG_UALOC
+ printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
+#endif
+ }
+ }
+ }
+ }
+ }
+
+ // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
+ if (foundMatch) {
+ // Here availLocIndex corresponds to the first matched localization
+ UErrorCode tmpStatus = U_ZERO_ERROR;
+ int32_t availLocMatchIndex = availLocIndex;
+ if (locsToUseCount < localizationsToUseCapacity) {
+ localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
+ }
+ // at this point we must have availLocBase, and minimally matched against that.
+ // if we have not already allocated and filled the array of
+ // normalized availableLocalizations, do so now, but don't require it
+ if (availLocNorm == NULL) {
+ availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
+ if (availLocNorm != NULL) {
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ tmpStatus = U_ZERO_ERROR;
+ ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
+ }
+ }
+ }
+ }
+
+ // add normalized form of matching loc, if different and in availLocBase
+ if (locsToUseCount < localizationsToUseCapacity) {
+ tmpStatus = U_ZERO_ERROR;
+ char matchedLocNormName[kLangScriptRegMaxLen + 1];
+ char matchedLocParentName[kLangScriptRegMaxLen + 1];
+ // get normalized form of matching loc
+ if (availLocNorm != NULL) {
+ uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
+ } else {
+ ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
+ }
+ if (U_SUCCESS(tmpStatus)) {
+ // add normalized form of matching loc, if different and in availLocBase
+ if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
+ // normalization of matched localization is different, see if we have the normalization in availableLocalizations
+ // from this point on, availLocIndex no longer corresponds to the matched localization.
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
+ || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
+ && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
+ localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
+ break;
+ }
+ }
+ }
+
+ // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
+ while (locsToUseCount < localizationsToUseCapacity) {
+ ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
+ if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
+ break; // reached root or cannot proceed further
+ }
+
+ // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
+ || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
+ && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
+ localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
+ break;
+ }
+ }
+ uprv_strcpy(matchedLocNormName, matchedLocParentName);
+ }
+
+ // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
+ // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
+ // matchedLocNormName again, comparing against parents of of selected availLocNorm entries.
+ // But this picks up too many matches that are not parents of the matched localization. So
+ // we just handle these specially.
+ if ( locsToUseCount < localizationsToUseCapacity
+ && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
+ || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
+ int32_t zhTW_matchIndex = -1;
+ UBool zhHant_found = FALSE;
+ for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
+ if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
+ zhTW_matchIndex = availLocIndex;
+ }
+ if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
+ zhHant_found = TRUE;
+ }
+ }
+ if (zhTW_matchIndex >= 0 && !zhHant_found
+ && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
+ localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
+ }
+ }
+ }
+ }
+ }
+
+ uprv_free(availLocNorm);
+ uprv_free(availLocBase);
+ return locsToUseCount;
+}
+