]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ualoc.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / ualoc.cpp
CommitLineData
57a6839d
A
1/*
2*****************************************************************************************
3d1f044b 3* Copyright (C) 2014-2019 Apple Inc. All Rights Reserved.
57a6839d
A
4*****************************************************************************************
5*/
6
2ca993e8
A
7#define DEBUG_UALOC 0
8#if DEBUG_UALOC
9#include <stdio.h>
10#endif
340931cb 11#include <stdlib.h>
2ca993e8 12#include <string.h>
9f1b1155 13#include <ctype.h>
57a6839d
A
14#include "unicode/utypes.h"
15#include "unicode/ualoc.h"
16#include "unicode/uloc.h"
17#include "unicode/ures.h"
18#include "unicode/putil.h"
f3c0d7a5 19#include "unicode/ustring.h"
57a6839d
A
20#include "cstring.h"
21#include "cmemory.h"
b331163b
A
22#include "uhash.h"
23#include "umutex.h"
24#include "ucln_cmn.h"
57a6839d
A
25// the following has replacements for some math.h funcs etc
26#include "putilimp.h"
340931cb
A
27// For <rdar://problem/63880069>
28#include "uresimp.h"
57a6839d
A
29
30// The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
31// From its docs (adapted): [IntF is] a special integer that represents the number in
32// normalized scientific notation.
33// Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
34// offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
35// 14660000000000 -> 1.46600E13 -> 63146600
36// 0.0001 -> 1.00000E-4 -> 46100000
37// -123.456 -> -1.23456E-2 -> -48123456
38//
39// Here to avoid an extra division we have the max coefficient as 999999 (instead of
40// 9.99999) and instead offset the exponent by -55.
41//
42static double doubleFromIntF(int32_t intF) {
43 double coefficient = (double)(intF % 1000000);
44 int32_t exponent = (intF / 1000000) - 55;
45 return coefficient * uprv_pow10(exponent);
46}
47
48static int compareLangEntries(const void * entry1, const void * entry2) {
49 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
50 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
51 // want descending order
52 if (fraction1 > fraction2) return -1;
53 if (fraction1 < fraction2) return 1;
54 // userFractions the same, sort by languageCode
55 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
56}
57
f3c0d7a5
A
// Flat list of { language code, language code with its default script } pairs, NULL-terminated.
// Must be sorted by language code: langCodeWithScriptIfAmbig() stops searching at the first
// key that sorts after the code being looked up.
static const char * langToDefaultScript[] = {
    "az",   "az_Latn",
    "bm",   "bm_Latn",  // <rdar://problem/47494729> added
    "bs",   "bs_Latn",
    "byn",  "byn_Ethi", // <rdar://problem/47494729> added
    "cu",   "cu_Cyrl",  // <rdar://problem/47494729> added
    "ff",   "ff_Latn",  // <rdar://problem/47494729> added
    "ha",   "ha_Latn",  // <rdar://problem/47494729> added
    "iu",   "iu_Cans",
    "kk",   "kk_Cyrl",  // <rdar://problem/47494729> changed from _Arab
    "ks",   "ks_Arab",  // unnecessary?
    "ku",   "ku_Latn",
    "ky",   "ky_Cyrl",
    "mn",   "mn_Cyrl",
    "ms",   "ms_Latn",
    "pa",   "pa_Guru",
    "rif",  "rif_Tfng", // unnecessary? no locale support anyway
    "sd",   "sd_Arab",  // <rdar://problem/47494729> added
    "shi",  "shi_Tfng",
    "sr",   "sr_Cyrl",
    "tg",   "tg_Cyrl",
    "tk",   "tk_Latn",  // unnecessary?
    "ug",   "ug_Arab",
    "uz",   "uz_Latn",
    "vai",  "vai_Vaii",
    "yue",  "yue_Hant", // to match CLDR data, not Apple default
    "zh",   "zh_Hans",
    NULL
};
89
90static const char * langCodeWithScriptIfAmbig(const char * langCode) {
91 const char ** langToDefScriptPtr = langToDefaultScript;
92 const char * testCurLoc;
93 while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) {
94 int cmp = uprv_strcmp(langCode, testCurLoc);
95 if (cmp <= 0) {
96 if (cmp == 0) {
97 return *langToDefScriptPtr;
98 }
99 break;
100 }
101 langToDefScriptPtr++;
102 }
103 return langCode;
104}
105
57a6839d
A
106static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
107static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
108static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
109
// Sizing for the scratch array of language entries in ualoc_getLanguagesForRegion.
enum {
    kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
    kLangEntriesFactor = 3     // if we have to allocate, multiply existing size by this
};
114
115U_CAPI int32_t U_EXPORT2
116ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
117 UALanguageEntry *entries, int32_t entriesCapacity,
118 UErrorCode *err)
119{
120 if (U_FAILURE(*err)) {
121 return 0;
122 }
123 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
124 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
125 *err = U_ILLEGAL_ARGUMENT_ERROR;
126 return 0;
127 }
128 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
129 rb = ures_getByKey(rb, "territoryInfo", rb, err);
130 rb = ures_getByKey(rb, regionID, rb, err);
131 if (U_FAILURE(*err)) {
132 ures_close(rb);
133 return 0;
134 }
135
136 int32_t entryCount = 0;
137 UResourceBundle *langBund = NULL;
138 int32_t lbIdx, lbCount = ures_getSize(rb);
139 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
140 UALanguageEntry * langEntries = localLangEntries;
141 int32_t langEntriesMax = kLocalLangEntriesMax;
142
143 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
144 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
145 if (U_FAILURE(*err)) {
146 break;
147 }
148 const char * langCode = ures_getKey(langBund);
149 if (uprv_strcmp(langCode,"territoryF") == 0) {
150 continue;
151 }
152 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
153 continue; // a code we cannot handle
154 }
155
156 UErrorCode localErr = U_ZERO_ERROR;
157 double userFraction = 0.0;
158 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
159 if (U_SUCCESS(localErr)) {
160 int32_t intF = ures_getInt(itemBund, &localErr);
161 if (U_SUCCESS(localErr)) {
162 userFraction = doubleFromIntF(intF);
163 }
164 ures_close(itemBund);
165 }
166 if (userFraction < minimumFraction) {
167 continue;
168 }
169 if (entries != NULL) {
170 localErr = U_ZERO_ERROR;
171 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
172 int32_t ulen;
173 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
174 if (U_SUCCESS(localErr)) {
175 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
176 if (cmp == 0) {
177 langStatus = UALANGSTATUS_OFFICIAL;
178 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
179 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
180 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
181 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
182 }
183 }
184 // Now we have all of the info for our next entry
185 if (entryCount >= langEntriesMax) {
186 int32_t newMax = langEntriesMax * kLangEntriesFactor;
187 if (langEntries == localLangEntries) {
188 // first allocation, copy from local buf
189 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
190 if (langEntries == NULL) {
191 *err = U_MEMORY_ALLOCATION_ERROR;
192 break;
193 }
194 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
195 } else {
196 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
197 if (langEntries == NULL) {
198 *err = U_MEMORY_ALLOCATION_ERROR;
199 break;
200 }
201 }
202 langEntriesMax = newMax;
203 }
f3c0d7a5 204 uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode));
57a6839d
A
205 langEntries[entryCount].userFraction = userFraction;
206 langEntries[entryCount].status = langStatus;
207 }
208 entryCount++;
209 }
210 ures_close(langBund);
211 ures_close(rb);
212 if (U_FAILURE(*err)) {
213 if (langEntries != localLangEntries) {
214 free(langEntries);
215 }
216 return 0;
217 }
218 if (entries != NULL) {
219 // sort langEntries, copy entries that fit to provided array
220 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
221 if (entryCount > entriesCapacity) {
222 entryCount = entriesCapacity;
223 }
224 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
225 if (langEntries != localLangEntries) {
226 free(langEntries);
227 }
228 }
229 return entryCount;
230}
231
// Flat, NULL-terminated list of { locale, forced parent } pairs consulted first by
// ualoc_getAppleParent. Its search stops at the first key that sorts after the locale being
// looked up, so the keys must stay in ascending strcmp order.
// NOTE(review): ualoc_getAppleParent only searches this table for IDs beginning with "en" or
// "zh", so the "yue" entries look unreachable -- confirm whether that is intended.
static const char * forceParent[] = { // Not used by ualoc_localizationsToUse
    "en_150",  "en_GB",  // en for Europe
    "en_AU",   "en_GB",
    "en_BD",   "en_GB",  // en for Bangladesh
    "en_BE",   "en_150", // en for Belgium goes to en for Europe
    "en_DG",   "en_GB",
    "en_FK",   "en_GB",
    "en_GG",   "en_GB",
    "en_GI",   "en_GB",
    "en_HK",   "en_GB",  // en for Hong Kong
    "en_IE",   "en_GB",
    "en_IM",   "en_GB",
    "en_IN",   "en_GB",
    "en_IO",   "en_GB",
    "en_JE",   "en_GB",
    "en_JM",   "en_GB",
    "en_LK",   "en_GB",
    "en_MO",   "en_GB",
    "en_MT",   "en_GB",
    "en_MV",   "en_GB",  // for Maldives
    "en_MY",   "en_GB",  // en for Malaysia
    "en_NZ",   "en_AU",
    "en_PK",   "en_GB",  // en for Pakistan
    "en_SG",   "en_GB",
    "en_SH",   "en_GB",
    "en_VG",   "en_GB",
    "yue",     "yue_CN", // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M)
    "yue_CN",  "root",   // should this change to e.g. "zh_Hans_CN" for <rdar://problem/30671866>?
    "yue_HK",  "root",   // should this change to e.g. "zh_Hant_HK" for <rdar://problem/30671866>?
    "yue_Hans","yue_CN",
    "yue_Hant","yue_HK",
    "zh",      "zh_CN",
    "zh_CN",   "root",
    "zh_Hant", "zh_TW",
    "zh_TW",   "root",
    NULL
};
269
2ca993e8
A
// Capacity (excluding the terminating NUL) of the small locale-name buffers used when reading
// parentLocales children and when minimizing subtags.
enum { kLocBaseNameMax = 16 };
271
57a6839d
A
272U_CAPI int32_t U_EXPORT2
273ualoc_getAppleParent(const char* localeID,
274 char * parent,
275 int32_t parentCapacity,
276 UErrorCode* err)
277{
278 UResourceBundle *rb;
279 int32_t len;
280 UErrorCode tempStatus;
281 char locbuf[ULOC_FULLNAME_CAPACITY+1];
08b89b0a 282 char * foundDoubleUnderscore;
57a6839d
A
283
284 if (U_FAILURE(*err)) {
285 return 0;
286 }
287 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
288 *err = U_ILLEGAL_ARGUMENT_ERROR;
289 return 0;
290 }
08b89b0a 291 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
57a6839d
A
292 if (U_FAILURE(*err)) {
293 return 0;
294 }
295 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
296 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
297 *err = U_ZERO_ERROR;
298 }
08b89b0a
A
299 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
300 if (foundDoubleUnderscore != NULL) {
301 *foundDoubleUnderscore = 0; /* terminate at the __ */
302 len = uprv_strlen(locbuf);
303 }
b331163b 304 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
57a6839d
A
305 const char ** forceParentPtr = forceParent;
306 const char * testCurLoc;
307 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
308 int cmp = uprv_strcmp(locbuf, testCurLoc);
309 if (cmp <= 0) {
310 if (cmp == 0) {
311 len = uprv_strlen(*forceParentPtr);
312 if (len < parentCapacity) {
313 uprv_strcpy(parent, *forceParentPtr);
314 } else {
315 *err = U_BUFFER_OVERFLOW_ERROR;
316 }
317 return len;
318 }
319 break;
320 }
321 forceParentPtr++;
322 }
323 }
324 tempStatus = U_ZERO_ERROR;
325 rb = ures_openDirect(NULL, locbuf, &tempStatus);
326 if (U_SUCCESS(tempStatus)) {
327 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
2ca993e8 328 ures_close(rb);
57a6839d
A
329 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
330 // we have followed an alias
331 len = uprv_strlen(actualLocale);
332 if (len < parentCapacity) {
333 uprv_strcpy(parent, actualLocale);
334 } else {
335 *err = U_BUFFER_OVERFLOW_ERROR;
336 }
57a6839d
A
337 return len;
338 }
2ca993e8
A
339 }
340 tempStatus = U_ZERO_ERROR;
341 rb = ures_openDirect(NULL, "supplementalData", &tempStatus);
342 rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus);
343 if (U_SUCCESS(tempStatus)) {
344 UResourceBundle * parentMapBundle = NULL;
345 int32_t childLen = 0;
346 while (childLen == 0) {
347 tempStatus = U_ZERO_ERROR;
348 parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus);
349 if (U_FAILURE(tempStatus)) {
350 break; // no more parent bundles, normal exit
351 }
352 char childName[kLocBaseNameMax + 1];
353 childName[kLocBaseNameMax] = 0;
354 const char * childPtr = NULL;
355 if (ures_getType(parentMapBundle) == URES_STRING) {
356 childLen = kLocBaseNameMax;
357 childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus);
358 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
359 childLen = 0;
360 }
361 } else { // should be URES_ARRAY
362 int32_t childCur, childCount = ures_getSize(parentMapBundle);
363 for (childCur = 0; childCur < childCount && childLen == 0; childCur++) {
364 tempStatus = U_ZERO_ERROR;
365 childLen = kLocBaseNameMax;
366 childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus);
367 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
368 childLen = 0;
369 }
370 }
371 }
372 }
373 ures_close(rb);
374 if (childLen > 0) {
375 // parentMapBundle key is the parent we are looking for
376 const char * keyStr = ures_getKey(parentMapBundle);
377 len = uprv_strlen(keyStr);
57a6839d 378 if (len < parentCapacity) {
2ca993e8 379 uprv_strcpy(parent, keyStr);
57a6839d
A
380 } else {
381 *err = U_BUFFER_OVERFLOW_ERROR;
382 }
2ca993e8 383 ures_close(parentMapBundle);
57a6839d
A
384 return len;
385 }
2ca993e8 386 ures_close(parentMapBundle);
57a6839d 387 }
2ca993e8 388
57a6839d
A
389 len = uloc_getParent(locbuf, parent, parentCapacity, err);
390 if (U_SUCCESS(*err) && len == 0) {
391 len = 4;
392 if (len < parentCapacity) {
393 uprv_strcpy(parent, "root");
394 } else {
395 *err = U_BUFFER_OVERFLOW_ERROR;
396 }
397 }
398 return len;
399}
400
b331163b
A
401// =================
402// Data and related functions for ualoc_localizationsToUse
403// =================
404
405static const char * appleAliasMap[][2] = {
406 // names are lowercase here because they are looked up after being processed by uloc_getBaseName
407 { "arabic", "ar" }, // T2
408 { "chinese", "zh_Hans" }, // T0
409 { "danish", "da" }, // T2
410 { "dutch", "nl" }, // T1, still in use
411 { "english", "en" }, // T0, still in use
412 { "finnish", "fi" }, // T2
413 { "french", "fr" }, // T0, still in use
414 { "german", "de" }, // T0, still in use
415 { "italian", "it" }, // T1, still in use
416 { "japanese", "ja" }, // T0, still in use
417 { "korean", "ko" }, // T1
a961784b 418 { "no_NO", "nb_NO" }, // special
b331163b
A
419 { "norwegian", "nb" }, // T2
420 { "polish", "pl" }, // T2
421 { "portuguese", "pt" }, // T2
422 { "russian", "ru" }, // T2
423 { "spanish", "es" }, // T1, still in use
424 { "swedish", "sv" }, // T2
425 { "thai", "th" }, // T2
426 { "turkish", "tr" }, // T2
b331163b 427};
2ca993e8 428enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) };
b331163b 429
340931cb
A
430// Most of the entries in the following are cases in which
431// localization bundle inheritance is different from
432// ICU resource inheritance, and thus are not in parentLocales data.
433// <rdar://problem/63880069> However, since this is now checked before
434// the hashmap of parentLocales data, we add a few important entries
435// from parentLocales data for lookup efficiency.
b331163b 436static const char * appleParentMap[][2] = {
340931cb 437 { "ars", "ar" }, // rdar://64497611
b331163b 438 { "en_150", "en_GB" }, // Apple custom parent
f3c0d7a5
A
439 { "en_AG", "en_GB" }, // Antigua & Barbuda
440 { "en_AI", "en_GB" }, // Anguilla
b331163b 441 { "en_AU", "en_GB" }, // Apple custom parent
f3c0d7a5 442 { "en_BB", "en_GB" }, // Barbados
b331163b 443 { "en_BD", "en_GB" }, // Apple custom parent
f3c0d7a5 444 { "en_BM", "en_GB" }, // Bermuda
340931cb 445 { "en_BN", "en_GB" }, // Brunei
f3c0d7a5
A
446 { "en_BS", "en_GB" }, // Bahamas
447 { "en_BW", "en_GB" }, // Botswana
448 { "en_BZ", "en_GB" }, // Belize
449 { "en_CC", "en_AU" }, // Cocos (Keeling) Islands
f3c0d7a5
A
450 { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?)
451 { "en_CX", "en_AU" }, // Christmas Island
b331163b 452 { "en_CY", "en_150" }, // Apple locale addition
a961784b 453 { "en_DG", "en_GB" },
f3c0d7a5 454 { "en_DM", "en_GB" }, // Dominica
f3c0d7a5 455 { "en_FJ", "en_GB" }, // Fiji
a961784b 456 { "en_FK", "en_GB" },
340931cb 457 { "en_GB", "en_001" }, // from parentLocales, added here for efficiency
f3c0d7a5 458 { "en_GD", "en_GB" }, // Grenada
a961784b 459 { "en_GG", "en_GB" },
f3c0d7a5 460 { "en_GH", "en_GB" }, // Ghana
a961784b 461 { "en_GI", "en_GB" },
f3c0d7a5 462 { "en_GM", "en_GB" }, // Gambia
f3c0d7a5 463 { "en_GY", "en_GB" }, // Guyana
b331163b 464 { "en_HK", "en_GB" }, // Apple custom parent
a961784b 465 { "en_IE", "en_GB" },
a961784b 466 { "en_IM", "en_GB" },
b331163b 467 { "en_IN", "en_GB" }, // Apple custom parent
a961784b 468 { "en_IO", "en_GB" },
a961784b 469 { "en_JE", "en_GB" },
a62d09fc 470 { "en_JM", "en_GB" },
f3c0d7a5
A
471 { "en_KE", "en_GB" }, // Kenya
472 { "en_KI", "en_GB" }, // Kiribati
473 { "en_KN", "en_GB" }, // St. Kitts & Nevis
474 { "en_KY", "en_GB" }, // Cayman Islands
475 { "en_LC", "en_GB" }, // St. Lucia
3d1f044b 476 { "en_LK", "en_GB" }, // Apple custom parent
f3c0d7a5 477 { "en_LS", "en_GB" }, // Lesotho
a961784b 478 { "en_MO", "en_GB" },
f3c0d7a5 479 { "en_MS", "en_GB" }, // Montserrat
a961784b 480 { "en_MT", "en_GB" },
f3c0d7a5 481 { "en_MU", "en_GB" }, // Mauritius
2ca993e8 482 { "en_MV", "en_GB" },
f3c0d7a5 483 { "en_MW", "en_GB" }, // Malawi
b331163b 484 { "en_MY", "en_GB" }, // Apple custom parent
f3c0d7a5
A
485 { "en_NA", "en_GB" }, // Namibia
486 { "en_NF", "en_AU" }, // Norfolk Island
487 { "en_NG", "en_GB" }, // Nigeria
f3c0d7a5
A
488 { "en_NR", "en_AU" }, // Nauru
489 { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?)
2ca993e8 490 { "en_NZ", "en_AU" },
f3c0d7a5 491 { "en_PG", "en_AU" }, // Papua New Guinea
b331163b 492 { "en_PK", "en_GB" }, // Apple custom parent
f3c0d7a5 493 { "en_PN", "en_GB" }, // Pitcairn Islands
f3c0d7a5
A
494 { "en_SB", "en_GB" }, // Solomon Islands
495 { "en_SC", "en_GB" }, // Seychelles
496 { "en_SD", "en_GB" }, // Sudan
a961784b
A
497 { "en_SG", "en_GB" },
498 { "en_SH", "en_GB" },
f3c0d7a5
A
499 { "en_SL", "en_GB" }, // Sierra Leone
500 { "en_SS", "en_GB" }, // South Sudan
501 { "en_SZ", "en_GB" }, // Swaziland
502 { "en_TC", "en_GB" }, // Tristan da Cunha
503 { "en_TO", "en_GB" }, // Tonga
504 { "en_TT", "en_GB" }, // Trinidad & Tobago
505 { "en_TV", "en_GB" }, // Tuvalu
506 { "en_TZ", "en_GB" }, // Tanzania
507 { "en_UG", "en_GB" }, // Uganda
508 { "en_VC", "en_GB" }, // St. Vincent & Grenadines
a961784b 509 { "en_VG", "en_GB" },
f3c0d7a5
A
510 { "en_VU", "en_GB" }, // Vanuatu
511 { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?)
512 { "en_ZA", "en_GB" }, // South Africa
513 { "en_ZM", "en_GB" }, // Zambia
514 { "en_ZW", "en_GB" }, // Zimbabwe
340931cb
A
515 { "es_MX", "es_419" }, // from parentLocales, added here for efficiency
516 { "wuu", "wuu_Hans"}, // rdar://64497611
517 { "wuu_Hans", "zh_Hans" }, // rdar://64497611
518 { "wuu_Hant", "zh_Hant" }, // rdar://64497611
519 { "yue", "yue_Hant"},
520 { "yue_Hans", "zh_Hans" }, // <rdar://problem/30671866>
521 { "yue_Hant", "zh_Hant" }, // <rdar://problem/30671866>
522 { "zh_Hant", "root" }, // from parentLocales, added here for efficiency
b331163b 523};
2ca993e8
A
524enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) };
525
b331163b
A
526U_CDECL_BEGIN
527static UBool U_CALLCONV ualocale_cleanup(void);
528U_CDECL_END
529
530U_NAMESPACE_BEGIN
531
532static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
533
534static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
340931cb 535static UResourceBundle* gLanguageAliasesBundle = NULL;
b331163b
A
536
537U_NAMESPACE_END
538
539U_CDECL_BEGIN
540
541static UBool U_CALLCONV ualocale_cleanup(void)
542{
543 U_NAMESPACE_USE
544
b331163b 545 if (gMapDataState > 0) {
340931cb
A
546 ures_close(gLanguageAliasesBundle);
547 gLanguageAliasesBundle = NULL;
b331163b
A
548 }
549 gMapDataState = 0;
340931cb 550 gUALocaleCacheInitOnce.reset();
b331163b
A
551 return TRUE;
552}
553
554static void initializeMapData() {
555 U_NAMESPACE_USE
556
b331163b
A
557 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
558
340931cb
A
559 UResourceBundle * curBundle;
560 UErrorCode status = U_ZERO_ERROR;
561 curBundle = ures_openDirect(NULL, "metadata", &status);
562 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
563 curBundle = ures_getByKey(curBundle, "language", curBundle, &status);
564 if (U_FAILURE(status)) {
b331163b
A
565 gMapDataState = -1; // failure
566 return;
567 }
340931cb 568 gLanguageAliasesBundle = curBundle; // URES_TABLE resource, 420 entries in ICU-6600n
2ca993e8 569#if DEBUG_UALOC
340931cb 570 printf("# metadata/alias/language size %d\n", ures_getSize(curBundle));
2ca993e8 571#endif
340931cb 572
b331163b
A
573 gMapDataState = 1;
574}
575
576U_CDECL_END
577
340931cb
A
578// comparator for binary search of appleAliasMap
579static int compareAppleMapElements(const void *key, const void *entry) {
580 return uprv_strcmp((const char *)key, ((const char **)entry)[0]);
581}
582
b331163b
A
583// The following maps aliases, etc. Ensures 0-termination if no error.
584static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
585{
586 if (U_FAILURE(*status)) {
587 return;
588 }
589 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
590
340931cb
A
591 const char *replacement = locale; // fallback to no replacement
592 int32_t len;
593 // first check in appleAliasMap using binary search
594 const char** entry = (const char**)bsearch(locale, appleAliasMap, kAppleAliasMapCount, sizeof(appleAliasMap[0]), compareAppleMapElements);
595 if (entry != NULL) {
596 replacement = entry[1];
597 } else if (icu::gMapDataState > 0) {
598 // check in gLanguageAliasesBundle
599 UErrorCode localStatus = U_ZERO_ERROR;
600 UResourceBundle * aliasMapBundle = ures_getByKey(icu::gLanguageAliasesBundle, locale, NULL, &localStatus);
601 if (U_SUCCESS(localStatus) && aliasMapBundle != NULL) {
602 len = normalizedCapacity;
603 ures_getUTF8StringByKey(aliasMapBundle, "replacement", normalized, &len, TRUE, status);
604 if (U_SUCCESS(*status) && len >= normalizedCapacity) {
605 *status = U_BUFFER_OVERFLOW_ERROR; // treat unterminated as error
606 }
607 ures_close(aliasMapBundle);
608 return;
609 }
610 }
611
612 len = strnlen(replacement, normalizedCapacity);
b331163b
A
613 if (len < normalizedCapacity) { // allow for 0 termination
614 uprv_strcpy(normalized, replacement);
615 } else {
616 *status = U_BUFFER_OVERFLOW_ERROR;
617 }
618}
619
620static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
621{
622 if (U_FAILURE(*status)) {
623 return;
624 }
340931cb
A
625 // first check in appleParentMap using binary search
626 int32_t len;
627 const char** entry = (const char**)bsearch(locale, appleParentMap, kAppleParentMapCount, sizeof(appleParentMap[0]), compareAppleMapElements);
628 if (entry != NULL) {
629 const char* replacement = entry[1];
630 len = uprv_strlen(replacement);
631 if (len < parentCapacity) { // allow for 0 termination
632 uprv_strcpy(parent, replacement);
633 } else {
634 *status = U_BUFFER_OVERFLOW_ERROR;
b331163b 635 }
340931cb
A
636 return;
637 }
638 len = ures_getLocParent(locale, parent, parentCapacity - 1, status);
639 if (len > 0 || U_FAILURE(*status)) {
640 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
641 return;
b331163b
A
642 }
643 uloc_getParent(locale, parent, parentCapacity - 1, status);
644 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
645}
646
340931cb
A
647enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
648
const int32_t kMaxLocaleIDLength = 58;  // ULOC_FULLNAME_CAPACITY - ULOC_KEYWORD_AND_VALUES_CAPACITY: locales without variants should never be more than 24 chars, the excess is just to cover variant codes (+1 for null termination)
const int32_t kMaxParentChainLength = 7;
const int32_t kCharStorageBlockSize = 650; // very few of the unit tests used more than 650 bytes of character storage

/**
 * Bump allocator for the locale ID strings: a chain of fixed-size blocks. Only the last
 * block in the chain is written to; every earlier block forwards to its successor.
 * Usage is: call nextPtr(), write at most kMaxLocaleIDLength chars there, then call
 * advance() with the number of chars written.
 */
struct LocIDCharStorage {
    char chars[kCharStorageBlockSize];
    char* curTop;                   //< First unused char in this block
    char* limit;                    //< One past the end of `chars`
    LocIDCharStorage* nextBlock;    //< Overflow block, or NULL if this is the last block

    LocIDCharStorage() : chars(), curTop(chars), limit(curTop + kCharStorageBlockSize), nextBlock(NULL) {}
    ~LocIDCharStorage() { delete nextBlock; }

    /** Returns a pointer with room for kMaxLocaleIDLength chars plus a terminator. */
    char* nextPtr() {
        if (nextBlock == NULL) {
            if (limit - curTop > kMaxLocaleIDLength) {
                // return the top of the current block only if there's enough room for a maximum-length locale ID--
                // this keeps us from having to preflight or repeat any of the actual uloc calls and wastes
                // relatively little space
                return curTop;
            } else {
                // if we DON'T have enough space for a max-length locale ID, allocate a new block...
                nextBlock = new LocIDCharStorage();
                // ...and fall through to the line below to return its top pointer
            }
        }
        return nextBlock->nextPtr();
    }

    /**
     * Marks `charsUsed` chars at the pointer last returned by nextPtr() as used and
     * terminates them. `charsUsed` is clamped to what nextPtr() guaranteed: ICU calls
     * report the full (preflight) length when their output did not fit, and trusting such
     * a value would move curTop, and the terminator write, past the end of the block.
     */
    void advance(int32_t charsUsed) {
        if (nextBlock != NULL) {
            nextBlock->advance(charsUsed);
            return;
        }
        int32_t maxUsable = (int32_t)(limit - curTop) - 1; // keep one char for the terminator
        if (maxUsable > kMaxLocaleIDLength) {
            maxUsable = kMaxLocaleIDLength;
        }
        if (maxUsable < 0) {
            return; // block is completely full; nothing can be recorded
        }
        if (charsUsed < 0) {
            charsUsed = 0;
        } else if (charsUsed > maxUsable) {
            charsUsed = maxUsable;
        }
        curTop += charsUsed;
        *curTop++ = '\0'; // in rare cases, the ICU call might not have null-terminated the result, so force it here
    }
};
687
688/**
689 * Data structure used by ualoc_localizationsToUse() below to cache the various transformed versions of a single locale ID.
690 * All char* members are pointers into storage managed separately by the caller-- usually pointers into a separate array of char intended to
691 * hold all of the strings in bulk.
692 */
693struct LocaleIDInfo {
694 const char* original; //< Pointer to the original locale ID
695 const char* base; //< The result of uloc_getBaseName() on the original locale ID
696 const char* normalized; //< The result of ualoc_normalize() on the value of `base`
697 const char* language; //< The language code from `normalized`
698 const char* languageGroup; //< Same as `language`, except for certain languages that fall back to other languages
699 const char* parentChain[kMaxParentChainLength]; //< Array of the results of calling ualoc_getParent() repeatedly on `normalized`
700
701 LocaleIDInfo();
702 void initBaseNames(const char* originalID, LocIDCharStorage& charStorage, UErrorCode* err);
703 void calcParentChain(LocIDCharStorage& charStorage, UBool penalizeNonDefaultCountry, UErrorCode* err);
704 UBool specifiesCountry();
705#if DEBUG_UALOC
706 void dump(const char *originalID, LocIDCharStorage& charStorage, UBool penalizeNonDefaultCountry, UErrorCode *err);
707#endif
708};
709
710LocaleIDInfo::LocaleIDInfo() {
711 // these are the only two fields that HAVE to be initialized to NULL
712 original = NULL;
713 parentChain[0] = NULL;
b331163b
A
714}
715
340931cb
A
716/**
717 * Caches the `originalID` in `original` and fills in `base`, `normalized`, and `language. If these fields have already been filled in by an earlier call, this
718 * function won't fill them in again.
719 * @param originalID The locale ID to base the other values on.
720 * @param textPtr A pointer to a `char*` variable that points into an array of character storage maintained by the caller. The actual characters in this
721 * object's strings are written to this storage and `textPtr` is advanced to point to the first memory position after the last string written to the storage.
722 * @param textPtrLimit A pointer to the position immediately beyond the end of the separate character storage. This function won't write beyond
723 * this point and will return U_BUFFER_OVERFLOW if the storage is filled (which shouldn't happen).
724 * @param err Pointer to a variable holding the ICU error code.
725 */
726void LocaleIDInfo::initBaseNames(const char *originalID, LocIDCharStorage& charStorage, UErrorCode *err) {
727 // don't fill in the fields if they're already filled in
728 if (original == NULL) {
729 original = originalID;
730
731 base = charStorage.nextPtr();
732 int32_t length = uloc_getBaseName(original, const_cast<char*>(base), kMaxLocaleIDLength, err);
733 charStorage.advance(length);
734
735 normalized = charStorage.nextPtr();
736 ualoc_normalize(base, const_cast<char*>(normalized), kMaxLocaleIDLength, err);
737 charStorage.advance(uprv_strlen(normalized));
738
739 language = charStorage.nextPtr();
740 length = uloc_getLanguage(normalized, const_cast<char*>(language), kMaxLocaleIDLength, err);
741 charStorage.advance(length);
742 languageGroup = language;
743
744 // The `languageGroup` field is used for performance optimization; we don't need to walk the parent chain if the
745 // languages of the two locales being compared are different. This code accounts for the few cases of different
746 // language codes that need to be considered equivalent for comparison purposes.
747 static const char* likeLanguages[] = {
748 "ars", "ar",
749 "no", "nb",
750 "wuu", "zh",
751 "yue", "zh"
752 };
753 for (int32_t i = 0; i < UPRV_LENGTHOF(likeLanguages); i += 2) {
754 if (uprv_strcmp(language, likeLanguages[i]) == 0) {
755 languageGroup = likeLanguages[i + 1];
756 break;
757 }
b331163b
A
758 }
759 }
b331163b
A
760}
761
340931cb
A
762/**
763 * Calculates the parent chain for the locale ID in `original` by calling `ualoc_getParent()` repeatedly until it returns the empty string or "root". If this object's
764 * parent chain has previously been calculated, this won't do it again. The parent chain in the LocaleIDInfo object is terminated by a NULL entry.
765 * @param textPtr A pointer to a `char*` variable that points into an array of character storage maintained by the caller. The actual characters in this
766 * object's strings are written to this storage and `textPtr` is advanced to point to the first memory position after the last string written to the storage.
767 * @param textPtrLimit A pointer to the position immediately beyond the end of the separate character storage. This function won't write beyond
768 * this point and will return U_BUFFER_OVERFLOW if the storage is filled (which shouldn't happen).
769 * @param penalizeNonDefaultCountry If TRUE, an extra entry is added to the parent chain if the original locale specifies a country other than
770 * the default country for the locale's language.
771 * @param err Pointer to a variable holding the ICU error code.
772 */
773void LocaleIDInfo::calcParentChain(LocIDCharStorage& charStorage, UBool penalizeNonDefaultCountry, UErrorCode *err) {
774 // don't calculate the parent chain if it's already been calculated
775 if (parentChain[0] != NULL) {
776 return;
777 }
778
779 int32_t index = 0;
780
781 // Entry 0 in the parent chain is always the same as `normalized`-- this simplifies distance calculations.
782 parentChain[index] = normalized;
783
784 // If the caller asks to penalize the non-default country (which it does for entries in `availableLocalizations`
785 // but not for entries in `preferredLanguages`), check to see if the original locale ID specifies a country code
786 // for a country other than the default country for the specified language (as determined by uloc_minimizeSubtags() ).
787 // If the country is NOT the default for the language, artifically lengthen the parent chain by also putting
788 // `normalized` into entry 1 in the parent chain. We do this to bias our similarity scores toward the default country.
789 // (e.g., if `preferredLanguages` is { it } and `availableLocalizations` is { it_CH, it_IT }, this causes us to return
790 // `it_IT` even though it comes second in the list because it's the default country for the language.)
791 if (penalizeNonDefaultCountry) {
792 UErrorCode dummyErr = U_ZERO_ERROR;
793 if (uloc_getCountry(normalized, NULL, 0, &dummyErr) > 0) {
794 if (uprv_strcmp(normalized, "es_MX") != 0 && uprv_strcmp(normalized, "zh_Hant_TW") != 0) {
795 dummyErr = U_ZERO_ERROR;
796 char minimizedLocale[kLocBaseNameMax];
797 uloc_minimizeSubtags(normalized, minimizedLocale, kLocBaseNameMax, &dummyErr);
798 if (uloc_getCountry(minimizedLocale, NULL, 0, &dummyErr)) {
799 parentChain[++index] = normalized;
800 }
801 }
802 }
803 }
804
805 // Walk the locale ID's parent chain using ualoc_getParent(). That function will return "" or "root" when it
806 // gets to the end of the chain, but internall we use NULL to mark the end of the chain.
807 while (index < kMaxParentChainLength && parentChain[index] != NULL) {
808 char* textPtr = charStorage.nextPtr();
809 ualoc_getParent(parentChain[index], textPtr, kMaxLocaleIDLength, err);
810 ++index;
811 if (textPtr[0] == '\0' || uprv_strcmp(textPtr, "root") == 0) {
812 parentChain[index] = NULL;
813 } else {
814 parentChain[index] = textPtr;
815 charStorage.advance(uprv_strlen(textPtr));
816 }
817 }
818}
819
820UBool LocaleIDInfo::specifiesCountry() {
821 UErrorCode err = U_ZERO_ERROR;
822 int32_t countryLength = uloc_getCountry(normalized, NULL, 0, &err);
823 return countryLength != 0;
824}
825
#if DEBUG_UALOC
/**
 * Debugging aid: initializes this object from `originalID`, calculates its parent chain, and prints the original,
 * base, and normalized IDs followed by the rest of the parent chain to stdout on a single line.
 * @param originalID The locale ID passed on to initBaseNames().
 * @param charStorage Character storage passed on to initBaseNames() and calcParentChain().
 * @param penalizeNonDefaultCountry Passed on to calcParentChain().
 * @param err Pointer to a variable holding the ICU error code.
 */
void LocaleIDInfo::dump(const char *originalID, LocIDCharStorage& charStorage, UBool penalizeNonDefaultCountry, UErrorCode *err) {
    initBaseNames(originalID, charStorage, err);
    calcParentChain(charStorage, penalizeNonDefaultCountry, err);

    printf("[ %s -> %s -> %s ]", original, base, normalized);

    // Entry 0 of the chain is `normalized`, which was already printed above, so start at entry 1.
    int32_t depth = 1;
    while (parentChain[depth] != NULL) {
        printf(" -> %s", parentChain[depth]);
        ++depth;
    }
    printf("\n");
}
#endif  // DEBUG_UALOC
b331163b
A
841
842int32_t
843ualoc_localizationsToUse( const char* const *preferredLanguages,
844 int32_t preferredLanguagesCount,
845 const char* const *availableLocalizations,
846 int32_t availableLocalizationsCount,
847 const char* *localizationsToUse,
848 int32_t localizationsToUseCapacity,
849 UErrorCode *status )
850{
851 if (U_FAILURE(*status)) {
852 return -1;
853 }
854 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
855 *status = U_ILLEGAL_ARGUMENT_ERROR;
856 return -1;
857 }
858 // get resource data, need to protect with mutex
0f5d89e8
A
859 if (icu::gMapDataState == 0) {
860 umtx_initOnce(icu::gUALocaleCacheInitOnce, initializeMapData);
b331163b 861 }
340931cb 862
2ca993e8 863#if DEBUG_UALOC
340931cb
A
864 printf("--------------------------------------------------------------------------------\n");
865 printf("Preferred languages: ");
866 for (int32_t i = 0; i < preferredLanguagesCount; i++) {
867 printf("%s ", preferredLanguages[i]);
2ca993e8 868 }
340931cb
A
869 printf("\nAvailable localizations: ");
870 for (int32_t i = 0; i < availableLocalizationsCount; i++) {
871 printf("%s ", availableLocalizations[i]);
872 }
873 printf("\n\n");
874#endif // DEBUG_UALOC
875
876 LocaleIDInfo prefLangInfos[preferredLanguagesCount];
877 LocaleIDInfo availLocInfos[availableLocalizationsCount];
878 LocIDCharStorage charStorage;
879 LocaleIDInfo* result = NULL;
880 LocaleIDInfo* portugueseResult = NULL;
881 int32_t resultScore = 999;
882
2ca993e8 883#if DEBUG_UALOC
340931cb
A
884 for (int32_t i = 0; i < preferredLanguagesCount; i++) {
885 prefLangInfos[i].dump(preferredLanguages[i], charStorage, FALSE, status);
886 }
887 printf("\n");
888 for (int32_t i = 0; i < availableLocalizationsCount; i++) {
889 availLocInfos[i].dump(availableLocalizations[i], charStorage, TRUE, status);
890 }
891 printf("\n");
892#endif // DEBUG_UALOC
893
894 // Loop over the entries in `preferredLanguages` matching them against `availableLocalizations`. The first preferred
895 // language that has a matching available localization is the only one that contributes to the result (except in the
896 // case of Portuguese, about which more below).
897 for (int32_t prefLangIndex = 0; result == NULL && prefLangIndex < preferredLanguagesCount; ++prefLangIndex) {
898 LocaleIDInfo* prefLangInfo = &prefLangInfos[prefLangIndex];
899 prefLangInfo->initBaseNames(preferredLanguages[prefLangIndex], charStorage, status);
900
901 // Loop over the entries in `availableLocalizations`, looking for the best match to the current entry
902 // from `preferredLanguages`.
903 for (int32_t availLocIndex = 0; availLocIndex < availableLocalizationsCount; ++availLocIndex) {
904 LocaleIDInfo* availLocInfo = &availLocInfos[availLocIndex];
905 availLocInfo->initBaseNames(availableLocalizations[availLocIndex], charStorage, status);
906
907 // Give the highest preference (a score of -1) to locales whose base names are an exact match.
908 if (resultScore > -1 && uprv_strcmp(prefLangInfo->base, availLocInfo->base) == 0) {
909 result = availLocInfo;
910 resultScore = -1;
911 // Give the second-highest preference (a score of 0) to locales whose normalized names are an exact match.
912 } else if (resultScore > 0 && uprv_strcmp(prefLangInfo->normalized, availLocInfo->normalized) == 0) {
913 result = availLocInfo;
914 resultScore = 0;
915 } else if (resultScore > 0 && uprv_strcmp(prefLangInfo->languageGroup, availLocInfo->languageGroup) == 0) {
916 // If we haven't yet found an exact match, look to see if the two locales have an exact match further
917 // down in their parent chains. We can skip checking the parent chains if the locales' languages are
918 // different since (with a couple of important exceptions) the parent chain will never change language.
919 prefLangInfo->calcParentChain(charStorage, FALSE, status);
920 availLocInfo->calcParentChain(charStorage, TRUE, status);
921
922 if (U_SUCCESS(*status)) {
923 // Compare each pair of entries in the two locales' parent chains. If we find an exact match,
924 // assign it a score based on how deep into the two parent chains it is (preference is given
925 // to matches higher in the two locales' parent chains). The locale with the lowest score
926 // will be our result.
927 for (int32_t prefLangParentIndex = 0; prefLangInfo->parentChain[prefLangParentIndex] != NULL; ++prefLangParentIndex) {
928 for (int32_t availLocParentIndex = 0; availLocInfo->parentChain[availLocParentIndex] != NULL; ++availLocParentIndex) {
929 if (uprv_strcmp(prefLangInfo->parentChain[prefLangParentIndex], availLocInfo->parentChain[availLocParentIndex]) == 0) {
930 if (uprv_strcmp(prefLangInfo->normalized, "pt_PT") == 0 && uprv_strcmp(availLocInfo->normalized, "pt_BR") == 0) {
931 // We don't want to match pt_BR with pt_PT unless there are no better matches anywhere--
932 // if we see this match, store it "off to the side", but continue as though we didn't find
933 // a match at all. We only return it if we _don't_ find any other matches.
934 portugueseResult = availLocInfo;
935 } else {
936 int32_t score = prefLangParentIndex + availLocParentIndex;
937 if (uprv_strcmp(prefLangInfo->language, availLocInfo->language) != 0) {
938 // Add a one-point penalty to the score if the two locales have different languages
939 ++score;
940 }
941 if (score < resultScore) {
942 resultScore = score;
943 result = availLocInfo;
2ca993e8
A
944 }
945 }
946 }
947 }
948 }
b331163b 949 }
b331163b
A
950 }
951 }
2ca993e8 952 }
340931cb
A
953
954 // If our result isn't an exact match and does specify a country, check to see if there are any entries further
955 // down in the preferred language list that have the same language as the current result but ARE an exact match with
956 // something in the available-localizations list. That is, if the preferred languages list is [ fr-CH, fr-CA ] and
957 // the available localizations list is [ fr-FR, fr-CA ], we want to return fr-CA, but we only want to do that with
958 // variations of the language we originally matched. (We do go with the match if it doesn't specify a country--
959 // we want "en" to match "en-US" and to be preferred over matches later in the preferred-languages list.)
960 // [NOTE: This logic was causing side effects with Chinese, which is more complicated, so for now we have logic
961 // to skip it when the original result is Chinese.]
962 if (result != NULL && resultScore > 0 && result->specifiesCountry() && uprv_strcmp(result->language, "zh") != 0) {
963 for (int32_t prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; ++prefLangIndex) {
964 LocaleIDInfo* prefLangInfo = &prefLangInfos[prefLangIndex];
965 prefLangInfo->initBaseNames(preferredLanguages[prefLangIndex], charStorage, status);
966 if (uprv_strcmp(prefLangInfo->language, result->language) == 0) {
967 for (int32_t availLocIndex = 0; availLocIndex < availableLocalizationsCount; ++availLocIndex) {
968 LocaleIDInfo* availLocInfo = &availLocInfos[availLocIndex];
969 if (uprv_strcmp(prefLangInfo->base, availLocInfo->base) == 0 || uprv_strcmp(prefLangInfo->normalized, availLocInfo->normalized) == 0) {
970 result = &availLocInfos[availLocIndex];
971 break;
2ca993e8 972 }
340931cb 973 }
2ca993e8 974 }
b331163b
A
975 }
976 }
340931cb
A
977
978 // Write out our results.
979 int32_t locsToUseCount = 0;
980
981 // If the only match we found above is matching pt_PT to pt_BR, we can use it as our result.
982 if (result == NULL && portugueseResult != NULL) {
983 result = portugueseResult;
984 }
985
986 // If we found a match above, walk its parent chain and search `availableLocales` for any entries that occur in the
987 // main result's parent chain. If we find any, we want to return those too. (The extra wrinkles below are to keep
988 // us from putting the same locale into the result list more than once.)
989 if (result != NULL) {
990 localizationsToUse[locsToUseCount++] = result->original;
991
992 result->calcParentChain(charStorage, TRUE, status);
993 for (int32_t parentChainIndex = 0; result->parentChain[parentChainIndex] != NULL; ++parentChainIndex) {
994 if (parentChainIndex > 0 && result->parentChain[parentChainIndex - 1] == result->parentChain[parentChainIndex]) {
995 continue;
996 }
997 for (int32_t availLocIndex = 0; availLocIndex < availableLocalizationsCount; ++availLocIndex) {
998 LocaleIDInfo* availLocInfo = &availLocInfos[availLocIndex];
999 if (result->original == availLocInfo->original) {
1000 continue;
1001 } else if (locsToUseCount < localizationsToUseCapacity && uprv_strcmp(result->parentChain[parentChainIndex], "zh_Hant_HK") == 0 && uprv_strcmp(availLocInfo->normalized, "zh_Hant_TW") == 0) {
1002 // HACK for Chinese: If we find "zh_Hant_HK" while walking the result's parent chain and the available localizations list includes "zh_Hant_TW", include "zh_Hant_TW" in the results list too
1003 localizationsToUse[locsToUseCount++] = availLocInfo->original;
1004 } else if (locsToUseCount < localizationsToUseCapacity && uprv_strcmp(result->parentChain[parentChainIndex], availLocInfo->normalized) == 0) {
1005 localizationsToUse[locsToUseCount++] = availLocInfo->original;
b331163b
A
1006 }
1007 }
1008 }
340931cb
A
1009 }
1010
1011 // if our result array is empty, check to see if the availableLocalizations list contains the special sentinel
1012 // value "zxx" (which means "no linguistic content"). If it does, return that instead of the empty list
1013 if (locsToUseCount == 0) {
1014 int32_t zxxPos = -1;
1015 for (int32_t i = 0; i < availableLocalizationsCount; i++) {
1016 if (uprv_strcmp(availableLocalizations[i], "zxx") == 0) {
1017 zxxPos = i;
1018 break;
b331163b
A
1019 }
1020 }
340931cb
A
1021 if (zxxPos >= 0) {
1022 localizationsToUse[locsToUseCount++] = availableLocalizations[zxxPos];
1023 }
b331163b 1024 }
340931cb
A
1025
1026#if DEBUG_UALOC
1027 printf("Localizations to use: ");
1028 for (int32_t i = 0; i < locsToUseCount; i++) {
1029 printf("%s ", localizationsToUse[i]);
1030 }
1031 printf("\n\n");
1032#endif // DEBUG_UALOC
b331163b
A
1033 return locsToUseCount;
1034}