]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ualoc.cpp
ICU-59152.0.1.tar.gz
[apple/icu.git] / icuSources / common / ualoc.cpp
CommitLineData
57a6839d
A
1/*
2*****************************************************************************************
a961784b 3* Copyright (C) 2014-2016 Apple Inc. All Rights Reserved.
57a6839d
A
4*****************************************************************************************
5*/
6
2ca993e8
A
7#define DEBUG_UALOC 0
8#if DEBUG_UALOC
9#include <stdio.h>
10#endif
11#include <string.h>
57a6839d
A
12#include "unicode/utypes.h"
13#include "unicode/ualoc.h"
14#include "unicode/uloc.h"
15#include "unicode/ures.h"
16#include "unicode/putil.h"
f3c0d7a5 17#include "unicode/ustring.h"
57a6839d
A
18#include "cstring.h"
19#include "cmemory.h"
b331163b
A
20#include "uhash.h"
21#include "umutex.h"
22#include "ucln_cmn.h"
57a6839d
A
23// the following has replacements for some math.h funcs etc
24#include "putilimp.h"
25
26
// The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
// From its docs (adapted): [IntF is] a special integer that represents the number in
// normalized scientific notation.
// Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
// offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
// 14660000000000 -> 1.46600E13  -> 63146600
// 0.0001         -> 1.00000E-4  -> 46100000
// -0.0123456     -> -1.23456E-2 -> -48123456
//
// Here, to avoid an extra division, we treat the coefficient as an integer with a
// maximum of 999999 (instead of 9.99999) and compensate by subtracting 55 from the
// stored exponent (50 for the offset plus 5 for the coefficient scaling).
//
39static double doubleFromIntF(int32_t intF) {
40 double coefficient = (double)(intF % 1000000);
41 int32_t exponent = (intF / 1000000) - 55;
42 return coefficient * uprv_pow10(exponent);
43}
44
45static int compareLangEntries(const void * entry1, const void * entry2) {
46 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
47 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
48 // want descending order
49 if (fraction1 > fraction2) return -1;
50 if (fraction1 < fraction2) return 1;
51 // userFractions the same, sort by languageCode
52 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
53}
54
f3c0d7a5
A
// Pairs of { language code, same code with its default script appended },
// terminated by a single NULL.
// The pairs must stay sorted by language code: the lookup stops at the first
// key that compares greater than the code being searched for.
static const char * langToDefaultScript[] = {
    "az",  "az_Latn",
    "bs",  "bs_Latn",
    "iu",  "iu_Cans",
    "kk",  "kk_Arab",
    "ks",  "ks_Arab",
    "ku",  "ku_Latn",
    "ky",  "ky_Cyrl",
    "mn",  "mn_Cyrl",
    "ms",  "ms_Latn",
    "pa",  "pa_Guru",
    "rif", "rif_Tfng",
    "shi", "shi_Tfng",
    "sr",  "sr_Cyrl",
    "tg",  "tg_Cyrl",
    "tk",  "tk_Latn",
    "ug",  "ug_Arab",
    "uz",  "uz_Latn",
    "vai", "vai_Vaii",
    "yue", "yue_Hant",
    "zh",  "zh_Hans",
    NULL
};
80
81static const char * langCodeWithScriptIfAmbig(const char * langCode) {
82 const char ** langToDefScriptPtr = langToDefaultScript;
83 const char * testCurLoc;
84 while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) {
85 int cmp = uprv_strcmp(langCode, testCurLoc);
86 if (cmp <= 0) {
87 if (cmp == 0) {
88 return *langToDefScriptPtr;
89 }
90 break;
91 }
92 langToDefScriptPtr++;
93 }
94 return langCode;
95}
96
57a6839d
A
97static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
98static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
99static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
100
101enum {
102 kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
103 kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this
104};
105
106U_CAPI int32_t U_EXPORT2
107ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
108 UALanguageEntry *entries, int32_t entriesCapacity,
109 UErrorCode *err)
110{
111 if (U_FAILURE(*err)) {
112 return 0;
113 }
114 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
115 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
116 *err = U_ILLEGAL_ARGUMENT_ERROR;
117 return 0;
118 }
119 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
120 rb = ures_getByKey(rb, "territoryInfo", rb, err);
121 rb = ures_getByKey(rb, regionID, rb, err);
122 if (U_FAILURE(*err)) {
123 ures_close(rb);
124 return 0;
125 }
126
127 int32_t entryCount = 0;
128 UResourceBundle *langBund = NULL;
129 int32_t lbIdx, lbCount = ures_getSize(rb);
130 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
131 UALanguageEntry * langEntries = localLangEntries;
132 int32_t langEntriesMax = kLocalLangEntriesMax;
133
134 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
135 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
136 if (U_FAILURE(*err)) {
137 break;
138 }
139 const char * langCode = ures_getKey(langBund);
140 if (uprv_strcmp(langCode,"territoryF") == 0) {
141 continue;
142 }
143 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
144 continue; // a code we cannot handle
145 }
146
147 UErrorCode localErr = U_ZERO_ERROR;
148 double userFraction = 0.0;
149 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
150 if (U_SUCCESS(localErr)) {
151 int32_t intF = ures_getInt(itemBund, &localErr);
152 if (U_SUCCESS(localErr)) {
153 userFraction = doubleFromIntF(intF);
154 }
155 ures_close(itemBund);
156 }
157 if (userFraction < minimumFraction) {
158 continue;
159 }
160 if (entries != NULL) {
161 localErr = U_ZERO_ERROR;
162 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
163 int32_t ulen;
164 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
165 if (U_SUCCESS(localErr)) {
166 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
167 if (cmp == 0) {
168 langStatus = UALANGSTATUS_OFFICIAL;
169 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
170 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
171 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
172 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
173 }
174 }
175 // Now we have all of the info for our next entry
176 if (entryCount >= langEntriesMax) {
177 int32_t newMax = langEntriesMax * kLangEntriesFactor;
178 if (langEntries == localLangEntries) {
179 // first allocation, copy from local buf
180 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
181 if (langEntries == NULL) {
182 *err = U_MEMORY_ALLOCATION_ERROR;
183 break;
184 }
185 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
186 } else {
187 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
188 if (langEntries == NULL) {
189 *err = U_MEMORY_ALLOCATION_ERROR;
190 break;
191 }
192 }
193 langEntriesMax = newMax;
194 }
f3c0d7a5 195 uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode));
57a6839d
A
196 langEntries[entryCount].userFraction = userFraction;
197 langEntries[entryCount].status = langStatus;
198 }
199 entryCount++;
200 }
201 ures_close(langBund);
202 ures_close(rb);
203 if (U_FAILURE(*err)) {
204 if (langEntries != localLangEntries) {
205 free(langEntries);
206 }
207 return 0;
208 }
209 if (entries != NULL) {
210 // sort langEntries, copy entries that fit to provided array
211 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
212 if (entryCount > entriesCapacity) {
213 entryCount = entriesCapacity;
214 }
215 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
216 if (langEntries != localLangEntries) {
217 free(langEntries);
218 }
219 }
220 return entryCount;
221}
222
// Pairs of { locale, parent to force for that locale }, terminated by a
// single NULL. The pairs are kept sorted by locale because the lookup stops
// at the first key that compares greater than the locale being searched for.
static const char * forceParent[] = {
    "en_150",   "en_GB",    // en for Europe
    "en_AU",    "en_GB",
    "en_BD",    "en_GB",    // en for Bangladesh
    "en_BE",    "en_150",   // en for Belgium goes to en for Europe
    "en_DG",    "en_GB",
    "en_FK",    "en_GB",
    "en_GG",    "en_GB",
    "en_GI",    "en_GB",
    "en_HK",    "en_GB",    // en for Hong Kong
    "en_IE",    "en_GB",
    "en_IM",    "en_GB",
    "en_IN",    "en_GB",
    "en_IO",    "en_GB",
    "en_JE",    "en_GB",
    "en_JM",    "en_GB",
    "en_MO",    "en_GB",
    "en_MT",    "en_GB",
    "en_MV",    "en_GB",    // for Maldives
    "en_MY",    "en_GB",    // en for Malaysia
    "en_NZ",    "en_AU",
    "en_PK",    "en_GB",    // en for Pakistan
    "en_SG",    "en_GB",
    "en_SH",    "en_GB",
    "en_VG",    "en_GB",
    "yue",      "yue_CN",   // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M)
    "yue_CN",   "root",
    "yue_HK",   "root",
    "yue_Hans", "yue_CN",
    "yue_Hant", "yue_HK",
    "zh",       "zh_CN",
    "zh_CN",    "root",
    "zh_Hant",  "zh_TW",
    "zh_TW",    "root",
    NULL
};
259
2ca993e8
A
260enum { kLocBaseNameMax = 16 };
261
57a6839d
A
262U_CAPI int32_t U_EXPORT2
263ualoc_getAppleParent(const char* localeID,
264 char * parent,
265 int32_t parentCapacity,
266 UErrorCode* err)
267{
268 UResourceBundle *rb;
269 int32_t len;
270 UErrorCode tempStatus;
271 char locbuf[ULOC_FULLNAME_CAPACITY+1];
08b89b0a 272 char * foundDoubleUnderscore;
57a6839d
A
273
274 if (U_FAILURE(*err)) {
275 return 0;
276 }
277 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
278 *err = U_ILLEGAL_ARGUMENT_ERROR;
279 return 0;
280 }
08b89b0a 281 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
57a6839d
A
282 if (U_FAILURE(*err)) {
283 return 0;
284 }
285 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
286 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
287 *err = U_ZERO_ERROR;
288 }
08b89b0a
A
289 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
290 if (foundDoubleUnderscore != NULL) {
291 *foundDoubleUnderscore = 0; /* terminate at the __ */
292 len = uprv_strlen(locbuf);
293 }
b331163b 294 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
57a6839d
A
295 const char ** forceParentPtr = forceParent;
296 const char * testCurLoc;
297 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
298 int cmp = uprv_strcmp(locbuf, testCurLoc);
299 if (cmp <= 0) {
300 if (cmp == 0) {
301 len = uprv_strlen(*forceParentPtr);
302 if (len < parentCapacity) {
303 uprv_strcpy(parent, *forceParentPtr);
304 } else {
305 *err = U_BUFFER_OVERFLOW_ERROR;
306 }
307 return len;
308 }
309 break;
310 }
311 forceParentPtr++;
312 }
313 }
314 tempStatus = U_ZERO_ERROR;
315 rb = ures_openDirect(NULL, locbuf, &tempStatus);
316 if (U_SUCCESS(tempStatus)) {
317 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
2ca993e8 318 ures_close(rb);
57a6839d
A
319 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
320 // we have followed an alias
321 len = uprv_strlen(actualLocale);
322 if (len < parentCapacity) {
323 uprv_strcpy(parent, actualLocale);
324 } else {
325 *err = U_BUFFER_OVERFLOW_ERROR;
326 }
57a6839d
A
327 return len;
328 }
2ca993e8
A
329 }
330 tempStatus = U_ZERO_ERROR;
331 rb = ures_openDirect(NULL, "supplementalData", &tempStatus);
332 rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus);
333 if (U_SUCCESS(tempStatus)) {
334 UResourceBundle * parentMapBundle = NULL;
335 int32_t childLen = 0;
336 while (childLen == 0) {
337 tempStatus = U_ZERO_ERROR;
338 parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus);
339 if (U_FAILURE(tempStatus)) {
340 break; // no more parent bundles, normal exit
341 }
342 char childName[kLocBaseNameMax + 1];
343 childName[kLocBaseNameMax] = 0;
344 const char * childPtr = NULL;
345 if (ures_getType(parentMapBundle) == URES_STRING) {
346 childLen = kLocBaseNameMax;
347 childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus);
348 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
349 childLen = 0;
350 }
351 } else { // should be URES_ARRAY
352 int32_t childCur, childCount = ures_getSize(parentMapBundle);
353 for (childCur = 0; childCur < childCount && childLen == 0; childCur++) {
354 tempStatus = U_ZERO_ERROR;
355 childLen = kLocBaseNameMax;
356 childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus);
357 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
358 childLen = 0;
359 }
360 }
361 }
362 }
363 ures_close(rb);
364 if (childLen > 0) {
365 // parentMapBundle key is the parent we are looking for
366 const char * keyStr = ures_getKey(parentMapBundle);
367 len = uprv_strlen(keyStr);
57a6839d 368 if (len < parentCapacity) {
2ca993e8 369 uprv_strcpy(parent, keyStr);
57a6839d
A
370 } else {
371 *err = U_BUFFER_OVERFLOW_ERROR;
372 }
2ca993e8 373 ures_close(parentMapBundle);
57a6839d
A
374 return len;
375 }
2ca993e8 376 ures_close(parentMapBundle);
57a6839d 377 }
2ca993e8 378
57a6839d
A
379 len = uloc_getParent(locbuf, parent, parentCapacity, err);
380 if (U_SUCCESS(*err) && len == 0) {
381 len = 4;
382 if (len < parentCapacity) {
383 uprv_strcpy(parent, "root");
384 } else {
385 *err = U_BUFFER_OVERFLOW_ERROR;
386 }
387 }
388 return len;
389}
390
b331163b
A
391// =================
392// Data and related functions for ualoc_localizationsToUse
393// =================
394
395static const char * appleAliasMap[][2] = {
396 // names are lowercase here because they are looked up after being processed by uloc_getBaseName
397 { "arabic", "ar" }, // T2
398 { "chinese", "zh_Hans" }, // T0
399 { "danish", "da" }, // T2
400 { "dutch", "nl" }, // T1, still in use
401 { "english", "en" }, // T0, still in use
402 { "finnish", "fi" }, // T2
403 { "french", "fr" }, // T0, still in use
404 { "german", "de" }, // T0, still in use
405 { "italian", "it" }, // T1, still in use
406 { "japanese", "ja" }, // T0, still in use
407 { "korean", "ko" }, // T1
a961784b 408 { "no_NO", "nb_NO" }, // special
b331163b
A
409 { "norwegian", "nb" }, // T2
410 { "polish", "pl" }, // T2
411 { "portuguese", "pt" }, // T2
412 { "russian", "ru" }, // T2
413 { "spanish", "es" }, // T1, still in use
414 { "swedish", "sv" }, // T2
415 { "thai", "th" }, // T2
416 { "turkish", "tr" }, // T2
f3c0d7a5 417 { "yue", "yue_Hans"}, // special
b331163b
A
418 { "zh", "zh_Hans" }, // special
419};
2ca993e8 420enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) };
b331163b
A
421
422static const char * appleParentMap[][2] = {
423 { "en_150", "en_GB" }, // Apple custom parent
424 { "en_AD", "en_150" }, // Apple locale addition
f3c0d7a5
A
425 { "en_AG", "en_GB" }, // Antigua & Barbuda
426 { "en_AI", "en_GB" }, // Anguilla
b331163b
A
427 { "en_AL", "en_150" }, // Apple locale addition
428 { "en_AT", "en_150" }, // Apple locale addition
429 { "en_AU", "en_GB" }, // Apple custom parent
430 { "en_BA", "en_150" }, // Apple locale addition
f3c0d7a5 431 { "en_BB", "en_GB" }, // Barbados
b331163b 432 { "en_BD", "en_GB" }, // Apple custom parent
a961784b 433 { "en_BE", "en_150" }, // Apple custom parent
f3c0d7a5
A
434 { "en_BM", "en_GB" }, // Bermuda
435 { "en_BS", "en_GB" }, // Bahamas
436 { "en_BW", "en_GB" }, // Botswana
437 { "en_BZ", "en_GB" }, // Belize
438 { "en_CC", "en_AU" }, // Cocos (Keeling) Islands
b331163b 439 { "en_CH", "en_150" }, // Apple locale addition
f3c0d7a5
A
440 { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?)
441 { "en_CX", "en_AU" }, // Christmas Island
b331163b
A
442 { "en_CY", "en_150" }, // Apple locale addition
443 { "en_CZ", "en_150" }, // Apple locale addition
444 { "en_DE", "en_150" }, // Apple locale addition
a961784b 445 { "en_DG", "en_GB" },
b331163b 446 { "en_DK", "en_150" }, // Apple locale addition
f3c0d7a5 447 { "en_DM", "en_GB" }, // Dominica
b331163b
A
448 { "en_EE", "en_150" }, // Apple locale addition
449 { "en_ES", "en_150" }, // Apple locale addition
450 { "en_FI", "en_150" }, // Apple locale addition
f3c0d7a5 451 { "en_FJ", "en_GB" }, // Fiji
a961784b 452 { "en_FK", "en_GB" },
b331163b 453 { "en_FR", "en_150" }, // Apple locale addition
f3c0d7a5 454 { "en_GD", "en_GB" }, // Grenada
a961784b 455 { "en_GG", "en_GB" },
f3c0d7a5 456 { "en_GH", "en_GB" }, // Ghana
a961784b 457 { "en_GI", "en_GB" },
f3c0d7a5 458 { "en_GM", "en_GB" }, // Gambia
b331163b 459 { "en_GR", "en_150" }, // Apple locale addition
f3c0d7a5 460 { "en_GY", "en_GB" }, // Guyana
b331163b
A
461 { "en_HK", "en_GB" }, // Apple custom parent
462 { "en_HR", "en_150" }, // Apple locale addition
463 { "en_HU", "en_150" }, // Apple locale addition
a961784b 464 { "en_IE", "en_GB" },
b331163b 465 { "en_IL", "en_001" }, // Apple locale addition
a961784b 466 { "en_IM", "en_GB" },
b331163b 467 { "en_IN", "en_GB" }, // Apple custom parent
a961784b 468 { "en_IO", "en_GB" },
b331163b
A
469 { "en_IS", "en_150" }, // Apple locale addition
470 { "en_IT", "en_150" }, // Apple locale addition
a961784b 471 { "en_JE", "en_GB" },
a62d09fc 472 { "en_JM", "en_GB" },
f3c0d7a5
A
473 { "en_KE", "en_GB" }, // Kenya
474 { "en_KI", "en_GB" }, // Kiribati
475 { "en_KN", "en_GB" }, // St. Kitts & Nevis
476 { "en_KY", "en_GB" }, // Cayman Islands
477 { "en_LC", "en_GB" }, // St. Lucia
478 { "en_LS", "en_GB" }, // Lesotho
b331163b
A
479 { "en_LT", "en_150" }, // Apple locale addition
480 { "en_LU", "en_150" }, // Apple locale addition
481 { "en_LV", "en_150" }, // Apple locale addition
482 { "en_ME", "en_150" }, // Apple locale addition
a961784b 483 { "en_MO", "en_GB" },
f3c0d7a5 484 { "en_MS", "en_GB" }, // Montserrat
a961784b 485 { "en_MT", "en_GB" },
f3c0d7a5 486 { "en_MU", "en_GB" }, // Mauritius
2ca993e8 487 { "en_MV", "en_GB" },
f3c0d7a5 488 { "en_MW", "en_GB" }, // Malawi
b331163b 489 { "en_MY", "en_GB" }, // Apple custom parent
f3c0d7a5
A
490 { "en_NA", "en_GB" }, // Namibia
491 { "en_NF", "en_AU" }, // Norfolk Island
492 { "en_NG", "en_GB" }, // Nigeria
b331163b
A
493 { "en_NL", "en_150" }, // Apple locale addition
494 { "en_NO", "en_150" }, // Apple locale addition
f3c0d7a5
A
495 { "en_NR", "en_AU" }, // Nauru
496 { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?)
2ca993e8 497 { "en_NZ", "en_AU" },
f3c0d7a5 498 { "en_PG", "en_AU" }, // Papua New Guinea
b331163b
A
499 { "en_PK", "en_GB" }, // Apple custom parent
500 { "en_PL", "en_150" }, // Apple locale addition
f3c0d7a5 501 { "en_PN", "en_GB" }, // Pitcairn Islands
b331163b
A
502 { "en_PT", "en_150" }, // Apple locale addition
503 { "en_RO", "en_150" }, // Apple locale addition
504 { "en_RU", "en_150" }, // Apple locale addition
f3c0d7a5
A
505 { "en_SB", "en_GB" }, // Solomon Islands
506 { "en_SC", "en_GB" }, // Seychelles
507 { "en_SD", "en_GB" }, // Sudan
b331163b 508 { "en_SE", "en_150" }, // Apple locale addition
a961784b
A
509 { "en_SG", "en_GB" },
510 { "en_SH", "en_GB" },
b331163b
A
511 { "en_SI", "en_150" }, // Apple locale addition
512 { "en_SK", "en_150" }, // Apple locale addition
f3c0d7a5
A
513 { "en_SL", "en_GB" }, // Sierra Leone
514 { "en_SS", "en_GB" }, // South Sudan
515 { "en_SZ", "en_GB" }, // Swaziland
516 { "en_TC", "en_GB" }, // Tristan da Cunha
517 { "en_TO", "en_GB" }, // Tonga
518 { "en_TT", "en_GB" }, // Trinidad & Tobago
519 { "en_TV", "en_GB" }, // Tuvalu
520 { "en_TZ", "en_GB" }, // Tanzania
521 { "en_UG", "en_GB" }, // Uganda
522 { "en_VC", "en_GB" }, // St. Vincent & Grenadines
a961784b 523 { "en_VG", "en_GB" },
f3c0d7a5
A
524 { "en_VU", "en_GB" }, // Vanuatu
525 { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?)
526 { "en_ZA", "en_GB" }, // South Africa
527 { "en_ZM", "en_GB" }, // Zambia
528 { "en_ZW", "en_GB" }, // Zimbabwe
b331163b 529};
2ca993e8
A
530enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) };
531
532typedef struct {
533 const char * locale;
534 const char * parent;
535 int8_t distance;
536} LocParentAndDistance;
537
538static LocParentAndDistance locParentMap[] = {
539 // The localizations listed in the first column are in
540 // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.).
541 // The distance is a rough measure of distance from
542 // the localization to its parent, used as a weight.
f3c0d7a5 543 { "en_001", "en", 2 },
2ca993e8
A
544 { "en_150", "en_GB", 1 },
545 { "en_AU", "en_GB", 1 },
f3c0d7a5
A
546 { "en_GB", "en_001", 0 },
547 { "en_US", "en", 0 },
2ca993e8
A
548 { "es_419", "es", 2 },
549 { "es_MX", "es_419", 0 },
550 { "pt_PT", "pt", 2 },
f3c0d7a5
A
551 { "yue_Hans_CN","yue_Hans",0 },
552 { "yue_Hant_HK","yue_Hant",0 },
2ca993e8
A
553 { "zh_Hans_CN", "zh_Hans", 0 },
554 { "zh_Hant_HK", "zh_Hant", 1 },
555 { "zh_Hant_TW", "zh_Hant", 0 },
b331163b 556};
2ca993e8 557enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 };
b331163b
A
558
559enum {
f3c0d7a5
A
560 kStringsAllocSize = 4480, // cannot expand; current actual usage 4150
561 kParentMapInitCount = 205 // can expand; current actual usage 205
b331163b
A
562};
563
564U_CDECL_BEGIN
565static UBool U_CALLCONV ualocale_cleanup(void);
566U_CDECL_END
567
568U_NAMESPACE_BEGIN
569
570static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
571
572static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
573static char* gStrings = NULL;
574static UHashtable* gAliasMap = NULL;
575static UHashtable* gParentMap = NULL;
576
577U_NAMESPACE_END
578
579U_CDECL_BEGIN
580
581static UBool U_CALLCONV ualocale_cleanup(void)
582{
583 U_NAMESPACE_USE
584
585 gUALocaleCacheInitOnce.reset();
586
587 if (gMapDataState > 0) {
588 uhash_close(gParentMap);
589 gParentMap = NULL;
590 uhash_close(gAliasMap);
591 gAliasMap = NULL;
592 uprv_free(gStrings);
593 gStrings = NULL;
594 }
595 gMapDataState = 0;
596 return TRUE;
597}
598
599static void initializeMapData() {
600 U_NAMESPACE_USE
601
602 UResourceBundle * curBundle;
603 char* stringsPtr;
604 char* stringsEnd;
605 UErrorCode status;
606 int32_t entryIndex, icuEntryCount;
607
608 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
609
610 gStrings = (char*)uprv_malloc(kStringsAllocSize);
611 if (gStrings) {
612 stringsPtr = gStrings;
613 stringsEnd = gStrings + kStringsAllocSize;
614 }
615
616 status = U_ZERO_ERROR;
617 curBundle = NULL;
618 icuEntryCount = 0;
619 if (gStrings) {
620 curBundle = ures_openDirect(NULL, "metadata", &status);
621 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
622 curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
623 if (U_SUCCESS(status)) {
624 icuEntryCount = ures_getSize(curBundle); // currently 331
625 }
626 }
627 status = U_ZERO_ERROR;
628 gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
629 kAppleAliasMapCount + icuEntryCount, &status);
630 // defaults to keyDeleter NULL
631 if (U_SUCCESS(status)) {
632 for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
633 uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
634 }
635 status = U_ZERO_ERROR;
636 UResourceBundle * aliasMapBundle = NULL;
637 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
638 aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
639 if (U_FAILURE(status)) {
640 break; // error
641 }
642 const char * keyStr = ures_getKey(aliasMapBundle);
643 int32_t len = uprv_strlen(keyStr);
644 if (len >= stringsEnd - stringsPtr) {
645 break; // error
646 }
647 uprv_strcpy(stringsPtr, keyStr);
648 char * inLocStr = stringsPtr;
649 stringsPtr += len + 1;
650
651 len = stringsEnd - stringsPtr - 1;
652 ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
653 if (U_FAILURE(status)) {
654 break; // error
655 }
656 stringsPtr[len] = 0;
657 uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
658 stringsPtr += len + 1;
659 }
660 ures_close(aliasMapBundle);
661 } else {
662 ures_close(curBundle);
663 uprv_free(gStrings);
664 gMapDataState = -1; // failure
665 return;
666 }
667 ures_close(curBundle);
668
669 status = U_ZERO_ERROR;
670 gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
671 kParentMapInitCount, &status);
672 // defaults to keyDeleter NULL
673 if (U_SUCCESS(status)) {
674 curBundle = ures_openDirect(NULL, "supplementalData", &status);
675 curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
676 if (U_SUCCESS(status)) {
677 UResourceBundle * parentMapBundle = NULL;
678 while (TRUE) {
679 parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
680 if (U_FAILURE(status)) {
681 break; // no more parent bundles, normal exit
682 }
683 const char * keyStr = ures_getKey(parentMapBundle);
684 int32_t len = uprv_strlen(keyStr);
685 if (len >= stringsEnd - stringsPtr) {
686 break; // error
687 }
688 uprv_strcpy(stringsPtr, keyStr);
689 char * parentStr = stringsPtr;
690 stringsPtr += len + 1;
691
692 if (ures_getType(parentMapBundle) == URES_STRING) {
693 len = stringsEnd - stringsPtr - 1;
694 ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
695 if (U_FAILURE(status)) {
696 break; // error
697 }
698 stringsPtr[len] = 0;
699 uhash_put(gParentMap, stringsPtr, parentStr, &status);
700 stringsPtr += len + 1;
701 } else {
702 // should be URES_ARRAY
703 icuEntryCount = ures_getSize(parentMapBundle);
704 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
705 len = stringsEnd - stringsPtr - 1;
706 ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
707 if (U_FAILURE(status)) {
708 break;
709 }
710 stringsPtr[len] = 0;
711 uhash_put(gParentMap, stringsPtr, parentStr, &status);
712 stringsPtr += len + 1;
713 }
714 }
715 }
716 ures_close(parentMapBundle);
717 }
718 ures_close(curBundle);
719
720 status = U_ZERO_ERROR;
721 for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
722 uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
723 }
724 } else {
725 uhash_close(gAliasMap);
726 gAliasMap = NULL;
727 uprv_free(gStrings);
728 gMapDataState = -1; // failure
729 return;
730 }
731
2ca993e8
A
732#if DEBUG_UALOC
733 printf("# gStrings size %ld\n", stringsPtr - gStrings);
734 printf("# gParentMap count %d\n", uhash_count(gParentMap));
735#endif
b331163b
A
736 gMapDataState = 1;
737}
738
739U_CDECL_END
740
741// The following maps aliases, etc. Ensures 0-termination if no error.
742static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
743{
744 if (U_FAILURE(*status)) {
745 return;
746 }
747 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
748
749 const char *replacement = NULL;
750 if (gMapDataState > 0) {
751 replacement = (const char *)uhash_get(gAliasMap, locale);
752 }
753 if (replacement == NULL) {
754 replacement = locale;
755 }
2ca993e8 756 int32_t len = strnlen(replacement, normalizedCapacity);
b331163b
A
757 if (len < normalizedCapacity) { // allow for 0 termination
758 uprv_strcpy(normalized, replacement);
759 } else {
760 *status = U_BUFFER_OVERFLOW_ERROR;
761 }
762}
763
764static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
765{
766 if (U_FAILURE(*status)) {
767 return;
768 }
769 if (gMapDataState > 0) {
770 const char *replacement = (const char *)uhash_get(gParentMap, locale);
771 if (replacement) {
772 int32_t len = uprv_strlen(replacement);
773 if (len < parentCapacity) { // allow for 0 termination
774 uprv_strcpy(parent, replacement);
775 } else {
776 *status = U_BUFFER_OVERFLOW_ERROR;
777 }
778 return;
779 }
780 }
781 uloc_getParent(locale, parent, parentCapacity - 1, status);
782 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
783}
784
785// Might do something better for this, perhaps maximizing locales then stripping
2ca993e8 786static const char * getLocParent(const char *locale, int32_t* distance)
b331163b
A
787{
788 int32_t locParentIndex;
789 for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
2ca993e8
A
790 if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) {
791 *distance = locParentMap[locParentIndex].distance;
792 return locParentMap[locParentIndex].parent;
b331163b
A
793 }
794 }
f3c0d7a5
A
795 if (gMapDataState > 0) {
796 const char *replacement = (const char *)uhash_get(gParentMap, locale);
797 if (replacement) {
798 *distance = 1;
799 return replacement;
800 }
801 }
b331163b
A
802 return NULL;
803}
804
805// this just checks if the *pointer* value is already in the array
806static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
807{
808 int32_t locIndex;
809 for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
810 if (locToCheck == localizationsToUse[locIndex]) {
811 return TRUE;
812 }
813 }
814 return FALSE;
815}
816
817enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
818
819int32_t
820ualoc_localizationsToUse( const char* const *preferredLanguages,
821 int32_t preferredLanguagesCount,
822 const char* const *availableLocalizations,
823 int32_t availableLocalizationsCount,
824 const char* *localizationsToUse,
825 int32_t localizationsToUseCapacity,
826 UErrorCode *status )
827{
828 if (U_FAILURE(*status)) {
829 return -1;
830 }
831 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
832 *status = U_ILLEGAL_ARGUMENT_ERROR;
833 return -1;
834 }
835 // get resource data, need to protect with mutex
836 if (gMapDataState == 0) {
837 umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData);
838 }
839 int32_t locsToUseCount = 0;
840 int32_t prefLangIndex, availLocIndex = 0;
2ca993e8
A
841 int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match
842 int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0;
b331163b
A
843 char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
844 char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
b331163b 845 UBool foundMatch = FALSE;
f3c0d7a5 846 UBool backupMatchPrefLang_pt_PT = FALSE;
b331163b 847
2ca993e8
A
848#if DEBUG_UALOC
849 if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) {
850 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n",
851 preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]);
852 } else {
853 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n",
854 preferredLanguagesCount, availableLocalizationsCount);
855 }
856#endif
857
b331163b
A
858 // Part 1, find the best matching localization, if any
859 for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
860 char prefLangBaseName[kLangScriptRegMaxLen + 1];
861 char prefLangNormName[kLangScriptRegMaxLen + 1];
862 char prefLangParentName[kLangScriptRegMaxLen + 1];
863 UErrorCode tmpStatus = U_ZERO_ERROR;
864
865 if (preferredLanguages[prefLangIndex] == NULL) {
866 continue; // skip NULL preferredLanguages entry, go to next one
867 }
868 // use underscores, fix bad capitalization, delete any keywords
869 uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
870 if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
871 uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
872 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
873 }
874 prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
2ca993e8
A
875#if DEBUG_UALOC
876 printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName);
877#endif
b331163b
A
878
879 // if we have not already allocated and filled the array of
880 // base availableLocalizations, do so now.
881 if (availLocBase == NULL) {
882 availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
883 if (availLocBase == NULL) {
884 continue; // cannot further check this preferredLanguages entry, go to next one
885 }
2ca993e8
A
886#if DEBUG_UALOC
887 printf(" # allocate & fill availLocBase\n");
888#endif
b331163b
A
889 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
890 tmpStatus = U_ZERO_ERROR;
2ca993e8
A
891 if (availableLocalizations[availLocIndex] == NULL) {
892 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
893 continue;
894 }
b331163b
A
895 uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
896 if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
897 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8 898 continue;
b331163b 899 }
2ca993e8
A
900 availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
901#if DEBUG_UALOC
902 printf(" # add availLocBase %s\n", availLocBase[availLocIndex]);
903#endif
b331163b
A
904 }
905 }
906 // first compare base preferredLanguage to base versions of availableLocalizations names
907 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
908 if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
909 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
910 foundMatchPrefLangIndex = prefLangIndex;
911#if DEBUG_UALOC
912 printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]);
913#endif
b331163b
A
914 break;
915 }
916 }
917 if (foundMatch) {
b331163b
A
918 break; // found a loc for this preferredLanguages entry
919 }
920
921 // get normalized preferredLanguage
922 tmpStatus = U_ZERO_ERROR;
923 ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
924 if (U_FAILURE(tmpStatus)) {
925 continue; // can't handle this preferredLanguages entry, go to next one
926 }
2ca993e8
A
927#if DEBUG_UALOC
928 printf(" # prefLangNormName %s\n", prefLangNormName);
929#endif
b331163b
A
930 // if we have not already allocated and filled the array of
931 // normalized availableLocalizations, do so now.
932 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", "zh_HK" into "zh_Hant_HK",
933 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
934 if (availLocNorm == NULL) {
935 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
936 if (availLocNorm == NULL) {
937 continue; // cannot further check this preferredLanguages entry, go to next one
938 }
2ca993e8
A
939#if DEBUG_UALOC
940 printf(" # allocate & fill availLocNorm\n");
941#endif
b331163b
A
942 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
943 tmpStatus = U_ZERO_ERROR;
944 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
945 if (U_FAILURE(tmpStatus)) {
946 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8
A
947#if DEBUG_UALOC
948 } else {
949 printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
950#endif
b331163b 951 }
b331163b
A
952 }
953 }
954 // now compare normalized preferredLanguage to normalized localization names
955 // if matches, copy *original* localization name
956 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
957 if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
958 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
959 foundMatchPrefLangIndex = prefLangIndex;
960#if DEBUG_UALOC
961 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
962#endif
b331163b
A
963 break;
964 }
965 }
966 if (foundMatch) {
b331163b
A
967 break; // found a loc for this preferredLanguages entry
968 }
969
970 // now walk up the parent chain for preferredLanguage
971 // until we find a match or hit root
972 uprv_strcpy(prefLangBaseName, prefLangNormName);
973 while (!foundMatch) {
974 tmpStatus = U_ZERO_ERROR;
975 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
976 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
977 break; // reached root or cannot proceed further
978 }
2ca993e8
A
979#if DEBUG_UALOC
980 printf(" # prefLangParentName %s\n", prefLangParentName);
981#endif
b331163b
A
982
983 // now compare this preferredLanguage parent to normalized localization names
984 // if matches, copy *original* localization name
985 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
986 if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
987 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
988 foundMatchPrefLangIndex = prefLangIndex;
989#if DEBUG_UALOC
990 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
991#endif
b331163b
A
992 break;
993 }
994 }
995 uprv_strcpy(prefLangBaseName, prefLangParentName);
996 }
997 if (foundMatch) {
998 break; // found a loc for this preferredLanguages entry
999 }
1000
2ca993e8
A
1001 // last try, use parents of selected language to try for backup match
1002 // if we have not already found one
1003 if (availLocIndexBackup < 0) {
b331163b
A
1004 // now walk up the parent chain for preferredLanguage again
1005 // checking against parents of selected availLocNorm entries
1006 // but this time start with current prefLangNormName
1007 uprv_strcpy(prefLangBaseName, prefLangNormName);
2ca993e8 1008 int32_t minDistance = kMaxParentDistance;
b331163b 1009 while (TRUE) {
b331163b
A
1010 // now compare this preferredLanguage to normalized localization names
1011 // parent if have one for this; if matches, copy *original* localization name
2ca993e8
A
1012#if DEBUG_UALOC
1013 printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName);
1014#endif
b331163b 1015 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
2ca993e8
A
1016 char availLocMinOrParent[kLangScriptRegMaxLen + 1];
1017 int32_t distance;
1018 // first check for special Apple parents of availLocNorm -
1019 // - the number of locales with such parents is small -
1020 // or if not such parent, then try stripping region.
1021 const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance);
1022 if (availLocParent) {
1023#if DEBUG_UALOC
1024 printf(" # availLocAppleParentName %s\n", availLocParent);
1025#endif
1026 if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) {
1027 availLocIndexBackup = availLocIndex; // records where the match occurred
1028 backupMatchPrefLangIndex = prefLangIndex;
1029 minDistance = distance;
1030#if DEBUG_UALOC
1031 printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance);
1032#endif
1033 continue;
1034 }
1035 }
1036 if (minDistance <= 1) {
1037 continue; // we can't get any closer in the rest of this iteration
1038 }
1039 if (availLocParent == NULL) {
1040 tmpStatus = U_ZERO_ERROR;
1041 int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1042 if (U_SUCCESS(tmpStatus) && regLen > 1) {
1043 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1044 if (U_SUCCESS(tmpStatus)) {
1045 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
1046#if DEBUG_UALOC
1047 printf(" # availLocRegMaxName %s\n", availLocMinOrParent);
1048#endif
1049 char availLocTemp[kLangScriptRegMaxLen + 1];
1050 uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus);
1051 if (U_SUCCESS(tmpStatus)) {
1052 availLocTemp[kLangScriptRegMaxLen] = 0;
1053 uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1054 if (U_SUCCESS(tmpStatus)) {
1055 availLocMinOrParent[kLangScriptRegMaxLen] = 0;
1056#if DEBUG_UALOC
1057 printf(" # availLocNoRegParentName %s\n", availLocMinOrParent);
1058#endif
1059 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
1060 availLocIndexBackup = availLocIndex; // records where the match occurred
1061 backupMatchPrefLangIndex = prefLangIndex;
1062 minDistance = 1;
f3c0d7a5 1063 backupMatchPrefLang_pt_PT = (uprv_strcmp(prefLangNormName, "pt_PT") == 0);
2ca993e8
A
1064#if DEBUG_UALOC
1065 printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n");
1066#endif
1067 continue;
1068 }
1069 }
1070 }
1071 }
1072 }
1073 }
1074 // then check against minimized version of availLocNorm
1075 tmpStatus = U_ZERO_ERROR;
1076 uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1077 if (U_FAILURE(tmpStatus)) {
1078 continue;
1079 }
1080 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
1081#if DEBUG_UALOC
1082 printf(" # availLocMinimized %s\n", availLocMinOrParent);
1083#endif
1084 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
1085 availLocIndexBackup = availLocIndex; // records where the match occurred
1086 backupMatchPrefLangIndex = prefLangIndex;
1087 minDistance = 1;
1088#if DEBUG_UALOC
1089 printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n");
1090#endif
b331163b
A
1091 }
1092 }
2ca993e8 1093 if (availLocIndexBackup >= 0) {
b331163b
A
1094 break;
1095 }
2ca993e8 1096 tmpStatus = U_ZERO_ERROR;
b331163b
A
1097 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1098 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
1099 break; // reached root or cannot proceed further
1100 }
1101 uprv_strcpy(prefLangBaseName, prefLangParentName);
1102 }
1103 }
2ca993e8
A
1104 }
1105 // If we have a backup match, decide what to do
1106 if (availLocIndexBackup >= 0) {
1107 if (!foundMatch) {
1108 // no main match, just use the backup
1109 availLocIndex = availLocIndexBackup;
1110 foundMatch = TRUE;
1111#if DEBUG_UALOC
1112 printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup);
1113#endif
f3c0d7a5 1114 } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && (!backupMatchPrefLang_pt_PT || uprv_strcmp(availLocNorm[availLocIndexBackup], "pt_BR") != 0)) {
2ca993e8
A
1115 // have a main match but backup match was higher in the prefs, use it if for a different language
1116#if DEBUG_UALOC
1117 printf(" # have backup match higher in prefs, comparing its language and script to main match\n");
1118#endif
1119 char mainLang[ULOC_LANG_CAPACITY + 1];
1120 char backupLang[ULOC_LANG_CAPACITY + 1];
1121 UErrorCode tmpStatus = U_ZERO_ERROR;
1122 uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1123 mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1124 uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1125 backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1126 if (U_SUCCESS(tmpStatus)) {
1127 if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1128 // backup match has different language than main match
1129 availLocIndex = availLocIndexBackup;
1130 // foundMatch is already TRUE
1131#if DEBUG_UALOC
1132 printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1133#endif
1134 } else {
1135 // backup match has same language as main match, check scripts too
1136 char availLocMaximized[kLangScriptRegMaxLen + 1];
1137
1138 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1139 availLocMaximized[kLangScriptRegMaxLen] = 0;
1140 uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1141 mainLang[ULOC_LANG_CAPACITY] = 0;
1142
1143 uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1144 availLocMaximized[kLangScriptRegMaxLen] = 0;
1145 uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1146 backupLang[ULOC_LANG_CAPACITY] = 0;
1147
1148 if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1149 // backup match has different script than main match
1150 availLocIndex = availLocIndexBackup;
1151 // foundMatch is already TRUE
1152#if DEBUG_UALOC
1153 printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1154#endif
1155 }
1156 }
1157 }
b331163b
A
1158 }
1159 }
1160
1161 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
1162 if (foundMatch) {
1163 // Here availLocIndex corresponds to the first matched localization
1164 UErrorCode tmpStatus = U_ZERO_ERROR;
1165 int32_t availLocMatchIndex = availLocIndex;
1166 if (locsToUseCount < localizationsToUseCapacity) {
1167 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
1168 }
1169 // at this point we must have availLocBase, and minimally matched against that.
1170 // if we have not already allocated and filled the array of
1171 // normalized availableLocalizations, do so now, but don't require it
1172 if (availLocNorm == NULL) {
1173 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
1174 if (availLocNorm != NULL) {
1175 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1176 tmpStatus = U_ZERO_ERROR;
1177 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
1178 if (U_FAILURE(tmpStatus)) {
1179 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
1180 }
1181 }
1182 }
1183 }
1184
1185 // add normalized form of matching loc, if different and in availLocBase
1186 if (locsToUseCount < localizationsToUseCapacity) {
1187 tmpStatus = U_ZERO_ERROR;
1188 char matchedLocNormName[kLangScriptRegMaxLen + 1];
1189 char matchedLocParentName[kLangScriptRegMaxLen + 1];
1190 // get normalized form of matching loc
1191 if (availLocNorm != NULL) {
1192 uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
1193 } else {
1194 ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
1195 }
1196 if (U_SUCCESS(tmpStatus)) {
1197 // add normalized form of matching loc, if different and in availLocBase
1198 if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
1199 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
1200 // from this point on, availLocIndex no longer corresponds to the matched localization.
1201 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1202 if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
1203 || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
1204 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1205 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1206 break;
1207 }
1208 }
1209 }
1210
1211 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
1212 while (locsToUseCount < localizationsToUseCapacity) {
1213 ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1214 if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
1215 break; // reached root or cannot proceed further
1216 }
1217
1218 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
1219 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1220 if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
1221 || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
1222 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1223 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1224 break;
1225 }
1226 }
1227 uprv_strcpy(matchedLocNormName, matchedLocParentName);
1228 }
1229
1230 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
1231 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
1232 // matchedLocNormName again, comparing against parents of selected availLocNorm entries.
1233 // But this picks up too many matches that are not parents of the matched localization. So
1234 // we just handle these specially.
1235 if ( locsToUseCount < localizationsToUseCapacity
1236 && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
1237 || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
1238 int32_t zhTW_matchIndex = -1;
1239 UBool zhHant_found = FALSE;
1240 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1241 if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
1242 zhTW_matchIndex = availLocIndex;
1243 }
1244 if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
1245 zhHant_found = TRUE;
1246 }
1247 }
1248 if (zhTW_matchIndex >= 0 && !zhHant_found
1249 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
1250 localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
1251 }
1252 }
1253 }
1254 }
1255 }
1256
1257 uprv_free(availLocNorm);
1258 uprv_free(availLocBase);
1259 return locsToUseCount;
1260}
1261