]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ualoc.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / ualoc.cpp
CommitLineData
57a6839d
A
1/*
2*****************************************************************************************
9f1b1155 3* Copyright (C) 2014-2017 Apple Inc. All Rights Reserved.
57a6839d
A
4*****************************************************************************************
5*/
6
2ca993e8
A
7#define DEBUG_UALOC 0
8#if DEBUG_UALOC
9#include <stdio.h>
10#endif
11#include <string.h>
9f1b1155 12#include <ctype.h>
57a6839d
A
13#include "unicode/utypes.h"
14#include "unicode/ualoc.h"
15#include "unicode/uloc.h"
16#include "unicode/ures.h"
17#include "unicode/putil.h"
f3c0d7a5 18#include "unicode/ustring.h"
57a6839d
A
19#include "cstring.h"
20#include "cmemory.h"
b331163b
A
21#include "uhash.h"
22#include "umutex.h"
23#include "ucln_cmn.h"
57a6839d
A
24// the following has replacements for some math.h funcs etc
25#include "putilimp.h"
26
27
28// The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
29// From its docs (adapted): [IntF is] a special integer that represents the number in
30// normalized scientific notation.
31// Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
32// offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
33// 14660000000000 -> 1.46600E13 -> 63146600
34// 0.0001 -> 1.00000E-4 -> 46100000
35// -123.456 -> -1.23456E-2 -> -48123456
36//
37// Here to avoid an extra division we have the max coefficient as 999999 (instead of
38// 9.99999) and instead offset the exponent by -55.
39//
40static double doubleFromIntF(int32_t intF) {
41 double coefficient = (double)(intF % 1000000);
42 int32_t exponent = (intF / 1000000) - 55;
43 return coefficient * uprv_pow10(exponent);
44}
45
46static int compareLangEntries(const void * entry1, const void * entry2) {
47 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
48 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
49 // want descending order
50 if (fraction1 > fraction2) return -1;
51 if (fraction1 < fraction2) return 1;
52 // userFractions the same, sort by languageCode
53 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
54}
55
f3c0d7a5
A
56// language codes to version with default script
57// must be sorted by language code
58static const char * langToDefaultScript[] = {
59 "az", "az_Latn",
60 "bs", "bs_Latn",
61 "iu", "iu_Cans",
62 "kk", "kk_Arab",
63 "ks", "ks_Arab",
64 "ku", "ku_Latn",
65 "ky", "ky_Cyrl",
66 "mn", "mn_Cyrl",
67 "ms", "ms_Latn",
68 "pa", "pa_Guru",
69 "rif", "rif_Tfng",
70 "shi", "shi_Tfng",
71 "sr", "sr_Cyrl",
72 "tg", "tg_Cyrl",
73 "tk", "tk_Latn",
74 "ug", "ug_Arab",
75 "uz", "uz_Latn",
76 "vai", "vai_Vaii",
77 "yue", "yue_Hant",
78 "zh", "zh_Hans",
79 NULL
80};
81
82static const char * langCodeWithScriptIfAmbig(const char * langCode) {
83 const char ** langToDefScriptPtr = langToDefaultScript;
84 const char * testCurLoc;
85 while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) {
86 int cmp = uprv_strcmp(langCode, testCurLoc);
87 if (cmp <= 0) {
88 if (cmp == 0) {
89 return *langToDefScriptPtr;
90 }
91 break;
92 }
93 langToDefScriptPtr++;
94 }
95 return langCode;
96}
97
57a6839d
A
98static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
99static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
100static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
101
102enum {
103 kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
104 kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this
105};
106
107U_CAPI int32_t U_EXPORT2
108ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
109 UALanguageEntry *entries, int32_t entriesCapacity,
110 UErrorCode *err)
111{
112 if (U_FAILURE(*err)) {
113 return 0;
114 }
115 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
116 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
117 *err = U_ILLEGAL_ARGUMENT_ERROR;
118 return 0;
119 }
120 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
121 rb = ures_getByKey(rb, "territoryInfo", rb, err);
122 rb = ures_getByKey(rb, regionID, rb, err);
123 if (U_FAILURE(*err)) {
124 ures_close(rb);
125 return 0;
126 }
127
128 int32_t entryCount = 0;
129 UResourceBundle *langBund = NULL;
130 int32_t lbIdx, lbCount = ures_getSize(rb);
131 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
132 UALanguageEntry * langEntries = localLangEntries;
133 int32_t langEntriesMax = kLocalLangEntriesMax;
134
135 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
136 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
137 if (U_FAILURE(*err)) {
138 break;
139 }
140 const char * langCode = ures_getKey(langBund);
141 if (uprv_strcmp(langCode,"territoryF") == 0) {
142 continue;
143 }
144 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
145 continue; // a code we cannot handle
146 }
147
148 UErrorCode localErr = U_ZERO_ERROR;
149 double userFraction = 0.0;
150 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
151 if (U_SUCCESS(localErr)) {
152 int32_t intF = ures_getInt(itemBund, &localErr);
153 if (U_SUCCESS(localErr)) {
154 userFraction = doubleFromIntF(intF);
155 }
156 ures_close(itemBund);
157 }
158 if (userFraction < minimumFraction) {
159 continue;
160 }
161 if (entries != NULL) {
162 localErr = U_ZERO_ERROR;
163 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
164 int32_t ulen;
165 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
166 if (U_SUCCESS(localErr)) {
167 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
168 if (cmp == 0) {
169 langStatus = UALANGSTATUS_OFFICIAL;
170 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
171 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
172 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
173 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
174 }
175 }
176 // Now we have all of the info for our next entry
177 if (entryCount >= langEntriesMax) {
178 int32_t newMax = langEntriesMax * kLangEntriesFactor;
179 if (langEntries == localLangEntries) {
180 // first allocation, copy from local buf
181 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
182 if (langEntries == NULL) {
183 *err = U_MEMORY_ALLOCATION_ERROR;
184 break;
185 }
186 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
187 } else {
188 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
189 if (langEntries == NULL) {
190 *err = U_MEMORY_ALLOCATION_ERROR;
191 break;
192 }
193 }
194 langEntriesMax = newMax;
195 }
f3c0d7a5 196 uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode));
57a6839d
A
197 langEntries[entryCount].userFraction = userFraction;
198 langEntries[entryCount].status = langStatus;
199 }
200 entryCount++;
201 }
202 ures_close(langBund);
203 ures_close(rb);
204 if (U_FAILURE(*err)) {
205 if (langEntries != localLangEntries) {
206 free(langEntries);
207 }
208 return 0;
209 }
210 if (entries != NULL) {
211 // sort langEntries, copy entries that fit to provided array
212 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
213 if (entryCount > entriesCapacity) {
214 entryCount = entriesCapacity;
215 }
216 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
217 if (langEntries != localLangEntries) {
218 free(langEntries);
219 }
220 }
221 return entryCount;
222}
223
57a6839d 224static const char * forceParent[] = {
a961784b 225 "en_150", "en_GB", // en for Europe
b331163b 226 "en_AU", "en_GB",
a961784b
A
227 "en_BD", "en_GB", // en for Bangladesh
228 "en_BE", "en_150", // en for Belgium goes to en for Europe
229 "en_DG", "en_GB",
230 "en_FK", "en_GB",
231 "en_GG", "en_GB",
232 "en_GI", "en_GB",
233 "en_HK", "en_GB", // en for Hong Kong
234 "en_IE", "en_GB",
235 "en_IM", "en_GB",
b331163b 236 "en_IN", "en_GB",
a961784b
A
237 "en_IO", "en_GB",
238 "en_JE", "en_GB",
a62d09fc 239 "en_JM", "en_GB",
a961784b
A
240 "en_MO", "en_GB",
241 "en_MT", "en_GB",
2ca993e8 242 "en_MV", "en_GB", // for Maldives
a961784b 243 "en_MY", "en_GB", // en for Malaysia
2ca993e8 244 "en_NZ", "en_AU",
a961784b
A
245 "en_PK", "en_GB", // en for Pakistan
246 "en_SG", "en_GB",
247 "en_SH", "en_GB",
248 "en_VG", "en_GB",
f3c0d7a5
A
249 "yue", "yue_CN", // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M)
250 "yue_CN", "root",
251 "yue_HK", "root",
252 "yue_Hans","yue_CN",
253 "yue_Hant","yue_HK",
57a6839d
A
254 "zh", "zh_CN",
255 "zh_CN", "root",
256 "zh_Hant", "zh_TW",
257 "zh_TW", "root",
258 NULL
259};
260
2ca993e8
A
261enum { kLocBaseNameMax = 16 };
262
57a6839d
A
263U_CAPI int32_t U_EXPORT2
264ualoc_getAppleParent(const char* localeID,
265 char * parent,
266 int32_t parentCapacity,
267 UErrorCode* err)
268{
269 UResourceBundle *rb;
270 int32_t len;
271 UErrorCode tempStatus;
272 char locbuf[ULOC_FULLNAME_CAPACITY+1];
08b89b0a 273 char * foundDoubleUnderscore;
57a6839d
A
274
275 if (U_FAILURE(*err)) {
276 return 0;
277 }
278 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
279 *err = U_ILLEGAL_ARGUMENT_ERROR;
280 return 0;
281 }
08b89b0a 282 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
57a6839d
A
283 if (U_FAILURE(*err)) {
284 return 0;
285 }
286 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
287 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
288 *err = U_ZERO_ERROR;
289 }
08b89b0a
A
290 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
291 if (foundDoubleUnderscore != NULL) {
292 *foundDoubleUnderscore = 0; /* terminate at the __ */
293 len = uprv_strlen(locbuf);
294 }
b331163b 295 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
57a6839d
A
296 const char ** forceParentPtr = forceParent;
297 const char * testCurLoc;
298 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
299 int cmp = uprv_strcmp(locbuf, testCurLoc);
300 if (cmp <= 0) {
301 if (cmp == 0) {
302 len = uprv_strlen(*forceParentPtr);
303 if (len < parentCapacity) {
304 uprv_strcpy(parent, *forceParentPtr);
305 } else {
306 *err = U_BUFFER_OVERFLOW_ERROR;
307 }
308 return len;
309 }
310 break;
311 }
312 forceParentPtr++;
313 }
314 }
315 tempStatus = U_ZERO_ERROR;
316 rb = ures_openDirect(NULL, locbuf, &tempStatus);
317 if (U_SUCCESS(tempStatus)) {
318 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
2ca993e8 319 ures_close(rb);
57a6839d
A
320 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
321 // we have followed an alias
322 len = uprv_strlen(actualLocale);
323 if (len < parentCapacity) {
324 uprv_strcpy(parent, actualLocale);
325 } else {
326 *err = U_BUFFER_OVERFLOW_ERROR;
327 }
57a6839d
A
328 return len;
329 }
2ca993e8
A
330 }
331 tempStatus = U_ZERO_ERROR;
332 rb = ures_openDirect(NULL, "supplementalData", &tempStatus);
333 rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus);
334 if (U_SUCCESS(tempStatus)) {
335 UResourceBundle * parentMapBundle = NULL;
336 int32_t childLen = 0;
337 while (childLen == 0) {
338 tempStatus = U_ZERO_ERROR;
339 parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus);
340 if (U_FAILURE(tempStatus)) {
341 break; // no more parent bundles, normal exit
342 }
343 char childName[kLocBaseNameMax + 1];
344 childName[kLocBaseNameMax] = 0;
345 const char * childPtr = NULL;
346 if (ures_getType(parentMapBundle) == URES_STRING) {
347 childLen = kLocBaseNameMax;
348 childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus);
349 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
350 childLen = 0;
351 }
352 } else { // should be URES_ARRAY
353 int32_t childCur, childCount = ures_getSize(parentMapBundle);
354 for (childCur = 0; childCur < childCount && childLen == 0; childCur++) {
355 tempStatus = U_ZERO_ERROR;
356 childLen = kLocBaseNameMax;
357 childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus);
358 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
359 childLen = 0;
360 }
361 }
362 }
363 }
364 ures_close(rb);
365 if (childLen > 0) {
366 // parentMapBundle key is the parent we are looking for
367 const char * keyStr = ures_getKey(parentMapBundle);
368 len = uprv_strlen(keyStr);
57a6839d 369 if (len < parentCapacity) {
2ca993e8 370 uprv_strcpy(parent, keyStr);
57a6839d
A
371 } else {
372 *err = U_BUFFER_OVERFLOW_ERROR;
373 }
2ca993e8 374 ures_close(parentMapBundle);
57a6839d
A
375 return len;
376 }
2ca993e8 377 ures_close(parentMapBundle);
57a6839d 378 }
2ca993e8 379
57a6839d
A
380 len = uloc_getParent(locbuf, parent, parentCapacity, err);
381 if (U_SUCCESS(*err) && len == 0) {
382 len = 4;
383 if (len < parentCapacity) {
384 uprv_strcpy(parent, "root");
385 } else {
386 *err = U_BUFFER_OVERFLOW_ERROR;
387 }
388 }
389 return len;
390}
391
b331163b
A
392// =================
393// Data and related functions for ualoc_localizationsToUse
394// =================
395
396static const char * appleAliasMap[][2] = {
397 // names are lowercase here because they are looked up after being processed by uloc_getBaseName
398 { "arabic", "ar" }, // T2
399 { "chinese", "zh_Hans" }, // T0
400 { "danish", "da" }, // T2
401 { "dutch", "nl" }, // T1, still in use
402 { "english", "en" }, // T0, still in use
403 { "finnish", "fi" }, // T2
404 { "french", "fr" }, // T0, still in use
405 { "german", "de" }, // T0, still in use
406 { "italian", "it" }, // T1, still in use
407 { "japanese", "ja" }, // T0, still in use
408 { "korean", "ko" }, // T1
a961784b 409 { "no_NO", "nb_NO" }, // special
b331163b
A
410 { "norwegian", "nb" }, // T2
411 { "polish", "pl" }, // T2
412 { "portuguese", "pt" }, // T2
413 { "russian", "ru" }, // T2
414 { "spanish", "es" }, // T1, still in use
415 { "swedish", "sv" }, // T2
416 { "thai", "th" }, // T2
417 { "turkish", "tr" }, // T2
f3c0d7a5 418 { "yue", "yue_Hans"}, // special
b331163b
A
419 { "zh", "zh_Hans" }, // special
420};
2ca993e8 421enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) };
b331163b
A
422
423static const char * appleParentMap[][2] = {
424 { "en_150", "en_GB" }, // Apple custom parent
425 { "en_AD", "en_150" }, // Apple locale addition
f3c0d7a5
A
426 { "en_AG", "en_GB" }, // Antigua & Barbuda
427 { "en_AI", "en_GB" }, // Anguilla
b331163b
A
428 { "en_AL", "en_150" }, // Apple locale addition
429 { "en_AT", "en_150" }, // Apple locale addition
430 { "en_AU", "en_GB" }, // Apple custom parent
431 { "en_BA", "en_150" }, // Apple locale addition
f3c0d7a5 432 { "en_BB", "en_GB" }, // Barbados
b331163b 433 { "en_BD", "en_GB" }, // Apple custom parent
a961784b 434 { "en_BE", "en_150" }, // Apple custom parent
f3c0d7a5
A
435 { "en_BM", "en_GB" }, // Bermuda
436 { "en_BS", "en_GB" }, // Bahamas
437 { "en_BW", "en_GB" }, // Botswana
438 { "en_BZ", "en_GB" }, // Belize
439 { "en_CC", "en_AU" }, // Cocos (Keeling) Islands
b331163b 440 { "en_CH", "en_150" }, // Apple locale addition
f3c0d7a5
A
441 { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?)
442 { "en_CX", "en_AU" }, // Christmas Island
b331163b
A
443 { "en_CY", "en_150" }, // Apple locale addition
444 { "en_CZ", "en_150" }, // Apple locale addition
445 { "en_DE", "en_150" }, // Apple locale addition
a961784b 446 { "en_DG", "en_GB" },
b331163b 447 { "en_DK", "en_150" }, // Apple locale addition
f3c0d7a5 448 { "en_DM", "en_GB" }, // Dominica
b331163b
A
449 { "en_EE", "en_150" }, // Apple locale addition
450 { "en_ES", "en_150" }, // Apple locale addition
451 { "en_FI", "en_150" }, // Apple locale addition
f3c0d7a5 452 { "en_FJ", "en_GB" }, // Fiji
a961784b 453 { "en_FK", "en_GB" },
b331163b 454 { "en_FR", "en_150" }, // Apple locale addition
f3c0d7a5 455 { "en_GD", "en_GB" }, // Grenada
a961784b 456 { "en_GG", "en_GB" },
f3c0d7a5 457 { "en_GH", "en_GB" }, // Ghana
a961784b 458 { "en_GI", "en_GB" },
f3c0d7a5 459 { "en_GM", "en_GB" }, // Gambia
b331163b 460 { "en_GR", "en_150" }, // Apple locale addition
f3c0d7a5 461 { "en_GY", "en_GB" }, // Guyana
b331163b
A
462 { "en_HK", "en_GB" }, // Apple custom parent
463 { "en_HR", "en_150" }, // Apple locale addition
464 { "en_HU", "en_150" }, // Apple locale addition
a961784b 465 { "en_IE", "en_GB" },
b331163b 466 { "en_IL", "en_001" }, // Apple locale addition
a961784b 467 { "en_IM", "en_GB" },
b331163b 468 { "en_IN", "en_GB" }, // Apple custom parent
a961784b 469 { "en_IO", "en_GB" },
b331163b
A
470 { "en_IS", "en_150" }, // Apple locale addition
471 { "en_IT", "en_150" }, // Apple locale addition
a961784b 472 { "en_JE", "en_GB" },
a62d09fc 473 { "en_JM", "en_GB" },
f3c0d7a5
A
474 { "en_KE", "en_GB" }, // Kenya
475 { "en_KI", "en_GB" }, // Kiribati
476 { "en_KN", "en_GB" }, // St. Kitts & Nevis
477 { "en_KY", "en_GB" }, // Cayman Islands
478 { "en_LC", "en_GB" }, // St. Lucia
479 { "en_LS", "en_GB" }, // Lesotho
b331163b
A
480 { "en_LT", "en_150" }, // Apple locale addition
481 { "en_LU", "en_150" }, // Apple locale addition
482 { "en_LV", "en_150" }, // Apple locale addition
483 { "en_ME", "en_150" }, // Apple locale addition
a961784b 484 { "en_MO", "en_GB" },
f3c0d7a5 485 { "en_MS", "en_GB" }, // Montserrat
a961784b 486 { "en_MT", "en_GB" },
f3c0d7a5 487 { "en_MU", "en_GB" }, // Mauritius
2ca993e8 488 { "en_MV", "en_GB" },
f3c0d7a5 489 { "en_MW", "en_GB" }, // Malawi
b331163b 490 { "en_MY", "en_GB" }, // Apple custom parent
f3c0d7a5
A
491 { "en_NA", "en_GB" }, // Namibia
492 { "en_NF", "en_AU" }, // Norfolk Island
493 { "en_NG", "en_GB" }, // Nigeria
b331163b
A
494 { "en_NL", "en_150" }, // Apple locale addition
495 { "en_NO", "en_150" }, // Apple locale addition
f3c0d7a5
A
496 { "en_NR", "en_AU" }, // Nauru
497 { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?)
2ca993e8 498 { "en_NZ", "en_AU" },
f3c0d7a5 499 { "en_PG", "en_AU" }, // Papua New Guinea
b331163b
A
500 { "en_PK", "en_GB" }, // Apple custom parent
501 { "en_PL", "en_150" }, // Apple locale addition
f3c0d7a5 502 { "en_PN", "en_GB" }, // Pitcairn Islands
b331163b
A
503 { "en_PT", "en_150" }, // Apple locale addition
504 { "en_RO", "en_150" }, // Apple locale addition
505 { "en_RU", "en_150" }, // Apple locale addition
f3c0d7a5
A
506 { "en_SB", "en_GB" }, // Solomon Islands
507 { "en_SC", "en_GB" }, // Seychelles
508 { "en_SD", "en_GB" }, // Sudan
b331163b 509 { "en_SE", "en_150" }, // Apple locale addition
a961784b
A
510 { "en_SG", "en_GB" },
511 { "en_SH", "en_GB" },
b331163b
A
512 { "en_SI", "en_150" }, // Apple locale addition
513 { "en_SK", "en_150" }, // Apple locale addition
f3c0d7a5
A
514 { "en_SL", "en_GB" }, // Sierra Leone
515 { "en_SS", "en_GB" }, // South Sudan
516 { "en_SZ", "en_GB" }, // Swaziland
517 { "en_TC", "en_GB" }, // Tristan da Cunha
518 { "en_TO", "en_GB" }, // Tonga
519 { "en_TT", "en_GB" }, // Trinidad & Tobago
520 { "en_TV", "en_GB" }, // Tuvalu
521 { "en_TZ", "en_GB" }, // Tanzania
522 { "en_UG", "en_GB" }, // Uganda
523 { "en_VC", "en_GB" }, // St. Vincent & Grenadines
a961784b 524 { "en_VG", "en_GB" },
f3c0d7a5
A
525 { "en_VU", "en_GB" }, // Vanuatu
526 { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?)
527 { "en_ZA", "en_GB" }, // South Africa
528 { "en_ZM", "en_GB" }, // Zambia
529 { "en_ZW", "en_GB" }, // Zimbabwe
b331163b 530};
2ca993e8
A
531enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) };
532
533typedef struct {
534 const char * locale;
535 const char * parent;
536 int8_t distance;
537} LocParentAndDistance;
538
539static LocParentAndDistance locParentMap[] = {
540 // The localizations listed in the first column are in
541 // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.).
542 // The distance is a rough measure of distance from
543 // the localization to its parent, used as a weight.
f3c0d7a5 544 { "en_001", "en", 2 },
2ca993e8
A
545 { "en_150", "en_GB", 1 },
546 { "en_AU", "en_GB", 1 },
f3c0d7a5
A
547 { "en_GB", "en_001", 0 },
548 { "en_US", "en", 0 },
2ca993e8
A
549 { "es_419", "es", 2 },
550 { "es_MX", "es_419", 0 },
551 { "pt_PT", "pt", 2 },
f3c0d7a5
A
552 { "yue_Hans_CN","yue_Hans",0 },
553 { "yue_Hant_HK","yue_Hant",0 },
2ca993e8
A
554 { "zh_Hans_CN", "zh_Hans", 0 },
555 { "zh_Hant_HK", "zh_Hant", 1 },
556 { "zh_Hant_TW", "zh_Hant", 0 },
b331163b 557};
2ca993e8 558enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 };
b331163b
A
559
560enum {
f3c0d7a5
A
561 kStringsAllocSize = 4480, // cannot expand; current actual usage 4150
562 kParentMapInitCount = 205 // can expand; current actual usage 205
b331163b
A
563};
564
565U_CDECL_BEGIN
566static UBool U_CALLCONV ualocale_cleanup(void);
567U_CDECL_END
568
569U_NAMESPACE_BEGIN
570
571static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
572
573static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
574static char* gStrings = NULL;
575static UHashtable* gAliasMap = NULL;
576static UHashtable* gParentMap = NULL;
577
578U_NAMESPACE_END
579
580U_CDECL_BEGIN
581
582static UBool U_CALLCONV ualocale_cleanup(void)
583{
584 U_NAMESPACE_USE
585
586 gUALocaleCacheInitOnce.reset();
587
588 if (gMapDataState > 0) {
589 uhash_close(gParentMap);
590 gParentMap = NULL;
591 uhash_close(gAliasMap);
592 gAliasMap = NULL;
593 uprv_free(gStrings);
594 gStrings = NULL;
595 }
596 gMapDataState = 0;
597 return TRUE;
598}
599
600static void initializeMapData() {
601 U_NAMESPACE_USE
602
603 UResourceBundle * curBundle;
604 char* stringsPtr;
605 char* stringsEnd;
606 UErrorCode status;
607 int32_t entryIndex, icuEntryCount;
608
609 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
610
611 gStrings = (char*)uprv_malloc(kStringsAllocSize);
612 if (gStrings) {
613 stringsPtr = gStrings;
614 stringsEnd = gStrings + kStringsAllocSize;
615 }
616
617 status = U_ZERO_ERROR;
618 curBundle = NULL;
619 icuEntryCount = 0;
620 if (gStrings) {
621 curBundle = ures_openDirect(NULL, "metadata", &status);
622 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
623 curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
624 if (U_SUCCESS(status)) {
625 icuEntryCount = ures_getSize(curBundle); // currently 331
626 }
627 }
628 status = U_ZERO_ERROR;
629 gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
630 kAppleAliasMapCount + icuEntryCount, &status);
631 // defaults to keyDeleter NULL
632 if (U_SUCCESS(status)) {
633 for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
634 uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
635 }
636 status = U_ZERO_ERROR;
637 UResourceBundle * aliasMapBundle = NULL;
638 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
639 aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
640 if (U_FAILURE(status)) {
641 break; // error
642 }
643 const char * keyStr = ures_getKey(aliasMapBundle);
644 int32_t len = uprv_strlen(keyStr);
645 if (len >= stringsEnd - stringsPtr) {
646 break; // error
647 }
648 uprv_strcpy(stringsPtr, keyStr);
649 char * inLocStr = stringsPtr;
650 stringsPtr += len + 1;
651
652 len = stringsEnd - stringsPtr - 1;
653 ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
654 if (U_FAILURE(status)) {
655 break; // error
656 }
657 stringsPtr[len] = 0;
658 uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
659 stringsPtr += len + 1;
660 }
661 ures_close(aliasMapBundle);
662 } else {
663 ures_close(curBundle);
664 uprv_free(gStrings);
665 gMapDataState = -1; // failure
666 return;
667 }
668 ures_close(curBundle);
669
670 status = U_ZERO_ERROR;
671 gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
672 kParentMapInitCount, &status);
673 // defaults to keyDeleter NULL
674 if (U_SUCCESS(status)) {
675 curBundle = ures_openDirect(NULL, "supplementalData", &status);
676 curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
677 if (U_SUCCESS(status)) {
678 UResourceBundle * parentMapBundle = NULL;
679 while (TRUE) {
680 parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
681 if (U_FAILURE(status)) {
682 break; // no more parent bundles, normal exit
683 }
684 const char * keyStr = ures_getKey(parentMapBundle);
685 int32_t len = uprv_strlen(keyStr);
686 if (len >= stringsEnd - stringsPtr) {
687 break; // error
688 }
689 uprv_strcpy(stringsPtr, keyStr);
690 char * parentStr = stringsPtr;
691 stringsPtr += len + 1;
692
693 if (ures_getType(parentMapBundle) == URES_STRING) {
694 len = stringsEnd - stringsPtr - 1;
695 ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
696 if (U_FAILURE(status)) {
697 break; // error
698 }
699 stringsPtr[len] = 0;
700 uhash_put(gParentMap, stringsPtr, parentStr, &status);
701 stringsPtr += len + 1;
702 } else {
703 // should be URES_ARRAY
704 icuEntryCount = ures_getSize(parentMapBundle);
705 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
706 len = stringsEnd - stringsPtr - 1;
707 ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
708 if (U_FAILURE(status)) {
709 break;
710 }
711 stringsPtr[len] = 0;
712 uhash_put(gParentMap, stringsPtr, parentStr, &status);
713 stringsPtr += len + 1;
714 }
715 }
716 }
717 ures_close(parentMapBundle);
718 }
719 ures_close(curBundle);
720
721 status = U_ZERO_ERROR;
722 for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
723 uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
724 }
725 } else {
726 uhash_close(gAliasMap);
727 gAliasMap = NULL;
728 uprv_free(gStrings);
729 gMapDataState = -1; // failure
730 return;
731 }
732
2ca993e8
A
733#if DEBUG_UALOC
734 printf("# gStrings size %ld\n", stringsPtr - gStrings);
735 printf("# gParentMap count %d\n", uhash_count(gParentMap));
736#endif
b331163b
A
737 gMapDataState = 1;
738}
739
740U_CDECL_END
741
742// The following maps aliases, etc. Ensures 0-termination if no error.
743static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
744{
745 if (U_FAILURE(*status)) {
746 return;
747 }
748 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
749
750 const char *replacement = NULL;
751 if (gMapDataState > 0) {
752 replacement = (const char *)uhash_get(gAliasMap, locale);
753 }
754 if (replacement == NULL) {
755 replacement = locale;
756 }
2ca993e8 757 int32_t len = strnlen(replacement, normalizedCapacity);
b331163b
A
758 if (len < normalizedCapacity) { // allow for 0 termination
759 uprv_strcpy(normalized, replacement);
760 } else {
761 *status = U_BUFFER_OVERFLOW_ERROR;
762 }
763}
764
765static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
766{
767 if (U_FAILURE(*status)) {
768 return;
769 }
770 if (gMapDataState > 0) {
771 const char *replacement = (const char *)uhash_get(gParentMap, locale);
772 if (replacement) {
773 int32_t len = uprv_strlen(replacement);
774 if (len < parentCapacity) { // allow for 0 termination
775 uprv_strcpy(parent, replacement);
776 } else {
777 *status = U_BUFFER_OVERFLOW_ERROR;
778 }
779 return;
780 }
781 }
782 uloc_getParent(locale, parent, parentCapacity - 1, status);
783 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
784}
785
786// Might do something better for this, perhaps maximizing locales then stripping
2ca993e8 787static const char * getLocParent(const char *locale, int32_t* distance)
b331163b
A
788{
789 int32_t locParentIndex;
790 for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
2ca993e8
A
791 if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) {
792 *distance = locParentMap[locParentIndex].distance;
793 return locParentMap[locParentIndex].parent;
b331163b
A
794 }
795 }
f3c0d7a5
A
796 if (gMapDataState > 0) {
797 const char *replacement = (const char *)uhash_get(gParentMap, locale);
798 if (replacement) {
799 *distance = 1;
800 return replacement;
801 }
802 }
b331163b
A
803 return NULL;
804}
805
806// this just checks if the *pointer* value is already in the array
807static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
808{
809 int32_t locIndex;
810 for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
811 if (locToCheck == localizationsToUse[locIndex]) {
812 return TRUE;
813 }
814 }
815 return FALSE;
816}
817
818enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
819
820int32_t
821ualoc_localizationsToUse( const char* const *preferredLanguages,
822 int32_t preferredLanguagesCount,
823 const char* const *availableLocalizations,
824 int32_t availableLocalizationsCount,
825 const char* *localizationsToUse,
826 int32_t localizationsToUseCapacity,
827 UErrorCode *status )
828{
829 if (U_FAILURE(*status)) {
830 return -1;
831 }
832 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
833 *status = U_ILLEGAL_ARGUMENT_ERROR;
834 return -1;
835 }
836 // get resource data, need to protect with mutex
837 if (gMapDataState == 0) {
838 umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData);
839 }
840 int32_t locsToUseCount = 0;
841 int32_t prefLangIndex, availLocIndex = 0;
2ca993e8
A
842 int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match
843 int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0;
b331163b
A
844 char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
845 char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
b331163b 846 UBool foundMatch = FALSE;
f3c0d7a5 847 UBool backupMatchPrefLang_pt_PT = FALSE;
b331163b 848
2ca993e8
A
849#if DEBUG_UALOC
850 if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) {
851 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n",
852 preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]);
853 } else {
854 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n",
855 preferredLanguagesCount, availableLocalizationsCount);
856 }
857#endif
858
b331163b
A
859 // Part 1, find the best matching localization, if any
860 for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
861 char prefLangBaseName[kLangScriptRegMaxLen + 1];
862 char prefLangNormName[kLangScriptRegMaxLen + 1];
863 char prefLangParentName[kLangScriptRegMaxLen + 1];
864 UErrorCode tmpStatus = U_ZERO_ERROR;
865
866 if (preferredLanguages[prefLangIndex] == NULL) {
867 continue; // skip NULL preferredLanguages entry, go to next one
868 }
869 // use underscores, fix bad capitalization, delete any keywords
870 uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
871 if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
872 uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
873 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
874 }
875 prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
2ca993e8
A
876#if DEBUG_UALOC
877 printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName);
878#endif
b331163b
A
879
880 // if we have not already allocated and filled the array of
881 // base availableLocalizations, do so now.
882 if (availLocBase == NULL) {
883 availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
884 if (availLocBase == NULL) {
885 continue; // cannot further check this preferredLanguages entry, go to next one
886 }
2ca993e8
A
887#if DEBUG_UALOC
888 printf(" # allocate & fill availLocBase\n");
889#endif
b331163b
A
890 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
891 tmpStatus = U_ZERO_ERROR;
2ca993e8
A
892 if (availableLocalizations[availLocIndex] == NULL) {
893 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
894 continue;
895 }
b331163b
A
896 uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
897 if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
898 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8 899 continue;
b331163b 900 }
2ca993e8
A
901 availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
902#if DEBUG_UALOC
903 printf(" # add availLocBase %s\n", availLocBase[availLocIndex]);
904#endif
b331163b
A
905 }
906 }
907 // first compare base preferredLanguage to base versions of availableLocalizations names
908 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
909 if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
910 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
911 foundMatchPrefLangIndex = prefLangIndex;
912#if DEBUG_UALOC
913 printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]);
914#endif
b331163b
A
915 break;
916 }
917 }
918 if (foundMatch) {
b331163b
A
919 break; // found a loc for this preferredLanguages entry
920 }
921
922 // get normalized preferredLanguage
923 tmpStatus = U_ZERO_ERROR;
924 ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
925 if (U_FAILURE(tmpStatus)) {
926 continue; // can't handle this preferredLanguages entry, go to next one
927 }
2ca993e8
A
928#if DEBUG_UALOC
929 printf(" # prefLangNormName %s\n", prefLangNormName);
930#endif
b331163b
A
931 // if we have not already allocated and filled the array of
932 // normalized availableLocalizations, do so now.
933 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", "zh_HK" into "zh_Hant_HK",
934 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
935 if (availLocNorm == NULL) {
936 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
937 if (availLocNorm == NULL) {
938 continue; // cannot further check this preferredLanguages entry, go to next one
939 }
2ca993e8
A
940#if DEBUG_UALOC
941 printf(" # allocate & fill availLocNorm\n");
942#endif
b331163b
A
943 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
944 tmpStatus = U_ZERO_ERROR;
945 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
946 if (U_FAILURE(tmpStatus)) {
947 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8
A
948#if DEBUG_UALOC
949 } else {
950 printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
951#endif
b331163b 952 }
b331163b
A
953 }
954 }
955 // now compare normalized preferredLanguage to normalized localization names
956 // if matches, copy *original* localization name
957 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
958 if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
959 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
960 foundMatchPrefLangIndex = prefLangIndex;
961#if DEBUG_UALOC
962 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
963#endif
b331163b
A
964 break;
965 }
966 }
967 if (foundMatch) {
b331163b
A
968 break; // found a loc for this preferredLanguages entry
969 }
970
971 // now walk up the parent chain for preferredLanguage
972 // until we find a match or hit root
973 uprv_strcpy(prefLangBaseName, prefLangNormName);
974 while (!foundMatch) {
975 tmpStatus = U_ZERO_ERROR;
976 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
977 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
978 break; // reached root or cannot proceed further
979 }
2ca993e8
A
980#if DEBUG_UALOC
981 printf(" # prefLangParentName %s\n", prefLangParentName);
982#endif
b331163b
A
983
984 // now compare this preferredLanguage parent to normalized localization names
985 // if matches, copy *original* localization name
986 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
987 if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
988 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
989 foundMatchPrefLangIndex = prefLangIndex;
990#if DEBUG_UALOC
991 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
992#endif
b331163b
A
993 break;
994 }
995 }
996 uprv_strcpy(prefLangBaseName, prefLangParentName);
997 }
998 if (foundMatch) {
999 break; // found a loc for this preferredLanguages entry
1000 }
1001
2ca993e8
A
1002 // last try, use parents of selected language to try for backup match
1003 // if we have not already found one
1004 if (availLocIndexBackup < 0) {
b331163b
A
1005 // now walk up the parent chain for preferredLanguage again
1006 // checking against parents of selected availLocNorm entries
1007 // but this time start with current prefLangNormName
1008 uprv_strcpy(prefLangBaseName, prefLangNormName);
2ca993e8 1009 int32_t minDistance = kMaxParentDistance;
b331163b 1010 while (TRUE) {
b331163b
A
1011 // now compare this preferredLanguage to normalized localization names
1012 // parent if have one for this; if matches, copy *original* localization name
2ca993e8
A
1013#if DEBUG_UALOC
1014 printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName);
1015#endif
b331163b 1016 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
2ca993e8
A
1017 char availLocMinOrParent[kLangScriptRegMaxLen + 1];
1018 int32_t distance;
9f1b1155
A
1019 // first check for special Apple parents of availLocNorm; the number
1020 // of locales with such parents is small.
1021 // If no such parent, or if parent has an intermediate numeric region,
1022 // then try stripping the original region.
1023 int32_t availLocParentLen = 0;
2ca993e8
A
1024 const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance);
1025 if (availLocParent) {
1026#if DEBUG_UALOC
1027 printf(" # availLocAppleParentName %s\n", availLocParent);
1028#endif
1029 if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) {
1030 availLocIndexBackup = availLocIndex; // records where the match occurred
1031 backupMatchPrefLangIndex = prefLangIndex;
1032 minDistance = distance;
1033#if DEBUG_UALOC
1034 printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance);
1035#endif
1036 continue;
1037 }
9f1b1155 1038 availLocParentLen = uprv_strlen(availLocParent);
2ca993e8
A
1039 }
1040 if (minDistance <= 1) {
1041 continue; // we can't get any closer in the rest of this iteration
1042 }
9f1b1155 1043 if (availLocParent == NULL || (availLocParentLen >= 6 && isdigit(availLocParent[availLocParentLen-1]))) {
2ca993e8
A
1044 tmpStatus = U_ZERO_ERROR;
1045 int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1046 if (U_SUCCESS(tmpStatus) && regLen > 1) {
1047 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1048 if (U_SUCCESS(tmpStatus)) {
1049 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
1050#if DEBUG_UALOC
1051 printf(" # availLocRegMaxName %s\n", availLocMinOrParent);
1052#endif
1053 char availLocTemp[kLangScriptRegMaxLen + 1];
1054 uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus);
1055 if (U_SUCCESS(tmpStatus)) {
1056 availLocTemp[kLangScriptRegMaxLen] = 0;
1057 uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1058 if (U_SUCCESS(tmpStatus)) {
1059 availLocMinOrParent[kLangScriptRegMaxLen] = 0;
1060#if DEBUG_UALOC
1061 printf(" # availLocNoRegParentName %s\n", availLocMinOrParent);
1062#endif
1063 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
1064 availLocIndexBackup = availLocIndex; // records where the match occurred
1065 backupMatchPrefLangIndex = prefLangIndex;
1066 minDistance = 1;
f3c0d7a5 1067 backupMatchPrefLang_pt_PT = (uprv_strcmp(prefLangNormName, "pt_PT") == 0);
2ca993e8
A
1068#if DEBUG_UALOC
1069 printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n");
1070#endif
1071 continue;
1072 }
1073 }
1074 }
1075 }
1076 }
1077 }
1078 // then check against minimized version of availLocNorm
1079 tmpStatus = U_ZERO_ERROR;
1080 uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1081 if (U_FAILURE(tmpStatus)) {
1082 continue;
1083 }
1084 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
1085#if DEBUG_UALOC
1086 printf(" # availLocMinimized %s\n", availLocMinOrParent);
1087#endif
1088 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
1089 availLocIndexBackup = availLocIndex; // records where the match occurred
1090 backupMatchPrefLangIndex = prefLangIndex;
1091 minDistance = 1;
1092#if DEBUG_UALOC
1093 printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n");
1094#endif
b331163b
A
1095 }
1096 }
2ca993e8 1097 if (availLocIndexBackup >= 0) {
b331163b
A
1098 break;
1099 }
2ca993e8 1100 tmpStatus = U_ZERO_ERROR;
b331163b
A
1101 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1102 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
1103 break; // reached root or cannot proceed further
1104 }
1105 uprv_strcpy(prefLangBaseName, prefLangParentName);
1106 }
1107 }
2ca993e8
A
1108 }
1109 // If we have a backup match, decide what to do
1110 if (availLocIndexBackup >= 0) {
1111 if (!foundMatch) {
1112 // no main match, just use the backup
1113 availLocIndex = availLocIndexBackup;
1114 foundMatch = TRUE;
1115#if DEBUG_UALOC
1116 printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup);
1117#endif
f3c0d7a5 1118 } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && (!backupMatchPrefLang_pt_PT || uprv_strcmp(availLocNorm[availLocIndexBackup], "pt_BR") != 0)) {
2ca993e8
A
1119 // have a main match but backup match was higher in the prefs, use it if for a different language
1120#if DEBUG_UALOC
1121 printf(" # have backup match higher in prefs, comparing its language and script to main match\n");
1122#endif
1123 char mainLang[ULOC_LANG_CAPACITY + 1];
1124 char backupLang[ULOC_LANG_CAPACITY + 1];
1125 UErrorCode tmpStatus = U_ZERO_ERROR;
1126 uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1127 mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1128 uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1129 backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1130 if (U_SUCCESS(tmpStatus)) {
1131 if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1132 // backup match has different language than main match
1133 availLocIndex = availLocIndexBackup;
1134 // foundMatch is already TRUE
1135#if DEBUG_UALOC
1136 printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1137#endif
1138 } else {
1139 // backup match has same language as main match, check scripts too
1140 char availLocMaximized[kLangScriptRegMaxLen + 1];
1141
1142 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1143 availLocMaximized[kLangScriptRegMaxLen] = 0;
1144 uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1145 mainLang[ULOC_LANG_CAPACITY] = 0;
1146
1147 uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1148 availLocMaximized[kLangScriptRegMaxLen] = 0;
1149 uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1150 backupLang[ULOC_LANG_CAPACITY] = 0;
1151
1152 if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1153 // backup match has different script than main match
1154 availLocIndex = availLocIndexBackup;
1155 // foundMatch is already TRUE
1156#if DEBUG_UALOC
1157 printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1158#endif
1159 }
1160 }
1161 }
b331163b
A
1162 }
1163 }
1164
1165 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
1166 if (foundMatch) {
1167 // Here availLocIndex corresponds to the first matched localization
1168 UErrorCode tmpStatus = U_ZERO_ERROR;
1169 int32_t availLocMatchIndex = availLocIndex;
1170 if (locsToUseCount < localizationsToUseCapacity) {
1171 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
1172 }
1173 // at this point we must have availLocBase, and minimally matched against that.
1174 // if we have not already allocated and filled the array of
1175 // normalized availableLocalizations, do so now, but don't require it
1176 if (availLocNorm == NULL) {
1177 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
1178 if (availLocNorm != NULL) {
1179 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1180 tmpStatus = U_ZERO_ERROR;
1181 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
1182 if (U_FAILURE(tmpStatus)) {
1183 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
1184 }
1185 }
1186 }
1187 }
1188
1189 // add normalized form of matching loc, if different and in availLocBase
1190 if (locsToUseCount < localizationsToUseCapacity) {
1191 tmpStatus = U_ZERO_ERROR;
1192 char matchedLocNormName[kLangScriptRegMaxLen + 1];
1193 char matchedLocParentName[kLangScriptRegMaxLen + 1];
1194 // get normalized form of matching loc
1195 if (availLocNorm != NULL) {
1196 uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
1197 } else {
1198 ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
1199 }
1200 if (U_SUCCESS(tmpStatus)) {
1201 // add normalized form of matching loc, if different and in availLocBase
1202 if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
1203 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
1204 // from this point on, availLocIndex no longer corresponds to the matched localization.
1205 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1206 if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
1207 || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
1208 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1209 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1210 break;
1211 }
1212 }
1213 }
1214
1215 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
1216 while (locsToUseCount < localizationsToUseCapacity) {
1217 ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1218 if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
1219 break; // reached root or cannot proceed further
1220 }
1221
1222 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
1223 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1224 if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
1225 || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
1226 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1227 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1228 break;
1229 }
1230 }
1231 uprv_strcpy(matchedLocNormName, matchedLocParentName);
1232 }
1233
1234 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
1235 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
1236 // matchedLocNormName again, comparing against parents of selected availLocNorm entries.
1237 // But this picks up too many matches that are not parents of the matched localization. So
1238 // we just handle these specially.
1239 if ( locsToUseCount < localizationsToUseCapacity
1240 && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
1241 || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
1242 int32_t zhTW_matchIndex = -1;
1243 UBool zhHant_found = FALSE;
1244 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1245 if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
1246 zhTW_matchIndex = availLocIndex;
1247 }
1248 if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
1249 zhHant_found = TRUE;
1250 }
1251 }
1252 if (zhTW_matchIndex >= 0 && !zhHant_found
1253 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
1254 localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
1255 }
1256 }
1257 }
1258 }
1259 }
1260
1261 uprv_free(availLocNorm);
1262 uprv_free(availLocBase);
1263 return locsToUseCount;
1264}
1265