]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ualoc.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / common / ualoc.cpp
CommitLineData
57a6839d
A
1/*
2*****************************************************************************************
3d1f044b 3* Copyright (C) 2014-2019 Apple Inc. All Rights Reserved.
57a6839d
A
4*****************************************************************************************
5*/
6
2ca993e8
A
7#define DEBUG_UALOC 0
8#if DEBUG_UALOC
9#include <stdio.h>
10#endif
11#include <string.h>
9f1b1155 12#include <ctype.h>
57a6839d
A
13#include "unicode/utypes.h"
14#include "unicode/ualoc.h"
15#include "unicode/uloc.h"
16#include "unicode/ures.h"
17#include "unicode/putil.h"
f3c0d7a5 18#include "unicode/ustring.h"
57a6839d
A
19#include "cstring.h"
20#include "cmemory.h"
b331163b
A
21#include "uhash.h"
22#include "umutex.h"
23#include "ucln_cmn.h"
57a6839d
A
24// the following has replacements for some math.h funcs etc
25#include "putilimp.h"
26
27
28// The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
29// From its docs (adapted): [IntF is] a special integer that represents the number in
30// normalized scientific notation.
31// Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
32// offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
33// 14660000000000 -> 1.46600E13 -> 63146600
34// 0.0001 -> 1.00000E-4 -> 46100000
35// -123.456 -> -1.23456E2 -> -52123456
36//
37// Here to avoid an extra division we have the max coefficient as 999999 (instead of
38// 9.99999) and instead offset the exponent by -55.
39//
40static double doubleFromIntF(int32_t intF) {
41 double coefficient = (double)(intF % 1000000);
42 int32_t exponent = (intF / 1000000) - 55;
43 return coefficient * uprv_pow10(exponent);
44}
45
46static int compareLangEntries(const void * entry1, const void * entry2) {
47 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
48 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
49 // want descending order
50 if (fraction1 > fraction2) return -1;
51 if (fraction1 < fraction2) return 1;
52 // userFractions the same, sort by languageCode
53 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
54}
55
f3c0d7a5
A
// Maps a bare language code to the same code with its default script.
// Layout: flat { code, code_Script } pairs, terminated by a single NULL.
// The pairs must stay sorted by language code (strcmp order), because the
// lookup stops as soon as it passes the position where a code would be.
static const char * langToDefaultScript[] = {
    "az",   "az_Latn",
    "bm",   "bm_Latn",      // <rdar://problem/47494729> added
    "bs",   "bs_Latn",
    "byn",  "byn_Ethi",     // <rdar://problem/47494729> added
    "cu",   "cu_Cyrl",      // <rdar://problem/47494729> added
    "ff",   "ff_Latn",      // <rdar://problem/47494729> added
    "ha",   "ha_Latn",      // <rdar://problem/47494729> added
    "iu",   "iu_Cans",
    "kk",   "kk_Cyrl",      // <rdar://problem/47494729> changed from _Arab
    "ks",   "ks_Arab",      // unnecessary?
    "ku",   "ku_Latn",
    "ky",   "ky_Cyrl",
    "mn",   "mn_Cyrl",
    "ms",   "ms_Latn",
    "pa",   "pa_Guru",
    "rif",  "rif_Tfng",     // unnecessary? no locale support anyway
    "sd",   "sd_Arab",      // <rdar://problem/47494729> added
    "shi",  "shi_Tfng",
    "sr",   "sr_Cyrl",
    "tg",   "tg_Cyrl",
    "tk",   "tk_Latn",      // unnecessary?
    "ug",   "ug_Arab",
    "uz",   "uz_Latn",
    "vai",  "vai_Vaii",
    "yue",  "yue_Hant",     // to match CLDR data, not Apple default
    "zh",   "zh_Hans",
    NULL
};
87
88static const char * langCodeWithScriptIfAmbig(const char * langCode) {
89 const char ** langToDefScriptPtr = langToDefaultScript;
90 const char * testCurLoc;
91 while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) {
92 int cmp = uprv_strcmp(langCode, testCurLoc);
93 if (cmp <= 0) {
94 if (cmp == 0) {
95 return *langToDefScriptPtr;
96 }
97 break;
98 }
99 langToDefScriptPtr++;
100 }
101 return langCode;
102}
103
57a6839d
A
104static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
105static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
106static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
107
108enum {
109 kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
110 kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this
111};
112
113U_CAPI int32_t U_EXPORT2
114ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
115 UALanguageEntry *entries, int32_t entriesCapacity,
116 UErrorCode *err)
117{
118 if (U_FAILURE(*err)) {
119 return 0;
120 }
121 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
122 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
123 *err = U_ILLEGAL_ARGUMENT_ERROR;
124 return 0;
125 }
126 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
127 rb = ures_getByKey(rb, "territoryInfo", rb, err);
128 rb = ures_getByKey(rb, regionID, rb, err);
129 if (U_FAILURE(*err)) {
130 ures_close(rb);
131 return 0;
132 }
133
134 int32_t entryCount = 0;
135 UResourceBundle *langBund = NULL;
136 int32_t lbIdx, lbCount = ures_getSize(rb);
137 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
138 UALanguageEntry * langEntries = localLangEntries;
139 int32_t langEntriesMax = kLocalLangEntriesMax;
140
141 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
142 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
143 if (U_FAILURE(*err)) {
144 break;
145 }
146 const char * langCode = ures_getKey(langBund);
147 if (uprv_strcmp(langCode,"territoryF") == 0) {
148 continue;
149 }
150 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
151 continue; // a code we cannot handle
152 }
153
154 UErrorCode localErr = U_ZERO_ERROR;
155 double userFraction = 0.0;
156 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
157 if (U_SUCCESS(localErr)) {
158 int32_t intF = ures_getInt(itemBund, &localErr);
159 if (U_SUCCESS(localErr)) {
160 userFraction = doubleFromIntF(intF);
161 }
162 ures_close(itemBund);
163 }
164 if (userFraction < minimumFraction) {
165 continue;
166 }
167 if (entries != NULL) {
168 localErr = U_ZERO_ERROR;
169 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
170 int32_t ulen;
171 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
172 if (U_SUCCESS(localErr)) {
173 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
174 if (cmp == 0) {
175 langStatus = UALANGSTATUS_OFFICIAL;
176 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
177 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
178 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
179 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
180 }
181 }
182 // Now we have all of the info for our next entry
183 if (entryCount >= langEntriesMax) {
184 int32_t newMax = langEntriesMax * kLangEntriesFactor;
185 if (langEntries == localLangEntries) {
186 // first allocation, copy from local buf
187 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
188 if (langEntries == NULL) {
189 *err = U_MEMORY_ALLOCATION_ERROR;
190 break;
191 }
192 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
193 } else {
194 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
195 if (langEntries == NULL) {
196 *err = U_MEMORY_ALLOCATION_ERROR;
197 break;
198 }
199 }
200 langEntriesMax = newMax;
201 }
f3c0d7a5 202 uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode));
57a6839d
A
203 langEntries[entryCount].userFraction = userFraction;
204 langEntries[entryCount].status = langStatus;
205 }
206 entryCount++;
207 }
208 ures_close(langBund);
209 ures_close(rb);
210 if (U_FAILURE(*err)) {
211 if (langEntries != localLangEntries) {
212 free(langEntries);
213 }
214 return 0;
215 }
216 if (entries != NULL) {
217 // sort langEntries, copy entries that fit to provided array
218 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
219 if (entryCount > entriesCapacity) {
220 entryCount = entriesCapacity;
221 }
222 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
223 if (langEntries != localLangEntries) {
224 free(langEntries);
225 }
226 }
227 return entryCount;
228}
229
// Parent overrides consulted by ualoc_getAppleParent before any resource data.
// Layout: flat { locale, parent } pairs, terminated by a single NULL.
// Pairs are in strcmp order of the locale; the lookup stops at the first
// entry that sorts after the locale being searched for.
static const char * forceParent[] = {
    "en_150",   "en_GB",    // en for Europe
    "en_AU",    "en_GB",
    "en_BD",    "en_GB",    // en for Bangladesh
    "en_BE",    "en_150",   // en for Belgium goes to en for Europe
    "en_DG",    "en_GB",
    "en_FK",    "en_GB",
    "en_GG",    "en_GB",
    "en_GI",    "en_GB",
    "en_HK",    "en_GB",    // en for Hong Kong
    "en_IE",    "en_GB",
    "en_IM",    "en_GB",
    "en_IN",    "en_GB",
    "en_IO",    "en_GB",
    "en_JE",    "en_GB",
    "en_JM",    "en_GB",
    "en_LK",    "en_GB",
    "en_MO",    "en_GB",
    "en_MT",    "en_GB",
    "en_MV",    "en_GB",    // for Maldives
    "en_MY",    "en_GB",    // en for Malaysia
    "en_NZ",    "en_AU",
    "en_PK",    "en_GB",    // en for Pakistan
    "en_SG",    "en_GB",
    "en_SH",    "en_GB",
    "en_VG",    "en_GB",
    "yue",      "yue_CN",   // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M)
    "yue_CN",   "root",
    "yue_HK",   "root",
    "yue_Hans", "yue_CN",
    "yue_Hant", "yue_HK",
    "zh",       "zh_CN",
    "zh_CN",    "root",
    "zh_Hant",  "zh_TW",
    "zh_TW",    "root",
    NULL
};
267
2ca993e8
A
// Capacity (excluding the terminating 0) of the buffer used for child locale
// names read from the parentLocales data.
enum {
    kLocBaseNameMax = 16
};
269
57a6839d
A
270U_CAPI int32_t U_EXPORT2
271ualoc_getAppleParent(const char* localeID,
272 char * parent,
273 int32_t parentCapacity,
274 UErrorCode* err)
275{
276 UResourceBundle *rb;
277 int32_t len;
278 UErrorCode tempStatus;
279 char locbuf[ULOC_FULLNAME_CAPACITY+1];
08b89b0a 280 char * foundDoubleUnderscore;
57a6839d
A
281
282 if (U_FAILURE(*err)) {
283 return 0;
284 }
285 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
286 *err = U_ILLEGAL_ARGUMENT_ERROR;
287 return 0;
288 }
08b89b0a 289 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
57a6839d
A
290 if (U_FAILURE(*err)) {
291 return 0;
292 }
293 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
294 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
295 *err = U_ZERO_ERROR;
296 }
08b89b0a
A
297 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
298 if (foundDoubleUnderscore != NULL) {
299 *foundDoubleUnderscore = 0; /* terminate at the __ */
300 len = uprv_strlen(locbuf);
301 }
b331163b 302 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
57a6839d
A
303 const char ** forceParentPtr = forceParent;
304 const char * testCurLoc;
305 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
306 int cmp = uprv_strcmp(locbuf, testCurLoc);
307 if (cmp <= 0) {
308 if (cmp == 0) {
309 len = uprv_strlen(*forceParentPtr);
310 if (len < parentCapacity) {
311 uprv_strcpy(parent, *forceParentPtr);
312 } else {
313 *err = U_BUFFER_OVERFLOW_ERROR;
314 }
315 return len;
316 }
317 break;
318 }
319 forceParentPtr++;
320 }
321 }
322 tempStatus = U_ZERO_ERROR;
323 rb = ures_openDirect(NULL, locbuf, &tempStatus);
324 if (U_SUCCESS(tempStatus)) {
325 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
2ca993e8 326 ures_close(rb);
57a6839d
A
327 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
328 // we have followed an alias
329 len = uprv_strlen(actualLocale);
330 if (len < parentCapacity) {
331 uprv_strcpy(parent, actualLocale);
332 } else {
333 *err = U_BUFFER_OVERFLOW_ERROR;
334 }
57a6839d
A
335 return len;
336 }
2ca993e8
A
337 }
338 tempStatus = U_ZERO_ERROR;
339 rb = ures_openDirect(NULL, "supplementalData", &tempStatus);
340 rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus);
341 if (U_SUCCESS(tempStatus)) {
342 UResourceBundle * parentMapBundle = NULL;
343 int32_t childLen = 0;
344 while (childLen == 0) {
345 tempStatus = U_ZERO_ERROR;
346 parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus);
347 if (U_FAILURE(tempStatus)) {
348 break; // no more parent bundles, normal exit
349 }
350 char childName[kLocBaseNameMax + 1];
351 childName[kLocBaseNameMax] = 0;
352 const char * childPtr = NULL;
353 if (ures_getType(parentMapBundle) == URES_STRING) {
354 childLen = kLocBaseNameMax;
355 childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus);
356 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
357 childLen = 0;
358 }
359 } else { // should be URES_ARRAY
360 int32_t childCur, childCount = ures_getSize(parentMapBundle);
361 for (childCur = 0; childCur < childCount && childLen == 0; childCur++) {
362 tempStatus = U_ZERO_ERROR;
363 childLen = kLocBaseNameMax;
364 childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus);
365 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
366 childLen = 0;
367 }
368 }
369 }
370 }
371 ures_close(rb);
372 if (childLen > 0) {
373 // parentMapBundle key is the parent we are looking for
374 const char * keyStr = ures_getKey(parentMapBundle);
375 len = uprv_strlen(keyStr);
57a6839d 376 if (len < parentCapacity) {
2ca993e8 377 uprv_strcpy(parent, keyStr);
57a6839d
A
378 } else {
379 *err = U_BUFFER_OVERFLOW_ERROR;
380 }
2ca993e8 381 ures_close(parentMapBundle);
57a6839d
A
382 return len;
383 }
2ca993e8 384 ures_close(parentMapBundle);
57a6839d 385 }
2ca993e8 386
57a6839d
A
387 len = uloc_getParent(locbuf, parent, parentCapacity, err);
388 if (U_SUCCESS(*err) && len == 0) {
389 len = 4;
390 if (len < parentCapacity) {
391 uprv_strcpy(parent, "root");
392 } else {
393 *err = U_BUFFER_OVERFLOW_ERROR;
394 }
395 }
396 return len;
397}
398
b331163b
A
399// =================
400// Data and related functions for ualoc_localizationsToUse
401// =================
402
403static const char * appleAliasMap[][2] = {
404 // names are lowercase here because they are looked up after being processed by uloc_getBaseName
405 { "arabic", "ar" }, // T2
406 { "chinese", "zh_Hans" }, // T0
407 { "danish", "da" }, // T2
408 { "dutch", "nl" }, // T1, still in use
409 { "english", "en" }, // T0, still in use
410 { "finnish", "fi" }, // T2
411 { "french", "fr" }, // T0, still in use
412 { "german", "de" }, // T0, still in use
413 { "italian", "it" }, // T1, still in use
414 { "japanese", "ja" }, // T0, still in use
415 { "korean", "ko" }, // T1
a961784b 416 { "no_NO", "nb_NO" }, // special
b331163b
A
417 { "norwegian", "nb" }, // T2
418 { "polish", "pl" }, // T2
419 { "portuguese", "pt" }, // T2
420 { "russian", "ru" }, // T2
421 { "spanish", "es" }, // T1, still in use
422 { "swedish", "sv" }, // T2
423 { "thai", "th" }, // T2
424 { "turkish", "tr" }, // T2
f3c0d7a5 425 { "yue", "yue_Hans"}, // special
b331163b
A
426 { "zh", "zh_Hans" }, // special
427};
2ca993e8 428enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) };
b331163b
A
429
430static const char * appleParentMap[][2] = {
431 { "en_150", "en_GB" }, // Apple custom parent
432 { "en_AD", "en_150" }, // Apple locale addition
f3c0d7a5
A
433 { "en_AG", "en_GB" }, // Antigua & Barbuda
434 { "en_AI", "en_GB" }, // Anguilla
b331163b
A
435 { "en_AL", "en_150" }, // Apple locale addition
436 { "en_AT", "en_150" }, // Apple locale addition
437 { "en_AU", "en_GB" }, // Apple custom parent
438 { "en_BA", "en_150" }, // Apple locale addition
f3c0d7a5 439 { "en_BB", "en_GB" }, // Barbados
b331163b 440 { "en_BD", "en_GB" }, // Apple custom parent
a961784b 441 { "en_BE", "en_150" }, // Apple custom parent
f3c0d7a5
A
442 { "en_BM", "en_GB" }, // Bermuda
443 { "en_BS", "en_GB" }, // Bahamas
444 { "en_BW", "en_GB" }, // Botswana
445 { "en_BZ", "en_GB" }, // Belize
446 { "en_CC", "en_AU" }, // Cocos (Keeling) Islands
b331163b 447 { "en_CH", "en_150" }, // Apple locale addition
f3c0d7a5
A
448 { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?)
449 { "en_CX", "en_AU" }, // Christmas Island
b331163b
A
450 { "en_CY", "en_150" }, // Apple locale addition
451 { "en_CZ", "en_150" }, // Apple locale addition
452 { "en_DE", "en_150" }, // Apple locale addition
a961784b 453 { "en_DG", "en_GB" },
b331163b 454 { "en_DK", "en_150" }, // Apple locale addition
f3c0d7a5 455 { "en_DM", "en_GB" }, // Dominica
b331163b
A
456 { "en_EE", "en_150" }, // Apple locale addition
457 { "en_ES", "en_150" }, // Apple locale addition
458 { "en_FI", "en_150" }, // Apple locale addition
f3c0d7a5 459 { "en_FJ", "en_GB" }, // Fiji
a961784b 460 { "en_FK", "en_GB" },
b331163b 461 { "en_FR", "en_150" }, // Apple locale addition
f3c0d7a5 462 { "en_GD", "en_GB" }, // Grenada
a961784b 463 { "en_GG", "en_GB" },
f3c0d7a5 464 { "en_GH", "en_GB" }, // Ghana
a961784b 465 { "en_GI", "en_GB" },
f3c0d7a5 466 { "en_GM", "en_GB" }, // Gambia
b331163b 467 { "en_GR", "en_150" }, // Apple locale addition
f3c0d7a5 468 { "en_GY", "en_GB" }, // Guyana
b331163b
A
469 { "en_HK", "en_GB" }, // Apple custom parent
470 { "en_HR", "en_150" }, // Apple locale addition
471 { "en_HU", "en_150" }, // Apple locale addition
a961784b 472 { "en_IE", "en_GB" },
b331163b 473 { "en_IL", "en_001" }, // Apple locale addition
a961784b 474 { "en_IM", "en_GB" },
b331163b 475 { "en_IN", "en_GB" }, // Apple custom parent
a961784b 476 { "en_IO", "en_GB" },
b331163b
A
477 { "en_IS", "en_150" }, // Apple locale addition
478 { "en_IT", "en_150" }, // Apple locale addition
a961784b 479 { "en_JE", "en_GB" },
a62d09fc 480 { "en_JM", "en_GB" },
f3c0d7a5
A
481 { "en_KE", "en_GB" }, // Kenya
482 { "en_KI", "en_GB" }, // Kiribati
483 { "en_KN", "en_GB" }, // St. Kitts & Nevis
484 { "en_KY", "en_GB" }, // Cayman Islands
485 { "en_LC", "en_GB" }, // St. Lucia
3d1f044b 486 { "en_LK", "en_GB" }, // Apple custom parent
f3c0d7a5 487 { "en_LS", "en_GB" }, // Lesotho
b331163b
A
488 { "en_LT", "en_150" }, // Apple locale addition
489 { "en_LU", "en_150" }, // Apple locale addition
490 { "en_LV", "en_150" }, // Apple locale addition
491 { "en_ME", "en_150" }, // Apple locale addition
a961784b 492 { "en_MO", "en_GB" },
f3c0d7a5 493 { "en_MS", "en_GB" }, // Montserrat
a961784b 494 { "en_MT", "en_GB" },
f3c0d7a5 495 { "en_MU", "en_GB" }, // Mauritius
2ca993e8 496 { "en_MV", "en_GB" },
f3c0d7a5 497 { "en_MW", "en_GB" }, // Malawi
b331163b 498 { "en_MY", "en_GB" }, // Apple custom parent
f3c0d7a5
A
499 { "en_NA", "en_GB" }, // Namibia
500 { "en_NF", "en_AU" }, // Norfolk Island
501 { "en_NG", "en_GB" }, // Nigeria
b331163b
A
502 { "en_NL", "en_150" }, // Apple locale addition
503 { "en_NO", "en_150" }, // Apple locale addition
f3c0d7a5
A
504 { "en_NR", "en_AU" }, // Nauru
505 { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?)
2ca993e8 506 { "en_NZ", "en_AU" },
f3c0d7a5 507 { "en_PG", "en_AU" }, // Papua New Guinea
b331163b
A
508 { "en_PK", "en_GB" }, // Apple custom parent
509 { "en_PL", "en_150" }, // Apple locale addition
f3c0d7a5 510 { "en_PN", "en_GB" }, // Pitcairn Islands
b331163b
A
511 { "en_PT", "en_150" }, // Apple locale addition
512 { "en_RO", "en_150" }, // Apple locale addition
3d1f044b 513 { "en_RS", "en_150" }, // Apple locale addition
b331163b 514 { "en_RU", "en_150" }, // Apple locale addition
f3c0d7a5
A
515 { "en_SB", "en_GB" }, // Solomon Islands
516 { "en_SC", "en_GB" }, // Seychelles
517 { "en_SD", "en_GB" }, // Sudan
b331163b 518 { "en_SE", "en_150" }, // Apple locale addition
a961784b
A
519 { "en_SG", "en_GB" },
520 { "en_SH", "en_GB" },
b331163b
A
521 { "en_SI", "en_150" }, // Apple locale addition
522 { "en_SK", "en_150" }, // Apple locale addition
f3c0d7a5
A
523 { "en_SL", "en_GB" }, // Sierra Leone
524 { "en_SS", "en_GB" }, // South Sudan
525 { "en_SZ", "en_GB" }, // Swaziland
526 { "en_TC", "en_GB" }, // Tristan da Cunha
527 { "en_TO", "en_GB" }, // Tonga
528 { "en_TT", "en_GB" }, // Trinidad & Tobago
529 { "en_TV", "en_GB" }, // Tuvalu
530 { "en_TZ", "en_GB" }, // Tanzania
3d1f044b 531 { "en_UA", "en_150" }, // Apple locale addition
f3c0d7a5
A
532 { "en_UG", "en_GB" }, // Uganda
533 { "en_VC", "en_GB" }, // St. Vincent & Grenadines
a961784b 534 { "en_VG", "en_GB" },
f3c0d7a5
A
535 { "en_VU", "en_GB" }, // Vanuatu
536 { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?)
537 { "en_ZA", "en_GB" }, // South Africa
538 { "en_ZM", "en_GB" }, // Zambia
539 { "en_ZW", "en_GB" }, // Zimbabwe
b331163b 540};
2ca993e8
A
541enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) };
542
543typedef struct {
544 const char * locale;
545 const char * parent;
546 int8_t distance;
547} LocParentAndDistance;
548
549static LocParentAndDistance locParentMap[] = {
550 // The localizations listed in the first column are in
551 // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.).
552 // The distance is a rough measure of distance from
553 // the localization to its parent, used as a weight.
3d1f044b 554 { "de_DE", "de", 0 },
f3c0d7a5 555 { "en_001", "en", 2 },
2ca993e8
A
556 { "en_150", "en_GB", 1 },
557 { "en_AU", "en_GB", 1 },
f3c0d7a5
A
558 { "en_GB", "en_001", 0 },
559 { "en_US", "en", 0 },
2ca993e8
A
560 { "es_419", "es", 2 },
561 { "es_MX", "es_419", 0 },
3d1f044b
A
562 { "fr_FR", "fr", 0 },
563 { "it_IT", "it", 0 },
2ca993e8 564 { "pt_PT", "pt", 2 },
f3c0d7a5
A
565 { "yue_Hans_CN","yue_Hans",0 },
566 { "yue_Hant_HK","yue_Hant",0 },
2ca993e8
A
567 { "zh_Hans_CN", "zh_Hans", 0 },
568 { "zh_Hant_HK", "zh_Hant", 1 },
569 { "zh_Hant_TW", "zh_Hant", 0 },
b331163b 570};
2ca993e8 571enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 };
b331163b
A
572
// Sizing for the data built by initializeMapData.
enum {
    // Bytes allocated for gStrings, the pool holding strings copied from
    // resource data.
    kStringsAllocSize   = 5280, // cannot expand; current actual usage 5259
    // Initial size requested for gParentMap.
    kParentMapInitCount = 272   // can expand; current actual usage 254
};
577
578U_CDECL_BEGIN
579static UBool U_CALLCONV ualocale_cleanup(void);
580U_CDECL_END
581
582U_NAMESPACE_BEGIN
583
584static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
585
586static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
587static char* gStrings = NULL;
588static UHashtable* gAliasMap = NULL;
589static UHashtable* gParentMap = NULL;
590
591U_NAMESPACE_END
592
593U_CDECL_BEGIN
594
595static UBool U_CALLCONV ualocale_cleanup(void)
596{
597 U_NAMESPACE_USE
598
599 gUALocaleCacheInitOnce.reset();
600
601 if (gMapDataState > 0) {
602 uhash_close(gParentMap);
603 gParentMap = NULL;
604 uhash_close(gAliasMap);
605 gAliasMap = NULL;
606 uprv_free(gStrings);
607 gStrings = NULL;
608 }
609 gMapDataState = 0;
610 return TRUE;
611}
612
613static void initializeMapData() {
614 U_NAMESPACE_USE
615
616 UResourceBundle * curBundle;
617 char* stringsPtr;
618 char* stringsEnd;
619 UErrorCode status;
620 int32_t entryIndex, icuEntryCount;
621
622 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
623
624 gStrings = (char*)uprv_malloc(kStringsAllocSize);
625 if (gStrings) {
626 stringsPtr = gStrings;
627 stringsEnd = gStrings + kStringsAllocSize;
628 }
629
630 status = U_ZERO_ERROR;
631 curBundle = NULL;
632 icuEntryCount = 0;
633 if (gStrings) {
634 curBundle = ures_openDirect(NULL, "metadata", &status);
635 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
636 curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
637 if (U_SUCCESS(status)) {
638 icuEntryCount = ures_getSize(curBundle); // currently 331
639 }
640 }
641 status = U_ZERO_ERROR;
642 gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
643 kAppleAliasMapCount + icuEntryCount, &status);
644 // defaults to keyDeleter NULL
645 if (U_SUCCESS(status)) {
646 for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
647 uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
3d1f044b
A
648#if DEBUG_UALOC
649 if (U_FAILURE(status)) {
650 printf("# uhash_put 1 fails %s\n", u_errorName(status));
651 }
652#endif
b331163b
A
653 }
654 status = U_ZERO_ERROR;
655 UResourceBundle * aliasMapBundle = NULL;
656 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
657 aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
658 if (U_FAILURE(status)) {
659 break; // error
660 }
661 const char * keyStr = ures_getKey(aliasMapBundle);
662 int32_t len = uprv_strlen(keyStr);
663 if (len >= stringsEnd - stringsPtr) {
664 break; // error
665 }
666 uprv_strcpy(stringsPtr, keyStr);
667 char * inLocStr = stringsPtr;
668 stringsPtr += len + 1;
669
670 len = stringsEnd - stringsPtr - 1;
671 ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
672 if (U_FAILURE(status)) {
673 break; // error
674 }
675 stringsPtr[len] = 0;
676 uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
3d1f044b
A
677#if DEBUG_UALOC
678 if (U_FAILURE(status)) {
679 printf("# uhash_put 2 fails %s\n", u_errorName(status));
680 }
681#endif
b331163b
A
682 stringsPtr += len + 1;
683 }
684 ures_close(aliasMapBundle);
685 } else {
686 ures_close(curBundle);
687 uprv_free(gStrings);
688 gMapDataState = -1; // failure
689 return;
690 }
691 ures_close(curBundle);
692
693 status = U_ZERO_ERROR;
694 gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
695 kParentMapInitCount, &status);
696 // defaults to keyDeleter NULL
697 if (U_SUCCESS(status)) {
698 curBundle = ures_openDirect(NULL, "supplementalData", &status);
699 curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
700 if (U_SUCCESS(status)) {
701 UResourceBundle * parentMapBundle = NULL;
702 while (TRUE) {
703 parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
704 if (U_FAILURE(status)) {
705 break; // no more parent bundles, normal exit
706 }
707 const char * keyStr = ures_getKey(parentMapBundle);
708 int32_t len = uprv_strlen(keyStr);
709 if (len >= stringsEnd - stringsPtr) {
710 break; // error
711 }
712 uprv_strcpy(stringsPtr, keyStr);
713 char * parentStr = stringsPtr;
714 stringsPtr += len + 1;
715
716 if (ures_getType(parentMapBundle) == URES_STRING) {
717 len = stringsEnd - stringsPtr - 1;
718 ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
719 if (U_FAILURE(status)) {
720 break; // error
721 }
722 stringsPtr[len] = 0;
723 uhash_put(gParentMap, stringsPtr, parentStr, &status);
724 stringsPtr += len + 1;
725 } else {
726 // should be URES_ARRAY
727 icuEntryCount = ures_getSize(parentMapBundle);
728 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
729 len = stringsEnd - stringsPtr - 1;
730 ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
731 if (U_FAILURE(status)) {
732 break;
733 }
734 stringsPtr[len] = 0;
735 uhash_put(gParentMap, stringsPtr, parentStr, &status);
736 stringsPtr += len + 1;
737 }
738 }
739 }
740 ures_close(parentMapBundle);
741 }
742 ures_close(curBundle);
743
744 status = U_ZERO_ERROR;
745 for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
746 uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
747 }
748 } else {
749 uhash_close(gAliasMap);
750 gAliasMap = NULL;
751 uprv_free(gStrings);
752 gMapDataState = -1; // failure
753 return;
754 }
755
2ca993e8
A
756#if DEBUG_UALOC
757 printf("# gStrings size %ld\n", stringsPtr - gStrings);
758 printf("# gParentMap count %d\n", uhash_count(gParentMap));
759#endif
b331163b
A
760 gMapDataState = 1;
761}
762
763U_CDECL_END
764
765// The following maps aliases, etc. Ensures 0-termination if no error.
766static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
767{
768 if (U_FAILURE(*status)) {
769 return;
770 }
771 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
772
773 const char *replacement = NULL;
0f5d89e8
A
774 if (icu::gMapDataState > 0) {
775 replacement = (const char *)uhash_get(icu::gAliasMap, locale);
b331163b
A
776 }
777 if (replacement == NULL) {
778 replacement = locale;
779 }
2ca993e8 780 int32_t len = strnlen(replacement, normalizedCapacity);
b331163b
A
781 if (len < normalizedCapacity) { // allow for 0 termination
782 uprv_strcpy(normalized, replacement);
783 } else {
784 *status = U_BUFFER_OVERFLOW_ERROR;
785 }
786}
787
788static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
789{
790 if (U_FAILURE(*status)) {
791 return;
792 }
0f5d89e8
A
793 if (icu::gMapDataState > 0) {
794 const char *replacement = (const char *)uhash_get(icu::gParentMap, locale);
b331163b
A
795 if (replacement) {
796 int32_t len = uprv_strlen(replacement);
797 if (len < parentCapacity) { // allow for 0 termination
798 uprv_strcpy(parent, replacement);
3d1f044b
A
799#if DEBUG_UALOC
800 printf(" # ualoc_getParent 1: locale %s -> parent %s\n", locale, parent);
801#endif
b331163b
A
802 } else {
803 *status = U_BUFFER_OVERFLOW_ERROR;
804 }
805 return;
806 }
807 }
808 uloc_getParent(locale, parent, parentCapacity - 1, status);
3d1f044b
A
809#if DEBUG_UALOC
810 printf(" # ualoc_getParent 2: locale %s -> parent %s\n", locale, parent);
811#endif
b331163b
A
812 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
813}
814
815// Might do something better for this, perhaps maximizing locales then stripping
2ca993e8 816static const char * getLocParent(const char *locale, int32_t* distance)
b331163b
A
817{
818 int32_t locParentIndex;
819 for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
2ca993e8
A
820 if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) {
821 *distance = locParentMap[locParentIndex].distance;
822 return locParentMap[locParentIndex].parent;
b331163b
A
823 }
824 }
0f5d89e8
A
825 if (icu::gMapDataState > 0) {
826 const char *replacement = (const char *)uhash_get(icu::gParentMap, locale);
f3c0d7a5
A
827 if (replacement) {
828 *distance = 1;
829 return replacement;
830 }
831 }
b331163b
A
832 return NULL;
833}
834
835// this just checks if the *pointer* value is already in the array
836static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
837{
838 int32_t locIndex;
839 for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
840 if (locToCheck == localizationsToUse[locIndex]) {
841 return TRUE;
842 }
843 }
844 return FALSE;
845}
846
847enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
848
849int32_t
850ualoc_localizationsToUse( const char* const *preferredLanguages,
851 int32_t preferredLanguagesCount,
852 const char* const *availableLocalizations,
853 int32_t availableLocalizationsCount,
854 const char* *localizationsToUse,
855 int32_t localizationsToUseCapacity,
856 UErrorCode *status )
857{
858 if (U_FAILURE(*status)) {
859 return -1;
860 }
861 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
862 *status = U_ILLEGAL_ARGUMENT_ERROR;
863 return -1;
864 }
865 // get resource data, need to protect with mutex
0f5d89e8
A
866 if (icu::gMapDataState == 0) {
867 umtx_initOnce(icu::gUALocaleCacheInitOnce, initializeMapData);
b331163b
A
868 }
869 int32_t locsToUseCount = 0;
870 int32_t prefLangIndex, availLocIndex = 0;
2ca993e8
A
871 int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match
872 int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0;
b331163b
A
873 char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
874 char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
b331163b 875 UBool foundMatch = FALSE;
f3c0d7a5 876 UBool backupMatchPrefLang_pt_PT = FALSE;
b331163b 877
2ca993e8
A
878#if DEBUG_UALOC
879 if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) {
880 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n",
881 preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]);
882 } else {
883 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n",
884 preferredLanguagesCount, availableLocalizationsCount);
885 }
886#endif
887
b331163b
A
888 // Part 1, find the best matching localization, if any
889 for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
890 char prefLangBaseName[kLangScriptRegMaxLen + 1];
891 char prefLangNormName[kLangScriptRegMaxLen + 1];
892 char prefLangParentName[kLangScriptRegMaxLen + 1];
893 UErrorCode tmpStatus = U_ZERO_ERROR;
894
895 if (preferredLanguages[prefLangIndex] == NULL) {
896 continue; // skip NULL preferredLanguages entry, go to next one
897 }
898 // use underscores, fix bad capitalization, delete any keywords
899 uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
900 if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
901 uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
902 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
903 }
904 prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
2ca993e8
A
905#if DEBUG_UALOC
906 printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName);
907#endif
b331163b
A
908
909 // if we have not already allocated and filled the array of
910 // base availableLocalizations, do so now.
911 if (availLocBase == NULL) {
912 availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
913 if (availLocBase == NULL) {
914 continue; // cannot further check this preferredLanguages entry, go to next one
915 }
2ca993e8
A
916#if DEBUG_UALOC
917 printf(" # allocate & fill availLocBase\n");
918#endif
b331163b
A
919 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
920 tmpStatus = U_ZERO_ERROR;
2ca993e8
A
921 if (availableLocalizations[availLocIndex] == NULL) {
922 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
923 continue;
924 }
b331163b
A
925 uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
926 if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
927 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8 928 continue;
b331163b 929 }
2ca993e8
A
930 availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
931#if DEBUG_UALOC
932 printf(" # add availLocBase %s\n", availLocBase[availLocIndex]);
933#endif
b331163b
A
934 }
935 }
936 // first compare base preferredLanguage to base versions of availableLocalizations names
937 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
938 if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
939 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
940 foundMatchPrefLangIndex = prefLangIndex;
941#if DEBUG_UALOC
942 printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]);
943#endif
b331163b
A
944 break;
945 }
946 }
947 if (foundMatch) {
b331163b
A
948 break; // found a loc for this preferredLanguages entry
949 }
950
951 // get normalized preferredLanguage
952 tmpStatus = U_ZERO_ERROR;
953 ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
954 if (U_FAILURE(tmpStatus)) {
955 continue; // can't handle this preferredLanguages entry, go to next one
956 }
2ca993e8
A
957#if DEBUG_UALOC
958 printf(" # prefLangNormName %s\n", prefLangNormName);
959#endif
b331163b
A
960 // if we have not already allocated and filled the array of
961 // normalized availableLocalizations, do so now.
962 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK",
963 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
964 if (availLocNorm == NULL) {
965 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
966 if (availLocNorm == NULL) {
967 continue; // cannot further check this preferredLanguages entry, go to next one
968 }
2ca993e8
A
969#if DEBUG_UALOC
970 printf(" # allocate & fill availLocNorm\n");
971#endif
b331163b
A
972 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
973 tmpStatus = U_ZERO_ERROR;
974 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
975 if (U_FAILURE(tmpStatus)) {
976 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8
A
977#if DEBUG_UALOC
978 } else {
979 printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
980#endif
b331163b 981 }
b331163b
A
982 }
983 }
984 // now compare normalized preferredLanguage to normalized localization names
985 // if matches, copy *original* localization name
986 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
987 if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
988 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
989 foundMatchPrefLangIndex = prefLangIndex;
990#if DEBUG_UALOC
991 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
992#endif
b331163b
A
993 break;
994 }
995 }
996 if (foundMatch) {
b331163b
A
997 break; // found a loc for this preferredLanguages entry
998 }
999
1000 // now walk up the parent chain for preferredLanguage
1001 // until we find a match or hit root
1002 uprv_strcpy(prefLangBaseName, prefLangNormName);
1003 while (!foundMatch) {
1004 tmpStatus = U_ZERO_ERROR;
1005 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1006 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
1007 break; // reached root or cannot proceed further
1008 }
2ca993e8
A
1009#if DEBUG_UALOC
1010 printf(" # prefLangParentName %s\n", prefLangParentName);
1011#endif
b331163b
A
1012
1013 // now compare this preferredLanguage parent to normalized localization names
1014 // if matches, copy *original* localization name
1015 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1016 if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
1017 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
1018 foundMatchPrefLangIndex = prefLangIndex;
1019#if DEBUG_UALOC
1020 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
1021#endif
b331163b
A
1022 break;
1023 }
1024 }
1025 uprv_strcpy(prefLangBaseName, prefLangParentName);
1026 }
1027 if (foundMatch) {
1028 break; // found a loc for this preferredLanguages entry
1029 }
1030
2ca993e8
A
1031 // last try, use parents of selected language to try for backup match
1032 // if we have not already found one
1033 if (availLocIndexBackup < 0) {
b331163b
A
1034 // now walk up the parent chain for preferredLanguage again
1035 // checking against parents of selected availLocNorm entries
1036 // but this time start with current prefLangNormName
1037 uprv_strcpy(prefLangBaseName, prefLangNormName);
2ca993e8 1038 int32_t minDistance = kMaxParentDistance;
b331163b 1039 while (TRUE) {
b331163b
A
1040 // now compare this preferredLanguage to normalized localization names
1041 // parent if have one for this; if matches, copy *original* localization name
2ca993e8
A
1042#if DEBUG_UALOC
1043 printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName);
1044#endif
b331163b 1045 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
2ca993e8
A
1046 char availLocMinOrParent[kLangScriptRegMaxLen + 1];
1047 int32_t distance;
9f1b1155
A
1048 // first check for special Apple parents of availLocNorm; the number
1049 // of locales with such parents is small.
1050 // If no such parent, or if parent has an intermediate numeric region,
1051 // then try stripping the original region.
1052 int32_t availLocParentLen = 0;
2ca993e8
A
1053 const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance);
1054 if (availLocParent) {
1055#if DEBUG_UALOC
1056 printf(" # availLocAppleParentName %s\n", availLocParent);
1057#endif
1058 if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) {
1059 availLocIndexBackup = availLocIndex; // records where the match occurred
1060 backupMatchPrefLangIndex = prefLangIndex;
1061 minDistance = distance;
1062#if DEBUG_UALOC
1063 printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance);
1064#endif
1065 continue;
1066 }
9f1b1155 1067 availLocParentLen = uprv_strlen(availLocParent);
2ca993e8
A
1068 }
1069 if (minDistance <= 1) {
1070 continue; // we can't get any closer in the rest of this iteration
1071 }
9f1b1155 1072 if (availLocParent == NULL || (availLocParentLen >= 6 && isdigit(availLocParent[availLocParentLen-1]))) {
2ca993e8
A
1073 tmpStatus = U_ZERO_ERROR;
1074 int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1075 if (U_SUCCESS(tmpStatus) && regLen > 1) {
1076 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1077 if (U_SUCCESS(tmpStatus)) {
1078 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
1079#if DEBUG_UALOC
1080 printf(" # availLocRegMaxName %s\n", availLocMinOrParent);
1081#endif
1082 char availLocTemp[kLangScriptRegMaxLen + 1];
1083 uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus);
1084 if (U_SUCCESS(tmpStatus)) {
1085 availLocTemp[kLangScriptRegMaxLen] = 0;
1086 uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1087 if (U_SUCCESS(tmpStatus)) {
1088 availLocMinOrParent[kLangScriptRegMaxLen] = 0;
1089#if DEBUG_UALOC
1090 printf(" # availLocNoRegParentName %s\n", availLocMinOrParent);
1091#endif
1092 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
1093 availLocIndexBackup = availLocIndex; // records where the match occurred
1094 backupMatchPrefLangIndex = prefLangIndex;
1095 minDistance = 1;
f3c0d7a5 1096 backupMatchPrefLang_pt_PT = (uprv_strcmp(prefLangNormName, "pt_PT") == 0);
2ca993e8
A
1097#if DEBUG_UALOC
1098 printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n");
1099#endif
1100 continue;
1101 }
1102 }
1103 }
1104 }
1105 }
1106 }
1107 // then check against minimized version of availLocNorm
1108 tmpStatus = U_ZERO_ERROR;
1109 uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
1110 if (U_FAILURE(tmpStatus)) {
1111 continue;
1112 }
1113 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
1114#if DEBUG_UALOC
1115 printf(" # availLocMinimized %s\n", availLocMinOrParent);
1116#endif
1117 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
1118 availLocIndexBackup = availLocIndex; // records where the match occurred
1119 backupMatchPrefLangIndex = prefLangIndex;
1120 minDistance = 1;
1121#if DEBUG_UALOC
1122 printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n");
1123#endif
b331163b
A
1124 }
1125 }
2ca993e8 1126 if (availLocIndexBackup >= 0) {
b331163b
A
1127 break;
1128 }
2ca993e8 1129 tmpStatus = U_ZERO_ERROR;
b331163b
A
1130 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1131 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
1132 break; // reached root or cannot proceed further
1133 }
1134 uprv_strcpy(prefLangBaseName, prefLangParentName);
1135 }
1136 }
2ca993e8
A
1137 }
1138 // If we have a backup match, decide what to do
1139 if (availLocIndexBackup >= 0) {
1140 if (!foundMatch) {
1141 // no main match, just use the backup
1142 availLocIndex = availLocIndexBackup;
1143 foundMatch = TRUE;
1144#if DEBUG_UALOC
1145 printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup);
1146#endif
f3c0d7a5 1147 } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && (!backupMatchPrefLang_pt_PT || uprv_strcmp(availLocNorm[availLocIndexBackup], "pt_BR") != 0)) {
2ca993e8
A
1148 // have a main match but backup match was higher in the prefs, use it if for a different language
1149#if DEBUG_UALOC
1150 printf(" # have backup match higher in prefs, comparing its language and script to main match\n");
1151#endif
1152 char mainLang[ULOC_LANG_CAPACITY + 1];
1153 char backupLang[ULOC_LANG_CAPACITY + 1];
1154 UErrorCode tmpStatus = U_ZERO_ERROR;
1155 uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1156 mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1157 uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1158 backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1159 if (U_SUCCESS(tmpStatus)) {
1160 if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1161 // backup match has different language than main match
1162 availLocIndex = availLocIndexBackup;
1163 // foundMatch is already TRUE
1164#if DEBUG_UALOC
1165 printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1166#endif
1167 } else {
1168 // backup match has same language as main match, check scripts too
1169 char availLocMaximized[kLangScriptRegMaxLen + 1];
1170
1171 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1172 availLocMaximized[kLangScriptRegMaxLen] = 0;
1173 uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1174 mainLang[ULOC_LANG_CAPACITY] = 0;
1175
1176 uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1177 availLocMaximized[kLangScriptRegMaxLen] = 0;
1178 uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1179 backupLang[ULOC_LANG_CAPACITY] = 0;
1180
1181 if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1182 // backup match has different script than main match
1183 availLocIndex = availLocIndexBackup;
1184 // foundMatch is already TRUE
1185#if DEBUG_UALOC
1186 printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1187#endif
1188 }
1189 }
1190 }
b331163b
A
1191 }
1192 }
1193
1194 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
1195 if (foundMatch) {
1196 // Here availLocIndex corresponds to the first matched localization
1197 UErrorCode tmpStatus = U_ZERO_ERROR;
1198 int32_t availLocMatchIndex = availLocIndex;
1199 if (locsToUseCount < localizationsToUseCapacity) {
1200 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
1201 }
1202 // at this point we must have availLocBase, and minimally matched against that.
1203 // if we have not already allocated and filled the array of
1204 // normalized availableLocalizations, do so now, but don't require it
1205 if (availLocNorm == NULL) {
1206 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
1207 if (availLocNorm != NULL) {
1208 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1209 tmpStatus = U_ZERO_ERROR;
1210 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
1211 if (U_FAILURE(tmpStatus)) {
1212 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
1213 }
1214 }
1215 }
1216 }
1217
1218 // add normalized form of matching loc, if different and in availLocBase
1219 if (locsToUseCount < localizationsToUseCapacity) {
1220 tmpStatus = U_ZERO_ERROR;
1221 char matchedLocNormName[kLangScriptRegMaxLen + 1];
1222 char matchedLocParentName[kLangScriptRegMaxLen + 1];
1223 // get normalized form of matching loc
1224 if (availLocNorm != NULL) {
1225 uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
1226 } else {
1227 ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
1228 }
1229 if (U_SUCCESS(tmpStatus)) {
1230 // add normalized form of matching loc, if different and in availLocBase
1231 if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
1232 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
1233 // from this point on, availLocIndex no longer corresponds to the matched localization.
1234 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1235 if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
1236 || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
1237 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1238 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1239 break;
1240 }
1241 }
1242 }
1243
1244 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
1245 while (locsToUseCount < localizationsToUseCapacity) {
1246 ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1247 if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
1248 break; // reached root or cannot proceed further
1249 }
1250
1251 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
1252 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1253 if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
1254 || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
1255 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1256 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1257 break;
1258 }
1259 }
1260 uprv_strcpy(matchedLocNormName, matchedLocParentName);
1261 }
1262
1263 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
1264 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
1265 // matchedLocNormName again, comparing against parents of of selected availLocNorm entries.
1266 // But this picks up too many matches that are not parents of the matched localization. So
1267 // we just handle these specially.
1268 if ( locsToUseCount < localizationsToUseCapacity
1269 && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
1270 || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
1271 int32_t zhTW_matchIndex = -1;
1272 UBool zhHant_found = FALSE;
1273 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1274 if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
1275 zhTW_matchIndex = availLocIndex;
1276 }
1277 if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
1278 zhHant_found = TRUE;
1279 }
1280 }
1281 if (zhTW_matchIndex >= 0 && !zhHant_found
1282 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
1283 localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
1284 }
1285 }
1286 }
1287 }
1288 }
1289
1290 uprv_free(availLocNorm);
1291 uprv_free(availLocBase);
1292 return locsToUseCount;
1293}
1294