]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ualoc.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / common / ualoc.cpp
CommitLineData
57a6839d
A
1/*
2*****************************************************************************************
a961784b 3* Copyright (C) 2014-2016 Apple Inc. All Rights Reserved.
57a6839d
A
4*****************************************************************************************
5*/
6
2ca993e8
A
7#define DEBUG_UALOC 0
8#if DEBUG_UALOC
9#include <stdio.h>
10#endif
11#include <string.h>
57a6839d
A
12#include "unicode/utypes.h"
13#include "unicode/ualoc.h"
14#include "unicode/uloc.h"
15#include "unicode/ures.h"
16#include "unicode/putil.h"
17#include "cstring.h"
18#include "cmemory.h"
b331163b
A
19#include "uhash.h"
20#include "umutex.h"
21#include "ucln_cmn.h"
57a6839d
A
22// the following has replacements for some math.h funcs etc
23#include "putilimp.h"
24
25
26// The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
27// From its docs (adapted): [IntF is] a special integer that represents the number in
28// normalized scientific notation.
29// Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
30// offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
31// 14660000000000 -> 1.46600E13 -> 63146600
32// 0.0001 -> 1.00000E-4 -> 46100000
33// -123.456 -> -1.23456E-2 -> -48123456
34//
35// Here to avoid an extra division we have the max coefficient as 999999 (instead of
36// 9.99999) and instead offset the exponent by -55.
37//
38static double doubleFromIntF(int32_t intF) {
39 double coefficient = (double)(intF % 1000000);
40 int32_t exponent = (intF / 1000000) - 55;
41 return coefficient * uprv_pow10(exponent);
42}
43
44static int compareLangEntries(const void * entry1, const void * entry2) {
45 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
46 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
47 // want descending order
48 if (fraction1 > fraction2) return -1;
49 if (fraction1 < fraction2) return 1;
50 // userFractions the same, sort by languageCode
51 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
52}
53
54static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
55static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
56static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
57
58enum {
59 kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
60 kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this
61};
62
63U_CAPI int32_t U_EXPORT2
64ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
65 UALanguageEntry *entries, int32_t entriesCapacity,
66 UErrorCode *err)
67{
68 if (U_FAILURE(*err)) {
69 return 0;
70 }
71 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
72 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
73 *err = U_ILLEGAL_ARGUMENT_ERROR;
74 return 0;
75 }
76 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
77 rb = ures_getByKey(rb, "territoryInfo", rb, err);
78 rb = ures_getByKey(rb, regionID, rb, err);
79 if (U_FAILURE(*err)) {
80 ures_close(rb);
81 return 0;
82 }
83
84 int32_t entryCount = 0;
85 UResourceBundle *langBund = NULL;
86 int32_t lbIdx, lbCount = ures_getSize(rb);
87 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
88 UALanguageEntry * langEntries = localLangEntries;
89 int32_t langEntriesMax = kLocalLangEntriesMax;
90
91 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
92 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
93 if (U_FAILURE(*err)) {
94 break;
95 }
96 const char * langCode = ures_getKey(langBund);
97 if (uprv_strcmp(langCode,"territoryF") == 0) {
98 continue;
99 }
100 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
101 continue; // a code we cannot handle
102 }
103
104 UErrorCode localErr = U_ZERO_ERROR;
105 double userFraction = 0.0;
106 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
107 if (U_SUCCESS(localErr)) {
108 int32_t intF = ures_getInt(itemBund, &localErr);
109 if (U_SUCCESS(localErr)) {
110 userFraction = doubleFromIntF(intF);
111 }
112 ures_close(itemBund);
113 }
114 if (userFraction < minimumFraction) {
115 continue;
116 }
117 if (entries != NULL) {
118 localErr = U_ZERO_ERROR;
119 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
120 int32_t ulen;
121 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
122 if (U_SUCCESS(localErr)) {
123 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
124 if (cmp == 0) {
125 langStatus = UALANGSTATUS_OFFICIAL;
126 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
127 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
128 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
129 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
130 }
131 }
132 // Now we have all of the info for our next entry
133 if (entryCount >= langEntriesMax) {
134 int32_t newMax = langEntriesMax * kLangEntriesFactor;
135 if (langEntries == localLangEntries) {
136 // first allocation, copy from local buf
137 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
138 if (langEntries == NULL) {
139 *err = U_MEMORY_ALLOCATION_ERROR;
140 break;
141 }
142 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
143 } else {
144 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
145 if (langEntries == NULL) {
146 *err = U_MEMORY_ALLOCATION_ERROR;
147 break;
148 }
149 }
150 langEntriesMax = newMax;
151 }
152 uprv_strcpy(langEntries[entryCount].languageCode, langCode);
153 langEntries[entryCount].userFraction = userFraction;
154 langEntries[entryCount].status = langStatus;
155 }
156 entryCount++;
157 }
158 ures_close(langBund);
159 ures_close(rb);
160 if (U_FAILURE(*err)) {
161 if (langEntries != localLangEntries) {
162 free(langEntries);
163 }
164 return 0;
165 }
166 if (entries != NULL) {
167 // sort langEntries, copy entries that fit to provided array
168 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
169 if (entryCount > entriesCapacity) {
170 entryCount = entriesCapacity;
171 }
172 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
173 if (langEntries != localLangEntries) {
174 free(langEntries);
175 }
176 }
177 return entryCount;
178}
179
// Parent overrides consulted by ualoc_getAppleParent for "en" and "zh" locales.
// Layout: a flat list of (locale, parent) string pairs terminated by one NULL.
// The locale column must stay sorted in uprv_strcmp order, because the lookup
// stops at the first entry that compares greater than the locale being sought.
static const char * forceParent[] = {
    /* locale */   /* parent */
    "en_150",      "en_GB",   // en for Europe
    "en_AU",       "en_GB",
    "en_BD",       "en_GB",   // en for Bangladesh
    "en_BE",       "en_150",  // en for Belgium goes to en for Europe
    "en_DG",       "en_GB",
    "en_FK",       "en_GB",
    "en_GG",       "en_GB",
    "en_GI",       "en_GB",
    "en_HK",       "en_GB",   // en for Hong Kong
    "en_IE",       "en_GB",
    "en_IM",       "en_GB",
    "en_IN",       "en_GB",
    "en_IO",       "en_GB",
    "en_JE",       "en_GB",
    "en_JM",       "en_GB",
    "en_MO",       "en_GB",
    "en_MT",       "en_GB",
    "en_MV",       "en_GB",   // for Maldives
    "en_MY",       "en_GB",   // en for Malaysia
    "en_NZ",       "en_AU",
    "en_PK",       "en_GB",   // en for Pakistan
    "en_SG",       "en_GB",
    "en_SH",       "en_GB",
    "en_VG",       "en_GB",
    "zh",          "zh_CN",
    "zh_CN",       "root",
    "zh_Hant",     "zh_TW",
    "zh_TW",       "root",
    NULL
};

// Capacity (excluding the terminating 0) of the buffer that ualoc_getAppleParent
// uses for child locale names read from parentLocales.
enum { kLocBaseNameMax = 16 };
213
57a6839d
A
214U_CAPI int32_t U_EXPORT2
215ualoc_getAppleParent(const char* localeID,
216 char * parent,
217 int32_t parentCapacity,
218 UErrorCode* err)
219{
220 UResourceBundle *rb;
221 int32_t len;
222 UErrorCode tempStatus;
223 char locbuf[ULOC_FULLNAME_CAPACITY+1];
08b89b0a 224 char * foundDoubleUnderscore;
57a6839d
A
225
226 if (U_FAILURE(*err)) {
227 return 0;
228 }
229 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
230 *err = U_ILLEGAL_ARGUMENT_ERROR;
231 return 0;
232 }
08b89b0a 233 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
57a6839d
A
234 if (U_FAILURE(*err)) {
235 return 0;
236 }
237 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
238 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
239 *err = U_ZERO_ERROR;
240 }
08b89b0a
A
241 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
242 if (foundDoubleUnderscore != NULL) {
243 *foundDoubleUnderscore = 0; /* terminate at the __ */
244 len = uprv_strlen(locbuf);
245 }
b331163b 246 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
57a6839d
A
247 const char ** forceParentPtr = forceParent;
248 const char * testCurLoc;
249 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
250 int cmp = uprv_strcmp(locbuf, testCurLoc);
251 if (cmp <= 0) {
252 if (cmp == 0) {
253 len = uprv_strlen(*forceParentPtr);
254 if (len < parentCapacity) {
255 uprv_strcpy(parent, *forceParentPtr);
256 } else {
257 *err = U_BUFFER_OVERFLOW_ERROR;
258 }
259 return len;
260 }
261 break;
262 }
263 forceParentPtr++;
264 }
265 }
266 tempStatus = U_ZERO_ERROR;
267 rb = ures_openDirect(NULL, locbuf, &tempStatus);
268 if (U_SUCCESS(tempStatus)) {
269 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
2ca993e8 270 ures_close(rb);
57a6839d
A
271 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
272 // we have followed an alias
273 len = uprv_strlen(actualLocale);
274 if (len < parentCapacity) {
275 uprv_strcpy(parent, actualLocale);
276 } else {
277 *err = U_BUFFER_OVERFLOW_ERROR;
278 }
57a6839d
A
279 return len;
280 }
2ca993e8
A
281 }
282 tempStatus = U_ZERO_ERROR;
283 rb = ures_openDirect(NULL, "supplementalData", &tempStatus);
284 rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus);
285 if (U_SUCCESS(tempStatus)) {
286 UResourceBundle * parentMapBundle = NULL;
287 int32_t childLen = 0;
288 while (childLen == 0) {
289 tempStatus = U_ZERO_ERROR;
290 parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus);
291 if (U_FAILURE(tempStatus)) {
292 break; // no more parent bundles, normal exit
293 }
294 char childName[kLocBaseNameMax + 1];
295 childName[kLocBaseNameMax] = 0;
296 const char * childPtr = NULL;
297 if (ures_getType(parentMapBundle) == URES_STRING) {
298 childLen = kLocBaseNameMax;
299 childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus);
300 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
301 childLen = 0;
302 }
303 } else { // should be URES_ARRAY
304 int32_t childCur, childCount = ures_getSize(parentMapBundle);
305 for (childCur = 0; childCur < childCount && childLen == 0; childCur++) {
306 tempStatus = U_ZERO_ERROR;
307 childLen = kLocBaseNameMax;
308 childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus);
309 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
310 childLen = 0;
311 }
312 }
313 }
314 }
315 ures_close(rb);
316 if (childLen > 0) {
317 // parentMapBundle key is the parent we are looking for
318 const char * keyStr = ures_getKey(parentMapBundle);
319 len = uprv_strlen(keyStr);
57a6839d 320 if (len < parentCapacity) {
2ca993e8 321 uprv_strcpy(parent, keyStr);
57a6839d
A
322 } else {
323 *err = U_BUFFER_OVERFLOW_ERROR;
324 }
2ca993e8 325 ures_close(parentMapBundle);
57a6839d
A
326 return len;
327 }
2ca993e8 328 ures_close(parentMapBundle);
57a6839d 329 }
2ca993e8 330
57a6839d
A
331 len = uloc_getParent(locbuf, parent, parentCapacity, err);
332 if (U_SUCCESS(*err) && len == 0) {
333 len = 4;
334 if (len < parentCapacity) {
335 uprv_strcpy(parent, "root");
336 } else {
337 *err = U_BUFFER_OVERFLOW_ERROR;
338 }
339 }
340 return len;
341}
342
b331163b
A
343// =================
344// Data and related functions for ualoc_localizationsToUse
345// =================
346
347static const char * appleAliasMap[][2] = {
348 // names are lowercase here because they are looked up after being processed by uloc_getBaseName
349 { "arabic", "ar" }, // T2
350 { "chinese", "zh_Hans" }, // T0
351 { "danish", "da" }, // T2
352 { "dutch", "nl" }, // T1, still in use
353 { "english", "en" }, // T0, still in use
354 { "finnish", "fi" }, // T2
355 { "french", "fr" }, // T0, still in use
356 { "german", "de" }, // T0, still in use
357 { "italian", "it" }, // T1, still in use
358 { "japanese", "ja" }, // T0, still in use
359 { "korean", "ko" }, // T1
a961784b 360 { "no_NO", "nb_NO" }, // special
b331163b
A
361 { "norwegian", "nb" }, // T2
362 { "polish", "pl" }, // T2
363 { "portuguese", "pt" }, // T2
364 { "russian", "ru" }, // T2
365 { "spanish", "es" }, // T1, still in use
366 { "swedish", "sv" }, // T2
367 { "thai", "th" }, // T2
368 { "turkish", "tr" }, // T2
369 { "zh", "zh_Hans" }, // special
370};
2ca993e8 371enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) };
b331163b
A
372
373static const char * appleParentMap[][2] = {
374 { "en_150", "en_GB" }, // Apple custom parent
375 { "en_AD", "en_150" }, // Apple locale addition
376 { "en_AL", "en_150" }, // Apple locale addition
377 { "en_AT", "en_150" }, // Apple locale addition
378 { "en_AU", "en_GB" }, // Apple custom parent
379 { "en_BA", "en_150" }, // Apple locale addition
380 { "en_BD", "en_GB" }, // Apple custom parent
a961784b 381 { "en_BE", "en_150" }, // Apple custom parent
b331163b
A
382 { "en_CH", "en_150" }, // Apple locale addition
383 { "en_CY", "en_150" }, // Apple locale addition
384 { "en_CZ", "en_150" }, // Apple locale addition
385 { "en_DE", "en_150" }, // Apple locale addition
a961784b 386 { "en_DG", "en_GB" },
b331163b
A
387 { "en_DK", "en_150" }, // Apple locale addition
388 { "en_EE", "en_150" }, // Apple locale addition
389 { "en_ES", "en_150" }, // Apple locale addition
390 { "en_FI", "en_150" }, // Apple locale addition
a961784b 391 { "en_FK", "en_GB" },
b331163b 392 { "en_FR", "en_150" }, // Apple locale addition
a961784b
A
393 { "en_GG", "en_GB" },
394 { "en_GI", "en_GB" },
b331163b
A
395 { "en_GR", "en_150" }, // Apple locale addition
396 { "en_HK", "en_GB" }, // Apple custom parent
397 { "en_HR", "en_150" }, // Apple locale addition
398 { "en_HU", "en_150" }, // Apple locale addition
a961784b 399 { "en_IE", "en_GB" },
b331163b 400 { "en_IL", "en_001" }, // Apple locale addition
a961784b 401 { "en_IM", "en_GB" },
b331163b 402 { "en_IN", "en_GB" }, // Apple custom parent
a961784b 403 { "en_IO", "en_GB" },
b331163b
A
404 { "en_IS", "en_150" }, // Apple locale addition
405 { "en_IT", "en_150" }, // Apple locale addition
a961784b 406 { "en_JE", "en_GB" },
a62d09fc 407 { "en_JM", "en_GB" },
b331163b
A
408 { "en_LT", "en_150" }, // Apple locale addition
409 { "en_LU", "en_150" }, // Apple locale addition
410 { "en_LV", "en_150" }, // Apple locale addition
411 { "en_ME", "en_150" }, // Apple locale addition
a961784b
A
412 { "en_MO", "en_GB" },
413 { "en_MT", "en_GB" },
2ca993e8 414 { "en_MV", "en_GB" },
b331163b
A
415 { "en_MY", "en_GB" }, // Apple custom parent
416 { "en_NL", "en_150" }, // Apple locale addition
417 { "en_NO", "en_150" }, // Apple locale addition
2ca993e8 418 { "en_NZ", "en_AU" },
b331163b
A
419 { "en_PK", "en_GB" }, // Apple custom parent
420 { "en_PL", "en_150" }, // Apple locale addition
421 { "en_PT", "en_150" }, // Apple locale addition
422 { "en_RO", "en_150" }, // Apple locale addition
423 { "en_RU", "en_150" }, // Apple locale addition
424 { "en_SE", "en_150" }, // Apple locale addition
a961784b
A
425 { "en_SG", "en_GB" },
426 { "en_SH", "en_GB" },
b331163b
A
427 { "en_SI", "en_150" }, // Apple locale addition
428 { "en_SK", "en_150" }, // Apple locale addition
429 { "en_TR", "en_150" }, // Apple locale addition
a961784b 430 { "en_VG", "en_GB" },
b331163b 431};
2ca993e8
A
432enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) };
433
434typedef struct {
435 const char * locale;
436 const char * parent;
437 int8_t distance;
438} LocParentAndDistance;
439
440static LocParentAndDistance locParentMap[] = {
441 // The localizations listed in the first column are in
442 // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.).
443 // The distance is a rough measure of distance from
444 // the localization to its parent, used as a weight.
445 { "en_100", "en", 2 },
446 { "en_150", "en_GB", 1 },
447 { "en_AU", "en_GB", 1 },
448 { "en_GB", "en_100", 0 },
449 { "es_419", "es", 2 },
450 { "es_MX", "es_419", 0 },
451 { "pt_PT", "pt", 2 },
452 { "zh_Hans_CN", "zh_Hans", 0 },
453 { "zh_Hant_HK", "zh_Hant", 1 },
454 { "zh_Hant_TW", "zh_Hant", 0 },
b331163b 455};
2ca993e8 456enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 };
b331163b
A
457
458enum {
459 kStringsAllocSize = 4096, // cannot expand; current actual usage 3610
460 kParentMapInitCount = 161 // can expand; current actual usage 161
461};
462
463U_CDECL_BEGIN
464static UBool U_CALLCONV ualocale_cleanup(void);
465U_CDECL_END
466
467U_NAMESPACE_BEGIN
468
469static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
470
471static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
472static char* gStrings = NULL;
473static UHashtable* gAliasMap = NULL;
474static UHashtable* gParentMap = NULL;
475
476U_NAMESPACE_END
477
478U_CDECL_BEGIN
479
480static UBool U_CALLCONV ualocale_cleanup(void)
481{
482 U_NAMESPACE_USE
483
484 gUALocaleCacheInitOnce.reset();
485
486 if (gMapDataState > 0) {
487 uhash_close(gParentMap);
488 gParentMap = NULL;
489 uhash_close(gAliasMap);
490 gAliasMap = NULL;
491 uprv_free(gStrings);
492 gStrings = NULL;
493 }
494 gMapDataState = 0;
495 return TRUE;
496}
497
498static void initializeMapData() {
499 U_NAMESPACE_USE
500
501 UResourceBundle * curBundle;
502 char* stringsPtr;
503 char* stringsEnd;
504 UErrorCode status;
505 int32_t entryIndex, icuEntryCount;
506
507 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
508
509 gStrings = (char*)uprv_malloc(kStringsAllocSize);
510 if (gStrings) {
511 stringsPtr = gStrings;
512 stringsEnd = gStrings + kStringsAllocSize;
513 }
514
515 status = U_ZERO_ERROR;
516 curBundle = NULL;
517 icuEntryCount = 0;
518 if (gStrings) {
519 curBundle = ures_openDirect(NULL, "metadata", &status);
520 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
521 curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
522 if (U_SUCCESS(status)) {
523 icuEntryCount = ures_getSize(curBundle); // currently 331
524 }
525 }
526 status = U_ZERO_ERROR;
527 gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
528 kAppleAliasMapCount + icuEntryCount, &status);
529 // defaults to keyDeleter NULL
530 if (U_SUCCESS(status)) {
531 for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
532 uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
533 }
534 status = U_ZERO_ERROR;
535 UResourceBundle * aliasMapBundle = NULL;
536 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
537 aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
538 if (U_FAILURE(status)) {
539 break; // error
540 }
541 const char * keyStr = ures_getKey(aliasMapBundle);
542 int32_t len = uprv_strlen(keyStr);
543 if (len >= stringsEnd - stringsPtr) {
544 break; // error
545 }
546 uprv_strcpy(stringsPtr, keyStr);
547 char * inLocStr = stringsPtr;
548 stringsPtr += len + 1;
549
550 len = stringsEnd - stringsPtr - 1;
551 ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
552 if (U_FAILURE(status)) {
553 break; // error
554 }
555 stringsPtr[len] = 0;
556 uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
557 stringsPtr += len + 1;
558 }
559 ures_close(aliasMapBundle);
560 } else {
561 ures_close(curBundle);
562 uprv_free(gStrings);
563 gMapDataState = -1; // failure
564 return;
565 }
566 ures_close(curBundle);
567
568 status = U_ZERO_ERROR;
569 gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
570 kParentMapInitCount, &status);
571 // defaults to keyDeleter NULL
572 if (U_SUCCESS(status)) {
573 curBundle = ures_openDirect(NULL, "supplementalData", &status);
574 curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
575 if (U_SUCCESS(status)) {
576 UResourceBundle * parentMapBundle = NULL;
577 while (TRUE) {
578 parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
579 if (U_FAILURE(status)) {
580 break; // no more parent bundles, normal exit
581 }
582 const char * keyStr = ures_getKey(parentMapBundle);
583 int32_t len = uprv_strlen(keyStr);
584 if (len >= stringsEnd - stringsPtr) {
585 break; // error
586 }
587 uprv_strcpy(stringsPtr, keyStr);
588 char * parentStr = stringsPtr;
589 stringsPtr += len + 1;
590
591 if (ures_getType(parentMapBundle) == URES_STRING) {
592 len = stringsEnd - stringsPtr - 1;
593 ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
594 if (U_FAILURE(status)) {
595 break; // error
596 }
597 stringsPtr[len] = 0;
598 uhash_put(gParentMap, stringsPtr, parentStr, &status);
599 stringsPtr += len + 1;
600 } else {
601 // should be URES_ARRAY
602 icuEntryCount = ures_getSize(parentMapBundle);
603 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
604 len = stringsEnd - stringsPtr - 1;
605 ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
606 if (U_FAILURE(status)) {
607 break;
608 }
609 stringsPtr[len] = 0;
610 uhash_put(gParentMap, stringsPtr, parentStr, &status);
611 stringsPtr += len + 1;
612 }
613 }
614 }
615 ures_close(parentMapBundle);
616 }
617 ures_close(curBundle);
618
619 status = U_ZERO_ERROR;
620 for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
621 uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
622 }
623 } else {
624 uhash_close(gAliasMap);
625 gAliasMap = NULL;
626 uprv_free(gStrings);
627 gMapDataState = -1; // failure
628 return;
629 }
630
2ca993e8
A
631#if DEBUG_UALOC
632 printf("# gStrings size %ld\n", stringsPtr - gStrings);
633 printf("# gParentMap count %d\n", uhash_count(gParentMap));
634#endif
b331163b
A
635 gMapDataState = 1;
636}
637
638U_CDECL_END
639
640// The following maps aliases, etc. Ensures 0-termination if no error.
641static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
642{
643 if (U_FAILURE(*status)) {
644 return;
645 }
646 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
647
648 const char *replacement = NULL;
649 if (gMapDataState > 0) {
650 replacement = (const char *)uhash_get(gAliasMap, locale);
651 }
652 if (replacement == NULL) {
653 replacement = locale;
654 }
2ca993e8 655 int32_t len = strnlen(replacement, normalizedCapacity);
b331163b
A
656 if (len < normalizedCapacity) { // allow for 0 termination
657 uprv_strcpy(normalized, replacement);
658 } else {
659 *status = U_BUFFER_OVERFLOW_ERROR;
660 }
661}
662
663static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
664{
665 if (U_FAILURE(*status)) {
666 return;
667 }
668 if (gMapDataState > 0) {
669 const char *replacement = (const char *)uhash_get(gParentMap, locale);
670 if (replacement) {
671 int32_t len = uprv_strlen(replacement);
672 if (len < parentCapacity) { // allow for 0 termination
673 uprv_strcpy(parent, replacement);
674 } else {
675 *status = U_BUFFER_OVERFLOW_ERROR;
676 }
677 return;
678 }
679 }
680 uloc_getParent(locale, parent, parentCapacity - 1, status);
681 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
682}
683
684// Might do something better for this, perhaps maximizing locales then stripping
2ca993e8 685static const char * getLocParent(const char *locale, int32_t* distance)
b331163b
A
686{
687 int32_t locParentIndex;
688 for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
2ca993e8
A
689 if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) {
690 *distance = locParentMap[locParentIndex].distance;
691 return locParentMap[locParentIndex].parent;
b331163b
A
692 }
693 }
694 return NULL;
695}
696
697// this just checks if the *pointer* value is already in the array
698static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
699{
700 int32_t locIndex;
701 for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
702 if (locToCheck == localizationsToUse[locIndex]) {
703 return TRUE;
704 }
705 }
706 return FALSE;
707}
708
709enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
710
711int32_t
712ualoc_localizationsToUse( const char* const *preferredLanguages,
713 int32_t preferredLanguagesCount,
714 const char* const *availableLocalizations,
715 int32_t availableLocalizationsCount,
716 const char* *localizationsToUse,
717 int32_t localizationsToUseCapacity,
718 UErrorCode *status )
719{
720 if (U_FAILURE(*status)) {
721 return -1;
722 }
723 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
724 *status = U_ILLEGAL_ARGUMENT_ERROR;
725 return -1;
726 }
727 // get resource data, need to protect with mutex
728 if (gMapDataState == 0) {
729 umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData);
730 }
731 int32_t locsToUseCount = 0;
732 int32_t prefLangIndex, availLocIndex = 0;
2ca993e8
A
733 int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match
734 int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0;
b331163b
A
735 char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
736 char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
b331163b
A
737 UBool foundMatch = FALSE;
738
2ca993e8
A
739#if DEBUG_UALOC
740 if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) {
741 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n",
742 preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]);
743 } else {
744 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n",
745 preferredLanguagesCount, availableLocalizationsCount);
746 }
747#endif
748
b331163b
A
749 // Part 1, find the best matching localization, if any
750 for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
751 char prefLangBaseName[kLangScriptRegMaxLen + 1];
752 char prefLangNormName[kLangScriptRegMaxLen + 1];
753 char prefLangParentName[kLangScriptRegMaxLen + 1];
754 UErrorCode tmpStatus = U_ZERO_ERROR;
755
756 if (preferredLanguages[prefLangIndex] == NULL) {
757 continue; // skip NULL preferredLanguages entry, go to next one
758 }
759 // use underscores, fix bad capitalization, delete any keywords
760 uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
761 if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
762 uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
763 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
764 }
765 prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
2ca993e8
A
766#if DEBUG_UALOC
767 printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName);
768#endif
b331163b
A
769
770 // if we have not already allocated and filled the array of
771 // base availableLocalizations, do so now.
772 if (availLocBase == NULL) {
773 availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
774 if (availLocBase == NULL) {
775 continue; // cannot further check this preferredLanguages entry, go to next one
776 }
2ca993e8
A
777#if DEBUG_UALOC
778 printf(" # allocate & fill availLocBase\n");
779#endif
b331163b
A
780 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
781 tmpStatus = U_ZERO_ERROR;
2ca993e8
A
782 if (availableLocalizations[availLocIndex] == NULL) {
783 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
784 continue;
785 }
b331163b
A
786 uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
787 if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
788 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8 789 continue;
b331163b 790 }
2ca993e8
A
791 availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
792#if DEBUG_UALOC
793 printf(" # add availLocBase %s\n", availLocBase[availLocIndex]);
794#endif
b331163b
A
795 }
796 }
797 // first compare base preferredLanguage to base versions of availableLocalizations names
798 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
799 if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
800 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
801 foundMatchPrefLangIndex = prefLangIndex;
802#if DEBUG_UALOC
803 printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]);
804#endif
b331163b
A
805 break;
806 }
807 }
808 if (foundMatch) {
b331163b
A
809 break; // found a loc for this preferredLanguages entry
810 }
811
812 // get normalized preferredLanguage
813 tmpStatus = U_ZERO_ERROR;
814 ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
815 if (U_FAILURE(tmpStatus)) {
816 continue; // can't handle this preferredLanguages entry, go to next one
817 }
2ca993e8
A
818#if DEBUG_UALOC
819 printf(" # prefLangNormName %s\n", prefLangNormName);
820#endif
b331163b
A
821 // if we have not already allocated and filled the array of
822 // normalized availableLocalizations, do so now.
823 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK",
824 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
825 if (availLocNorm == NULL) {
826 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
827 if (availLocNorm == NULL) {
828 continue; // cannot further check this preferredLanguages entry, go to next one
829 }
2ca993e8
A
830#if DEBUG_UALOC
831 printf(" # allocate & fill availLocNorm\n");
832#endif
b331163b
A
833 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
834 tmpStatus = U_ZERO_ERROR;
835 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
836 if (U_FAILURE(tmpStatus)) {
837 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
2ca993e8
A
838#if DEBUG_UALOC
839 } else {
840 printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
841#endif
b331163b 842 }
b331163b
A
843 }
844 }
845 // now compare normalized preferredLanguage to normalized localization names
846 // if matches, copy *original* localization name
847 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
848 if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
849 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
850 foundMatchPrefLangIndex = prefLangIndex;
851#if DEBUG_UALOC
852 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
853#endif
b331163b
A
854 break;
855 }
856 }
857 if (foundMatch) {
b331163b
A
858 break; // found a loc for this preferredLanguages entry
859 }
860
861 // now walk up the parent chain for preferredLanguage
862 // until we find a match or hit root
863 uprv_strcpy(prefLangBaseName, prefLangNormName);
864 while (!foundMatch) {
865 tmpStatus = U_ZERO_ERROR;
866 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
867 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
868 break; // reached root or cannot proceed further
869 }
2ca993e8
A
870#if DEBUG_UALOC
871 printf(" # prefLangParentName %s\n", prefLangParentName);
872#endif
b331163b
A
873
874 // now compare this preferredLanguage parent to normalized localization names
875 // if matches, copy *original* localization name
876 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
877 if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
878 foundMatch = TRUE; // availLocIndex records where
2ca993e8
A
879 foundMatchPrefLangIndex = prefLangIndex;
880#if DEBUG_UALOC
881 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
882#endif
b331163b
A
883 break;
884 }
885 }
886 uprv_strcpy(prefLangBaseName, prefLangParentName);
887 }
888 if (foundMatch) {
889 break; // found a loc for this preferredLanguages entry
890 }
891
2ca993e8
A
892 // last try, use parents of selected language to try for backup match
893 // if we have not already found one
894 if (availLocIndexBackup < 0) {
b331163b
A
895 // now walk up the parent chain for preferredLanguage again
896 // checking against parents of selected availLocNorm entries
897 // but this time start with current prefLangNormName
898 uprv_strcpy(prefLangBaseName, prefLangNormName);
2ca993e8 899 int32_t minDistance = kMaxParentDistance;
b331163b 900 while (TRUE) {
b331163b
A
901 // now compare this preferredLanguage to normalized localization names
902 // parent if have one for this; if matches, copy *original* localization name
2ca993e8
A
903#if DEBUG_UALOC
904 printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName);
905#endif
b331163b 906 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
2ca993e8
A
907 char availLocMinOrParent[kLangScriptRegMaxLen + 1];
908 int32_t distance;
909 // first check for special Apple parents of availLocNorm -
910 // - the number of locales with such parents is small -
911 // or if no such parent, then try stripping region.
912 const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance);
913 if (availLocParent) {
914#if DEBUG_UALOC
915 printf(" # availLocAppleParentName %s\n", availLocParent);
916#endif
917 if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) {
918 availLocIndexBackup = availLocIndex; // records where the match occurred
919 backupMatchPrefLangIndex = prefLangIndex;
920 minDistance = distance;
921#if DEBUG_UALOC
922 printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance);
923#endif
924 continue;
925 }
926 }
927 if (minDistance <= 1) {
928 continue; // we can't get any closer in the rest of this iteration
929 }
930 if (availLocParent == NULL) {
931 tmpStatus = U_ZERO_ERROR;
932 int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
933 if (U_SUCCESS(tmpStatus) && regLen > 1) {
934 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
935 if (U_SUCCESS(tmpStatus)) {
936 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
937#if DEBUG_UALOC
938 printf(" # availLocRegMaxName %s\n", availLocMinOrParent);
939#endif
940 char availLocTemp[kLangScriptRegMaxLen + 1];
941 uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus);
942 if (U_SUCCESS(tmpStatus)) {
943 availLocTemp[kLangScriptRegMaxLen] = 0;
944 uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
945 if (U_SUCCESS(tmpStatus)) {
946 availLocMinOrParent[kLangScriptRegMaxLen] = 0;
947#if DEBUG_UALOC
948 printf(" # availLocNoRegParentName %s\n", availLocMinOrParent);
949#endif
950 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
951 availLocIndexBackup = availLocIndex; // records where the match occurred
952 backupMatchPrefLangIndex = prefLangIndex;
953 minDistance = 1;
954#if DEBUG_UALOC
955 printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n");
956#endif
957 continue;
958 }
959 }
960 }
961 }
962 }
963 }
964 // then check against minimized version of availLocNorm
965 tmpStatus = U_ZERO_ERROR;
966 uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
967 if (U_FAILURE(tmpStatus)) {
968 continue;
969 }
970 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
971#if DEBUG_UALOC
972 printf(" # availLocMinimized %s\n", availLocMinOrParent);
973#endif
974 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
975 availLocIndexBackup = availLocIndex; // records where the match occurred
976 backupMatchPrefLangIndex = prefLangIndex;
977 minDistance = 1;
978#if DEBUG_UALOC
979 printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n");
980#endif
b331163b
A
981 }
982 }
2ca993e8 983 if (availLocIndexBackup >= 0) {
b331163b
A
984 break;
985 }
2ca993e8 986 tmpStatus = U_ZERO_ERROR;
b331163b
A
987 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
988 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
989 break; // reached root or cannot proceed further
990 }
991 uprv_strcpy(prefLangBaseName, prefLangParentName);
992 }
993 }
2ca993e8
A
994 }
995 // If we have a backup match, decide what to do
996 if (availLocIndexBackup >= 0) {
997 if (!foundMatch) {
998 // no main match, just use the backup
999 availLocIndex = availLocIndexBackup;
1000 foundMatch = TRUE;
1001#if DEBUG_UALOC
1002 printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup);
1003#endif
1004 } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && uprv_strncmp(availLocNorm[availLocIndexBackup], "pt_BR", ULOC_LANG_CAPACITY) != 0) {
1005 // have a main match but backup match was higher in the prefs, use it if for a different language
1006#if DEBUG_UALOC
1007 printf(" # have backup match higher in prefs, comparing its language and script to main match\n");
1008#endif
1009 char mainLang[ULOC_LANG_CAPACITY + 1];
1010 char backupLang[ULOC_LANG_CAPACITY + 1];
1011 UErrorCode tmpStatus = U_ZERO_ERROR;
1012 uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1013 mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1014 uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1015 backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1016 if (U_SUCCESS(tmpStatus)) {
1017 if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1018 // backup match has different language than main match
1019 availLocIndex = availLocIndexBackup;
1020 // foundMatch is already TRUE
1021#if DEBUG_UALOC
1022 printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1023#endif
1024 } else {
1025 // backup match has same language as main match, check scripts too
1026 char availLocMaximized[kLangScriptRegMaxLen + 1];
1027
1028 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1029 availLocMaximized[kLangScriptRegMaxLen] = 0;
1030 uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1031 mainLang[ULOC_LANG_CAPACITY] = 0;
1032
1033 uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1034 availLocMaximized[kLangScriptRegMaxLen] = 0;
1035 uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1036 backupLang[ULOC_LANG_CAPACITY] = 0;
1037
1038 if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1039 // backup match has different script than main match
1040 availLocIndex = availLocIndexBackup;
1041 // foundMatch is already TRUE
1042#if DEBUG_UALOC
1043 printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1044#endif
1045 }
1046 }
1047 }
b331163b
A
1048 }
1049 }
1050
1051 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
1052 if (foundMatch) {
1053 // Here availLocIndex corresponds to the first matched localization
1054 UErrorCode tmpStatus = U_ZERO_ERROR;
1055 int32_t availLocMatchIndex = availLocIndex;
1056 if (locsToUseCount < localizationsToUseCapacity) {
1057 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
1058 }
1059 // at this point we must have availLocBase, and minimally matched against that.
1060 // if we have not already allocated and filled the array of
1061 // normalized availableLocalizations, do so now, but don't require it
1062 if (availLocNorm == NULL) {
1063 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
1064 if (availLocNorm != NULL) {
1065 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1066 tmpStatus = U_ZERO_ERROR;
1067 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
1068 if (U_FAILURE(tmpStatus)) {
1069 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
1070 }
1071 }
1072 }
1073 }
1074
1075 // add normalized form of matching loc, if different and in availLocBase
1076 if (locsToUseCount < localizationsToUseCapacity) {
1077 tmpStatus = U_ZERO_ERROR;
1078 char matchedLocNormName[kLangScriptRegMaxLen + 1];
1079 char matchedLocParentName[kLangScriptRegMaxLen + 1];
1080 // get normalized form of matching loc
1081 if (availLocNorm != NULL) {
1082 uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
1083 } else {
1084 ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
1085 }
1086 if (U_SUCCESS(tmpStatus)) {
1087 // add normalized form of matching loc, if different and in availLocBase
1088 if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
1089 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
1090 // from this point on, availLocIndex no longer corresponds to the matched localization.
1091 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1092 if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
1093 || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
1094 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1095 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1096 break;
1097 }
1098 }
1099 }
1100
1101 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
1102 while (locsToUseCount < localizationsToUseCapacity) {
1103 ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1104 if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
1105 break; // reached root or cannot proceed further
1106 }
1107
1108 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
1109 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1110 if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
1111 || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
1112 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1113 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1114 break;
1115 }
1116 }
1117 uprv_strcpy(matchedLocNormName, matchedLocParentName);
1118 }
1119
1120 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
1121 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
1122 // matchedLocNormName again, comparing against parents of selected availLocNorm entries.
1123 // But this picks up too many matches that are not parents of the matched localization. So
1124 // we just handle these specially.
1125 if ( locsToUseCount < localizationsToUseCapacity
1126 && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
1127 || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
1128 int32_t zhTW_matchIndex = -1;
1129 UBool zhHant_found = FALSE;
1130 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1131 if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
1132 zhTW_matchIndex = availLocIndex;
1133 }
1134 if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
1135 zhHant_found = TRUE;
1136 }
1137 }
1138 if (zhTW_matchIndex >= 0 && !zhHant_found
1139 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
1140 localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
1141 }
1142 }
1143 }
1144 }
1145 }
1146
1147 uprv_free(availLocNorm);
1148 uprv_free(availLocBase);
1149 return locsToUseCount;
1150}
1151