]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ualoc.cpp
9e0f814257c7b2a76582e6bd3c5fe855cf4a9839
[apple/icu.git] / icuSources / common / ualoc.cpp
1 /*
2 *****************************************************************************************
3 * Copyright (C) 2014-2016 Apple Inc. All Rights Reserved.
4 *****************************************************************************************
5 */
6
7 #define DEBUG_UALOC 0
8 #if DEBUG_UALOC
9 #include <stdio.h>
10 #endif
11 #include <string.h>
12 #include "unicode/utypes.h"
13 #include "unicode/ualoc.h"
14 #include "unicode/uloc.h"
15 #include "unicode/ures.h"
16 #include "unicode/putil.h"
17 #include "cstring.h"
18 #include "cmemory.h"
19 #include "uhash.h"
20 #include "umutex.h"
21 #include "ucln_cmn.h"
22 // the following has replacements for some math.h funcs etc
23 #include "putilimp.h"
24
25
26 // The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
27 // From its docs (adapted): [IntF is] a special integer that represents the number in
28 // normalized scientific notation.
29 // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
30 // offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
31 // 14660000000000 -> 1.46600E13 -> 63146600
32 // 0.0001 -> 1.00000E-4 -> 46100000
33 // -123.456 -> -1.23456E-2 -> -48123456
34 //
35 // Here to avoid an extra division we have the max coefficient as 999999 (instead of
36 // 9.99999) and instead offset the exponent by -55.
37 //
38 static double doubleFromIntF(int32_t intF) {
39 double coefficient = (double)(intF % 1000000);
40 int32_t exponent = (intF / 1000000) - 55;
41 return coefficient * uprv_pow10(exponent);
42 }
43
44 static int compareLangEntries(const void * entry1, const void * entry2) {
45 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
46 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
47 // want descending order
48 if (fraction1 > fraction2) return -1;
49 if (fraction1 < fraction2) return 1;
50 // userFractions the same, sort by languageCode
51 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
52 }
53
54 static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
55 static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
56 static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
57
58 enum {
59 kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
60 kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this
61 };
62
63 U_CAPI int32_t U_EXPORT2
64 ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
65 UALanguageEntry *entries, int32_t entriesCapacity,
66 UErrorCode *err)
67 {
68 if (U_FAILURE(*err)) {
69 return 0;
70 }
71 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
72 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
73 *err = U_ILLEGAL_ARGUMENT_ERROR;
74 return 0;
75 }
76 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
77 rb = ures_getByKey(rb, "territoryInfo", rb, err);
78 rb = ures_getByKey(rb, regionID, rb, err);
79 if (U_FAILURE(*err)) {
80 ures_close(rb);
81 return 0;
82 }
83
84 int32_t entryCount = 0;
85 UResourceBundle *langBund = NULL;
86 int32_t lbIdx, lbCount = ures_getSize(rb);
87 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
88 UALanguageEntry * langEntries = localLangEntries;
89 int32_t langEntriesMax = kLocalLangEntriesMax;
90
91 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
92 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
93 if (U_FAILURE(*err)) {
94 break;
95 }
96 const char * langCode = ures_getKey(langBund);
97 if (uprv_strcmp(langCode,"territoryF") == 0) {
98 continue;
99 }
100 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
101 continue; // a code we cannot handle
102 }
103
104 UErrorCode localErr = U_ZERO_ERROR;
105 double userFraction = 0.0;
106 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
107 if (U_SUCCESS(localErr)) {
108 int32_t intF = ures_getInt(itemBund, &localErr);
109 if (U_SUCCESS(localErr)) {
110 userFraction = doubleFromIntF(intF);
111 }
112 ures_close(itemBund);
113 }
114 if (userFraction < minimumFraction) {
115 continue;
116 }
117 if (entries != NULL) {
118 localErr = U_ZERO_ERROR;
119 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
120 int32_t ulen;
121 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
122 if (U_SUCCESS(localErr)) {
123 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
124 if (cmp == 0) {
125 langStatus = UALANGSTATUS_OFFICIAL;
126 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
127 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
128 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
129 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
130 }
131 }
132 // Now we have all of the info for our next entry
133 if (entryCount >= langEntriesMax) {
134 int32_t newMax = langEntriesMax * kLangEntriesFactor;
135 if (langEntries == localLangEntries) {
136 // first allocation, copy from local buf
137 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
138 if (langEntries == NULL) {
139 *err = U_MEMORY_ALLOCATION_ERROR;
140 break;
141 }
142 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
143 } else {
144 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
145 if (langEntries == NULL) {
146 *err = U_MEMORY_ALLOCATION_ERROR;
147 break;
148 }
149 }
150 langEntriesMax = newMax;
151 }
152 uprv_strcpy(langEntries[entryCount].languageCode, langCode);
153 langEntries[entryCount].userFraction = userFraction;
154 langEntries[entryCount].status = langStatus;
155 }
156 entryCount++;
157 }
158 ures_close(langBund);
159 ures_close(rb);
160 if (U_FAILURE(*err)) {
161 if (langEntries != localLangEntries) {
162 free(langEntries);
163 }
164 return 0;
165 }
166 if (entries != NULL) {
167 // sort langEntries, copy entries that fit to provided array
168 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
169 if (entryCount > entriesCapacity) {
170 entryCount = entriesCapacity;
171 }
172 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
173 if (langEntries != localLangEntries) {
174 free(langEntries);
175 }
176 }
177 return entryCount;
178 }
179
// Forced parents consulted first by ualoc_getAppleParent for locales starting
// with "en" or "zh". The array is a flat list of (locale, parent) pairs ended
// by a single NULL. The lookup stops at the first locale that does not compare
// less than the one being searched for, so the pairs must stay sorted by
// locale in strcmp order.
static const char * forceParent[] = {
    /* locale */  /* parent */
    "en_150",     "en_GB",      // en for Europe
    "en_AU",      "en_GB",
    "en_BD",      "en_GB",      // en for Bangladesh
    "en_BE",      "en_150",     // en for Belgium goes to en for Europe
    "en_DG",      "en_GB",
    "en_FK",      "en_GB",
    "en_GG",      "en_GB",
    "en_GI",      "en_GB",
    "en_HK",      "en_GB",      // en for Hong Kong
    "en_IE",      "en_GB",
    "en_IM",      "en_GB",
    "en_IN",      "en_GB",
    "en_IO",      "en_GB",
    "en_JE",      "en_GB",
    "en_MO",      "en_GB",
    "en_MT",      "en_GB",
    "en_MV",      "en_GB",      // for Maldives
    "en_MY",      "en_GB",      // en for Malaysia
    "en_NZ",      "en_AU",
    "en_PK",      "en_GB",      // en for Pakistan
    "en_SG",      "en_GB",
    "en_SH",      "en_GB",
    "en_VG",      "en_GB",
    "zh",         "zh_CN",
    "zh_CN",      "root",
    "zh_Hant",    "zh_TW",
    "zh_TW",      "root",
    NULL
};

// Capacity (excluding the terminator) of the buffer that receives child locale
// names read from parentLocales in ualoc_getAppleParent.
enum { kLocBaseNameMax = 16 };
212
213 U_CAPI int32_t U_EXPORT2
214 ualoc_getAppleParent(const char* localeID,
215 char * parent,
216 int32_t parentCapacity,
217 UErrorCode* err)
218 {
219 UResourceBundle *rb;
220 int32_t len;
221 UErrorCode tempStatus;
222 char locbuf[ULOC_FULLNAME_CAPACITY+1];
223 char * foundDoubleUnderscore;
224
225 if (U_FAILURE(*err)) {
226 return 0;
227 }
228 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
229 *err = U_ILLEGAL_ARGUMENT_ERROR;
230 return 0;
231 }
232 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
233 if (U_FAILURE(*err)) {
234 return 0;
235 }
236 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
237 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
238 *err = U_ZERO_ERROR;
239 }
240 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
241 if (foundDoubleUnderscore != NULL) {
242 *foundDoubleUnderscore = 0; /* terminate at the __ */
243 len = uprv_strlen(locbuf);
244 }
245 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
246 const char ** forceParentPtr = forceParent;
247 const char * testCurLoc;
248 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
249 int cmp = uprv_strcmp(locbuf, testCurLoc);
250 if (cmp <= 0) {
251 if (cmp == 0) {
252 len = uprv_strlen(*forceParentPtr);
253 if (len < parentCapacity) {
254 uprv_strcpy(parent, *forceParentPtr);
255 } else {
256 *err = U_BUFFER_OVERFLOW_ERROR;
257 }
258 return len;
259 }
260 break;
261 }
262 forceParentPtr++;
263 }
264 }
265 tempStatus = U_ZERO_ERROR;
266 rb = ures_openDirect(NULL, locbuf, &tempStatus);
267 if (U_SUCCESS(tempStatus)) {
268 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
269 ures_close(rb);
270 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
271 // we have followed an alias
272 len = uprv_strlen(actualLocale);
273 if (len < parentCapacity) {
274 uprv_strcpy(parent, actualLocale);
275 } else {
276 *err = U_BUFFER_OVERFLOW_ERROR;
277 }
278 return len;
279 }
280 }
281 tempStatus = U_ZERO_ERROR;
282 rb = ures_openDirect(NULL, "supplementalData", &tempStatus);
283 rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus);
284 if (U_SUCCESS(tempStatus)) {
285 UResourceBundle * parentMapBundle = NULL;
286 int32_t childLen = 0;
287 while (childLen == 0) {
288 tempStatus = U_ZERO_ERROR;
289 parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus);
290 if (U_FAILURE(tempStatus)) {
291 break; // no more parent bundles, normal exit
292 }
293 char childName[kLocBaseNameMax + 1];
294 childName[kLocBaseNameMax] = 0;
295 const char * childPtr = NULL;
296 if (ures_getType(parentMapBundle) == URES_STRING) {
297 childLen = kLocBaseNameMax;
298 childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus);
299 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
300 childLen = 0;
301 }
302 } else { // should be URES_ARRAY
303 int32_t childCur, childCount = ures_getSize(parentMapBundle);
304 for (childCur = 0; childCur < childCount && childLen == 0; childCur++) {
305 tempStatus = U_ZERO_ERROR;
306 childLen = kLocBaseNameMax;
307 childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus);
308 if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) {
309 childLen = 0;
310 }
311 }
312 }
313 }
314 ures_close(rb);
315 if (childLen > 0) {
316 // parentMapBundle key is the parent we are looking for
317 const char * keyStr = ures_getKey(parentMapBundle);
318 len = uprv_strlen(keyStr);
319 if (len < parentCapacity) {
320 uprv_strcpy(parent, keyStr);
321 } else {
322 *err = U_BUFFER_OVERFLOW_ERROR;
323 }
324 ures_close(parentMapBundle);
325 return len;
326 }
327 ures_close(parentMapBundle);
328 }
329
330 len = uloc_getParent(locbuf, parent, parentCapacity, err);
331 if (U_SUCCESS(*err) && len == 0) {
332 len = 4;
333 if (len < parentCapacity) {
334 uprv_strcpy(parent, "root");
335 } else {
336 *err = U_BUFFER_OVERFLOW_ERROR;
337 }
338 }
339 return len;
340 }
341
342 // =================
343 // Data and related functions for ualoc_localizationsToUse
344 // =================
345
346 static const char * appleAliasMap[][2] = {
347 // names are lowercase here because they are looked up after being processed by uloc_getBaseName
348 { "arabic", "ar" }, // T2
349 { "chinese", "zh_Hans" }, // T0
350 { "danish", "da" }, // T2
351 { "dutch", "nl" }, // T1, still in use
352 { "english", "en" }, // T0, still in use
353 { "finnish", "fi" }, // T2
354 { "french", "fr" }, // T0, still in use
355 { "german", "de" }, // T0, still in use
356 { "italian", "it" }, // T1, still in use
357 { "japanese", "ja" }, // T0, still in use
358 { "korean", "ko" }, // T1
359 { "no_NO", "nb_NO" }, // special
360 { "norwegian", "nb" }, // T2
361 { "polish", "pl" }, // T2
362 { "portuguese", "pt" }, // T2
363 { "russian", "ru" }, // T2
364 { "spanish", "es" }, // T1, still in use
365 { "swedish", "sv" }, // T2
366 { "thai", "th" }, // T2
367 { "turkish", "tr" }, // T2
368 { "zh", "zh_Hans" }, // special
369 };
370 enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) };
371
372 static const char * appleParentMap[][2] = {
373 { "en_150", "en_GB" }, // Apple custom parent
374 { "en_AD", "en_150" }, // Apple locale addition
375 { "en_AL", "en_150" }, // Apple locale addition
376 { "en_AT", "en_150" }, // Apple locale addition
377 { "en_AU", "en_GB" }, // Apple custom parent
378 { "en_BA", "en_150" }, // Apple locale addition
379 { "en_BD", "en_GB" }, // Apple custom parent
380 { "en_BE", "en_150" }, // Apple custom parent
381 { "en_CH", "en_150" }, // Apple locale addition
382 { "en_CY", "en_150" }, // Apple locale addition
383 { "en_CZ", "en_150" }, // Apple locale addition
384 { "en_DE", "en_150" }, // Apple locale addition
385 { "en_DG", "en_GB" },
386 { "en_DK", "en_150" }, // Apple locale addition
387 { "en_EE", "en_150" }, // Apple locale addition
388 { "en_ES", "en_150" }, // Apple locale addition
389 { "en_FI", "en_150" }, // Apple locale addition
390 { "en_FK", "en_GB" },
391 { "en_FR", "en_150" }, // Apple locale addition
392 { "en_GG", "en_GB" },
393 { "en_GI", "en_GB" },
394 { "en_GR", "en_150" }, // Apple locale addition
395 { "en_HK", "en_GB" }, // Apple custom parent
396 { "en_HR", "en_150" }, // Apple locale addition
397 { "en_HU", "en_150" }, // Apple locale addition
398 { "en_IE", "en_GB" },
399 { "en_IL", "en_001" }, // Apple locale addition
400 { "en_IM", "en_GB" },
401 { "en_IN", "en_GB" }, // Apple custom parent
402 { "en_IO", "en_GB" },
403 { "en_IS", "en_150" }, // Apple locale addition
404 { "en_IT", "en_150" }, // Apple locale addition
405 { "en_JE", "en_GB" },
406 { "en_LT", "en_150" }, // Apple locale addition
407 { "en_LU", "en_150" }, // Apple locale addition
408 { "en_LV", "en_150" }, // Apple locale addition
409 { "en_ME", "en_150" }, // Apple locale addition
410 { "en_MO", "en_GB" },
411 { "en_MT", "en_GB" },
412 { "en_MV", "en_GB" },
413 { "en_MY", "en_GB" }, // Apple custom parent
414 { "en_NL", "en_150" }, // Apple locale addition
415 { "en_NO", "en_150" }, // Apple locale addition
416 { "en_NZ", "en_AU" },
417 { "en_PK", "en_GB" }, // Apple custom parent
418 { "en_PL", "en_150" }, // Apple locale addition
419 { "en_PT", "en_150" }, // Apple locale addition
420 { "en_RO", "en_150" }, // Apple locale addition
421 { "en_RU", "en_150" }, // Apple locale addition
422 { "en_SE", "en_150" }, // Apple locale addition
423 { "en_SG", "en_GB" },
424 { "en_SH", "en_GB" },
425 { "en_SI", "en_150" }, // Apple locale addition
426 { "en_SK", "en_150" }, // Apple locale addition
427 { "en_TR", "en_150" }, // Apple locale addition
428 { "en_VG", "en_GB" },
429 };
430 enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) };
431
432 typedef struct {
433 const char * locale;
434 const char * parent;
435 int8_t distance;
436 } LocParentAndDistance;
437
438 static LocParentAndDistance locParentMap[] = {
439 // The localizations listed in the first column are in
440 // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.).
441 // The distance is a rough measure of distance from
442 // the localization to its parent, used as a weight.
443 { "en_100", "en", 2 },
444 { "en_150", "en_GB", 1 },
445 { "en_AU", "en_GB", 1 },
446 { "en_GB", "en_100", 0 },
447 { "es_419", "es", 2 },
448 { "es_MX", "es_419", 0 },
449 { "pt_PT", "pt", 2 },
450 { "zh_Hans_CN", "zh_Hans", 0 },
451 { "zh_Hant_HK", "zh_Hant", 1 },
452 { "zh_Hant_TW", "zh_Hant", 0 },
453 };
454 enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 };
455
456 enum {
457 kStringsAllocSize = 4096, // cannot expand; current actual usage 3610
458 kParentMapInitCount = 161 // can expand; current actual usage 161
459 };
460
461 U_CDECL_BEGIN
462 static UBool U_CALLCONV ualocale_cleanup(void);
463 U_CDECL_END
464
465 U_NAMESPACE_BEGIN
466
467 static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
468
469 static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
470 static char* gStrings = NULL;
471 static UHashtable* gAliasMap = NULL;
472 static UHashtable* gParentMap = NULL;
473
474 U_NAMESPACE_END
475
476 U_CDECL_BEGIN
477
478 static UBool U_CALLCONV ualocale_cleanup(void)
479 {
480 U_NAMESPACE_USE
481
482 gUALocaleCacheInitOnce.reset();
483
484 if (gMapDataState > 0) {
485 uhash_close(gParentMap);
486 gParentMap = NULL;
487 uhash_close(gAliasMap);
488 gAliasMap = NULL;
489 uprv_free(gStrings);
490 gStrings = NULL;
491 }
492 gMapDataState = 0;
493 return TRUE;
494 }
495
496 static void initializeMapData() {
497 U_NAMESPACE_USE
498
499 UResourceBundle * curBundle;
500 char* stringsPtr;
501 char* stringsEnd;
502 UErrorCode status;
503 int32_t entryIndex, icuEntryCount;
504
505 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
506
507 gStrings = (char*)uprv_malloc(kStringsAllocSize);
508 if (gStrings) {
509 stringsPtr = gStrings;
510 stringsEnd = gStrings + kStringsAllocSize;
511 }
512
513 status = U_ZERO_ERROR;
514 curBundle = NULL;
515 icuEntryCount = 0;
516 if (gStrings) {
517 curBundle = ures_openDirect(NULL, "metadata", &status);
518 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
519 curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
520 if (U_SUCCESS(status)) {
521 icuEntryCount = ures_getSize(curBundle); // currently 331
522 }
523 }
524 status = U_ZERO_ERROR;
525 gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
526 kAppleAliasMapCount + icuEntryCount, &status);
527 // defaults to keyDeleter NULL
528 if (U_SUCCESS(status)) {
529 for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
530 uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
531 }
532 status = U_ZERO_ERROR;
533 UResourceBundle * aliasMapBundle = NULL;
534 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
535 aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
536 if (U_FAILURE(status)) {
537 break; // error
538 }
539 const char * keyStr = ures_getKey(aliasMapBundle);
540 int32_t len = uprv_strlen(keyStr);
541 if (len >= stringsEnd - stringsPtr) {
542 break; // error
543 }
544 uprv_strcpy(stringsPtr, keyStr);
545 char * inLocStr = stringsPtr;
546 stringsPtr += len + 1;
547
548 len = stringsEnd - stringsPtr - 1;
549 ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
550 if (U_FAILURE(status)) {
551 break; // error
552 }
553 stringsPtr[len] = 0;
554 uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
555 stringsPtr += len + 1;
556 }
557 ures_close(aliasMapBundle);
558 } else {
559 ures_close(curBundle);
560 uprv_free(gStrings);
561 gMapDataState = -1; // failure
562 return;
563 }
564 ures_close(curBundle);
565
566 status = U_ZERO_ERROR;
567 gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
568 kParentMapInitCount, &status);
569 // defaults to keyDeleter NULL
570 if (U_SUCCESS(status)) {
571 curBundle = ures_openDirect(NULL, "supplementalData", &status);
572 curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
573 if (U_SUCCESS(status)) {
574 UResourceBundle * parentMapBundle = NULL;
575 while (TRUE) {
576 parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
577 if (U_FAILURE(status)) {
578 break; // no more parent bundles, normal exit
579 }
580 const char * keyStr = ures_getKey(parentMapBundle);
581 int32_t len = uprv_strlen(keyStr);
582 if (len >= stringsEnd - stringsPtr) {
583 break; // error
584 }
585 uprv_strcpy(stringsPtr, keyStr);
586 char * parentStr = stringsPtr;
587 stringsPtr += len + 1;
588
589 if (ures_getType(parentMapBundle) == URES_STRING) {
590 len = stringsEnd - stringsPtr - 1;
591 ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
592 if (U_FAILURE(status)) {
593 break; // error
594 }
595 stringsPtr[len] = 0;
596 uhash_put(gParentMap, stringsPtr, parentStr, &status);
597 stringsPtr += len + 1;
598 } else {
599 // should be URES_ARRAY
600 icuEntryCount = ures_getSize(parentMapBundle);
601 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
602 len = stringsEnd - stringsPtr - 1;
603 ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
604 if (U_FAILURE(status)) {
605 break;
606 }
607 stringsPtr[len] = 0;
608 uhash_put(gParentMap, stringsPtr, parentStr, &status);
609 stringsPtr += len + 1;
610 }
611 }
612 }
613 ures_close(parentMapBundle);
614 }
615 ures_close(curBundle);
616
617 status = U_ZERO_ERROR;
618 for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
619 uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
620 }
621 } else {
622 uhash_close(gAliasMap);
623 gAliasMap = NULL;
624 uprv_free(gStrings);
625 gMapDataState = -1; // failure
626 return;
627 }
628
629 #if DEBUG_UALOC
630 printf("# gStrings size %ld\n", stringsPtr - gStrings);
631 printf("# gParentMap count %d\n", uhash_count(gParentMap));
632 #endif
633 gMapDataState = 1;
634 }
635
636 U_CDECL_END
637
638 // The following maps aliases, etc. Ensures 0-termination if no error.
639 static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
640 {
641 if (U_FAILURE(*status)) {
642 return;
643 }
644 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
645
646 const char *replacement = NULL;
647 if (gMapDataState > 0) {
648 replacement = (const char *)uhash_get(gAliasMap, locale);
649 }
650 if (replacement == NULL) {
651 replacement = locale;
652 }
653 int32_t len = strnlen(replacement, normalizedCapacity);
654 if (len < normalizedCapacity) { // allow for 0 termination
655 uprv_strcpy(normalized, replacement);
656 } else {
657 *status = U_BUFFER_OVERFLOW_ERROR;
658 }
659 }
660
661 static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
662 {
663 if (U_FAILURE(*status)) {
664 return;
665 }
666 if (gMapDataState > 0) {
667 const char *replacement = (const char *)uhash_get(gParentMap, locale);
668 if (replacement) {
669 int32_t len = uprv_strlen(replacement);
670 if (len < parentCapacity) { // allow for 0 termination
671 uprv_strcpy(parent, replacement);
672 } else {
673 *status = U_BUFFER_OVERFLOW_ERROR;
674 }
675 return;
676 }
677 }
678 uloc_getParent(locale, parent, parentCapacity - 1, status);
679 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
680 }
681
682 // Might do something better for this, perhaps maximizing locales then stripping
683 static const char * getLocParent(const char *locale, int32_t* distance)
684 {
685 int32_t locParentIndex;
686 for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
687 if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) {
688 *distance = locParentMap[locParentIndex].distance;
689 return locParentMap[locParentIndex].parent;
690 }
691 }
692 return NULL;
693 }
694
695 // this just checks if the *pointer* value is already in the array
696 static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
697 {
698 int32_t locIndex;
699 for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
700 if (locToCheck == localizationsToUse[locIndex]) {
701 return TRUE;
702 }
703 }
704 return FALSE;
705 }
706
707 enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
708
709 int32_t
710 ualoc_localizationsToUse( const char* const *preferredLanguages,
711 int32_t preferredLanguagesCount,
712 const char* const *availableLocalizations,
713 int32_t availableLocalizationsCount,
714 const char* *localizationsToUse,
715 int32_t localizationsToUseCapacity,
716 UErrorCode *status )
717 {
718 if (U_FAILURE(*status)) {
719 return -1;
720 }
721 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
722 *status = U_ILLEGAL_ARGUMENT_ERROR;
723 return -1;
724 }
725 // get resource data, need to protect with mutex
726 if (gMapDataState == 0) {
727 umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData);
728 }
729 int32_t locsToUseCount = 0;
730 int32_t prefLangIndex, availLocIndex = 0;
731 int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match
732 int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0;
733 char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
734 char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
735 UBool foundMatch = FALSE;
736
737 #if DEBUG_UALOC
738 if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) {
739 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n",
740 preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]);
741 } else {
742 printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n",
743 preferredLanguagesCount, availableLocalizationsCount);
744 }
745 #endif
746
747 // Part 1, find the best matching localization, if any
748 for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
749 char prefLangBaseName[kLangScriptRegMaxLen + 1];
750 char prefLangNormName[kLangScriptRegMaxLen + 1];
751 char prefLangParentName[kLangScriptRegMaxLen + 1];
752 UErrorCode tmpStatus = U_ZERO_ERROR;
753
754 if (preferredLanguages[prefLangIndex] == NULL) {
755 continue; // skip NULL preferredLanguages entry, go to next one
756 }
757 // use underscores, fix bad capitalization, delete any keywords
758 uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
759 if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
760 uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
761 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
762 }
763 prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
764 #if DEBUG_UALOC
765 printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName);
766 #endif
767
768 // if we have not already allocated and filled the array of
769 // base availableLocalizations, do so now.
770 if (availLocBase == NULL) {
771 availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
772 if (availLocBase == NULL) {
773 continue; // cannot further check this preferredLanguages entry, go to next one
774 }
775 #if DEBUG_UALOC
776 printf(" # allocate & fill availLocBase\n");
777 #endif
778 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
779 tmpStatus = U_ZERO_ERROR;
780 if (availableLocalizations[availLocIndex] == NULL) {
781 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
782 continue;
783 }
784 uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
785 if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
786 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
787 continue;
788 }
789 availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
790 #if DEBUG_UALOC
791 printf(" # add availLocBase %s\n", availLocBase[availLocIndex]);
792 #endif
793 }
794 }
795 // first compare base preferredLanguage to base versions of availableLocalizations names
796 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
797 if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
798 foundMatch = TRUE; // availLocIndex records where
799 foundMatchPrefLangIndex = prefLangIndex;
800 #if DEBUG_UALOC
801 printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]);
802 #endif
803 break;
804 }
805 }
806 if (foundMatch) {
807 break; // found a loc for this preferredLanguages entry
808 }
809
810 // get normalized preferredLanguage
811 tmpStatus = U_ZERO_ERROR;
812 ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
813 if (U_FAILURE(tmpStatus)) {
814 continue; // can't handle this preferredLanguages entry, go to next one
815 }
816 #if DEBUG_UALOC
817 printf(" # prefLangNormName %s\n", prefLangNormName);
818 #endif
819 // if we have not already allocated and filled the array of
820 // normalized availableLocalizations, do so now.
821 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK",
822 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
823 if (availLocNorm == NULL) {
824 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
825 if (availLocNorm == NULL) {
826 continue; // cannot further check this preferredLanguages entry, go to next one
827 }
828 #if DEBUG_UALOC
829 printf(" # allocate & fill availLocNorm\n");
830 #endif
831 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
832 tmpStatus = U_ZERO_ERROR;
833 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
834 if (U_FAILURE(tmpStatus)) {
835 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
836 #if DEBUG_UALOC
837 } else {
838 printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
839 #endif
840 }
841 }
842 }
843 // now compare normalized preferredLanguage to normalized localization names
844 // if matches, copy *original* localization name
845 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
846 if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
847 foundMatch = TRUE; // availLocIndex records where
848 foundMatchPrefLangIndex = prefLangIndex;
849 #if DEBUG_UALOC
850 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
851 #endif
852 break;
853 }
854 }
855 if (foundMatch) {
856 break; // found a loc for this preferredLanguages entry
857 }
858
859 // now walk up the parent chain for preferredLanguage
860 // until we find a match or hit root
861 uprv_strcpy(prefLangBaseName, prefLangNormName);
862 while (!foundMatch) {
863 tmpStatus = U_ZERO_ERROR;
864 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
865 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
866 break; // reached root or cannot proceed further
867 }
868 #if DEBUG_UALOC
869 printf(" # prefLangParentName %s\n", prefLangParentName);
870 #endif
871
872 // now compare this preferredLanguage parent to normalized localization names
873 // if matches, copy *original* localization name
874 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
875 if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
876 foundMatch = TRUE; // availLocIndex records where
877 foundMatchPrefLangIndex = prefLangIndex;
878 #if DEBUG_UALOC
879 printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]);
880 #endif
881 break;
882 }
883 }
884 uprv_strcpy(prefLangBaseName, prefLangParentName);
885 }
886 if (foundMatch) {
887 break; // found a loc for this preferredLanguages entry
888 }
889
890 // last try, use parents of selected language to try for backup match
891 // if we have not already found one
892 if (availLocIndexBackup < 0) {
893 // now walk up the parent chain for preferredLanguage again
894 // checking against parents of selected availLocNorm entries
895 // but this time start with current prefLangNormName
896 uprv_strcpy(prefLangBaseName, prefLangNormName);
897 int32_t minDistance = kMaxParentDistance;
898 while (TRUE) {
899 // now compare this preferredLanguage to normalized localization names
900 // parent if have one for this; if matches, copy *original* localization name
901 #if DEBUG_UALOC
902 printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName);
903 #endif
904 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
905 char availLocMinOrParent[kLangScriptRegMaxLen + 1];
906 int32_t distance;
907 // first check for special Apple parents of availLocNorm -
908 // - the number of locales with such parents is small -
909 // or if there is no such parent, then try stripping region.
910 const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance);
911 if (availLocParent) {
912 #if DEBUG_UALOC
913 printf(" # availLocAppleParentName %s\n", availLocParent);
914 #endif
915 if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) {
916 availLocIndexBackup = availLocIndex; // records where the match occurred
917 backupMatchPrefLangIndex = prefLangIndex;
918 minDistance = distance;
919 #if DEBUG_UALOC
920 printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance);
921 #endif
922 continue;
923 }
924 }
925 if (minDistance <= 1) {
926 continue; // we can't get any closer in the rest of this iteration
927 }
928 if (availLocParent == NULL) {
929 tmpStatus = U_ZERO_ERROR;
930 int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
931 if (U_SUCCESS(tmpStatus) && regLen > 1) {
932 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
933 if (U_SUCCESS(tmpStatus)) {
934 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
935 #if DEBUG_UALOC
936 printf(" # availLocRegMaxName %s\n", availLocMinOrParent);
937 #endif
938 char availLocTemp[kLangScriptRegMaxLen + 1];
939 uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus);
940 if (U_SUCCESS(tmpStatus)) {
941 availLocTemp[kLangScriptRegMaxLen] = 0;
942 uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
943 if (U_SUCCESS(tmpStatus)) {
944 availLocMinOrParent[kLangScriptRegMaxLen] = 0;
945 #if DEBUG_UALOC
946 printf(" # availLocNoRegParentName %s\n", availLocMinOrParent);
947 #endif
948 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
949 availLocIndexBackup = availLocIndex; // records where the match occurred
950 backupMatchPrefLangIndex = prefLangIndex;
951 minDistance = 1;
952 #if DEBUG_UALOC
953 printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n");
954 #endif
955 continue;
956 }
957 }
958 }
959 }
960 }
961 }
962 // then check against minimized version of availLocNorm
963 tmpStatus = U_ZERO_ERROR;
964 uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus);
965 if (U_FAILURE(tmpStatus)) {
966 continue;
967 }
968 availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
969 #if DEBUG_UALOC
970 printf(" # availLocMinimized %s\n", availLocMinOrParent);
971 #endif
972 if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) {
973 availLocIndexBackup = availLocIndex; // records where the match occurred
974 backupMatchPrefLangIndex = prefLangIndex;
975 minDistance = 1;
976 #if DEBUG_UALOC
977 printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n");
978 #endif
979 }
980 }
981 if (availLocIndexBackup >= 0) {
982 break;
983 }
984 tmpStatus = U_ZERO_ERROR;
985 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
986 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
987 break; // reached root or cannot proceed further
988 }
989 uprv_strcpy(prefLangBaseName, prefLangParentName);
990 }
991 }
992 }
993 // If we have a backup match, decide what to do
994 if (availLocIndexBackup >= 0) {
995 if (!foundMatch) {
996 // no main match, just use the backup
997 availLocIndex = availLocIndexBackup;
998 foundMatch = TRUE;
999 #if DEBUG_UALOC
1000 printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup);
1001 #endif
1002 } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && uprv_strncmp(availLocNorm[availLocIndexBackup], "pt_BR", ULOC_LANG_CAPACITY) != 0) {
1003 // have a main match but backup match was higher in the prefs, use it if for a different language
1004 #if DEBUG_UALOC
1005 printf(" # have backup match higher in prefs, comparing its language and script to main match\n");
1006 #endif
1007 char mainLang[ULOC_LANG_CAPACITY + 1];
1008 char backupLang[ULOC_LANG_CAPACITY + 1];
1009 UErrorCode tmpStatus = U_ZERO_ERROR;
1010 uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1011 mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1012 uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1013 backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination
1014 if (U_SUCCESS(tmpStatus)) {
1015 if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1016 // backup match has different language than main match
1017 availLocIndex = availLocIndexBackup;
1018 // foundMatch is already TRUE
1019 #if DEBUG_UALOC
1020 printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1021 #endif
1022 } else {
1023 // backup match has same language as main match, check scripts too
1024 char availLocMaximized[kLangScriptRegMaxLen + 1];
1025
1026 uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1027 availLocMaximized[kLangScriptRegMaxLen] = 0;
1028 uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus);
1029 mainLang[ULOC_LANG_CAPACITY] = 0;
1030
1031 uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus);
1032 availLocMaximized[kLangScriptRegMaxLen] = 0;
1033 uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus);
1034 backupLang[ULOC_LANG_CAPACITY] = 0;
1035
1036 if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) {
1037 // backup match has different script than main match
1038 availLocIndex = availLocIndexBackup;
1039 // foundMatch is already TRUE
1040 #if DEBUG_UALOC
1041 printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup);
1042 #endif
1043 }
1044 }
1045 }
1046 }
1047 }
1048
1049 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
1050 if (foundMatch) {
1051 // Here availLocIndex corresponds to the first matched localization
1052 UErrorCode tmpStatus = U_ZERO_ERROR;
1053 int32_t availLocMatchIndex = availLocIndex;
1054 if (locsToUseCount < localizationsToUseCapacity) {
1055 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
1056 }
1057 // at this point we must have availLocBase, and minimally matched against that.
1058 // if we have not already allocated and filled the array of
1059 // normalized availableLocalizations, do so now, but don't require it
1060 if (availLocNorm == NULL) {
1061 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
1062 if (availLocNorm != NULL) {
1063 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1064 tmpStatus = U_ZERO_ERROR;
1065 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
1066 if (U_FAILURE(tmpStatus)) {
1067 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
1068 }
1069 }
1070 }
1071 }
1072
1073 // add normalized form of matching loc, if different and in availLocBase
1074 if (locsToUseCount < localizationsToUseCapacity) {
1075 tmpStatus = U_ZERO_ERROR;
1076 char matchedLocNormName[kLangScriptRegMaxLen + 1];
1077 char matchedLocParentName[kLangScriptRegMaxLen + 1];
1078 // get normalized form of matching loc
1079 if (availLocNorm != NULL) {
1080 uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
1081 } else {
1082 ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
1083 }
1084 if (U_SUCCESS(tmpStatus)) {
1085 // add normalized form of matching loc, if different and in availLocBase
1086 if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
1087 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
1088 // from this point on, availLocIndex no longer corresponds to the matched localization.
1089 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1090 if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
1091 || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
1092 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1093 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1094 break;
1095 }
1096 }
1097 }
1098
1099 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
1100 while (locsToUseCount < localizationsToUseCapacity) {
1101 ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
1102 if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
1103 break; // reached root or cannot proceed further
1104 }
1105
1106 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
1107 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1108 if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
1109 || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
1110 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
1111 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
1112 break;
1113 }
1114 }
1115 uprv_strcpy(matchedLocNormName, matchedLocParentName);
1116 }
1117
1118 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
1119 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
1120 // matchedLocNormName again, comparing against parents of selected availLocNorm entries.
1121 // But this picks up too many matches that are not parents of the matched localization. So
1122 // we just handle these specially.
1123 if ( locsToUseCount < localizationsToUseCapacity
1124 && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
1125 || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
1126 int32_t zhTW_matchIndex = -1;
1127 UBool zhHant_found = FALSE;
1128 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
1129 if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
1130 zhTW_matchIndex = availLocIndex;
1131 }
1132 if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
1133 zhHant_found = TRUE;
1134 }
1135 }
1136 if (zhTW_matchIndex >= 0 && !zhHant_found
1137 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
1138 localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
1139 }
1140 }
1141 }
1142 }
1143 }
1144
1145 uprv_free(availLocNorm);
1146 uprv_free(availLocBase);
1147 return locsToUseCount;
1148 }
1149