2 *****************************************************************************************
3 * Copyright (C) 2014-2016 Apple Inc. All Rights Reserved.
4 *****************************************************************************************
7 #include "unicode/utypes.h"
8 #include "unicode/ualoc.h"
9 #include "unicode/uloc.h"
10 #include "unicode/ures.h"
11 #include "unicode/putil.h"
17 // the following has replacements for some math.h funcs etc
21 // The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
22 // From its docs (adapted): [IntF is] a special integer that represents the number in
23 // normalized scientific notation.
24 // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
25 // offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
26 // 14660000000000 -> 1.46600E13 -> 63146600
27 // 0.0001 -> 1.00000E-4 -> 46100000
28 // -0.0123456 -> -1.23456E-2 -> -48123456
30 // Here to avoid an extra division we have the max coefficient as 999999 (instead of
31 // 9.99999) and instead offset the exponent by -55.
33 static double doubleFromIntF(int32_t intF
) {
34 double coefficient
= (double)(intF
% 1000000);
35 int32_t exponent
= (intF
/ 1000000) - 55;
36 return coefficient
* uprv_pow10(exponent
);
39 static int compareLangEntries(const void * entry1
, const void * entry2
) {
40 double fraction1
= ((const UALanguageEntry
*)entry1
)->userFraction
;
41 double fraction2
= ((const UALanguageEntry
*)entry2
)->userFraction
;
42 // want descending order
43 if (fraction1
> fraction2
) return -1;
44 if (fraction1
< fraction2
) return 1;
45 // userFractions the same, sort by languageCode
46 return uprv_strcmp(((const UALanguageEntry
*)entry1
)->languageCode
,((const UALanguageEntry
*)entry2
)->languageCode
);
49 static const UChar ustrLangStatusDefacto
[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
50 static const UChar ustrLangStatusOfficial
[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
51 static const UChar ustrLangStatusRegional
[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
// Sizing for the language-entry scratch array in ualoc_getLanguagesForRegion.
enum {
    // Capacity of the on-stack array:
    // enough for most regions to minimumFraction 0.001 except India
    kLocalLangEntriesMax = 26,
    // Growth factor:
    // if we have to allocate, multiply existing size by this
    kLangEntriesFactor = 3
};
58 U_CAPI
int32_t U_EXPORT2
59 ualoc_getLanguagesForRegion(const char *regionID
, double minimumFraction
,
60 UALanguageEntry
*entries
, int32_t entriesCapacity
,
63 if (U_FAILURE(*err
)) {
66 if ( regionID
== NULL
|| minimumFraction
< 0.0 || minimumFraction
> 1.0 ||
67 ((entries
==NULL
)? entriesCapacity
!=0: entriesCapacity
<0) ) {
68 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
71 UResourceBundle
*rb
= ures_openDirect(NULL
, "supplementalData", err
);
72 rb
= ures_getByKey(rb
, "territoryInfo", rb
, err
);
73 rb
= ures_getByKey(rb
, regionID
, rb
, err
);
74 if (U_FAILURE(*err
)) {
79 int32_t entryCount
= 0;
80 UResourceBundle
*langBund
= NULL
;
81 int32_t lbIdx
, lbCount
= ures_getSize(rb
);
82 UALanguageEntry localLangEntries
[kLocalLangEntriesMax
];
83 UALanguageEntry
* langEntries
= localLangEntries
;
84 int32_t langEntriesMax
= kLocalLangEntriesMax
;
86 for (lbIdx
= 0; lbIdx
< lbCount
; lbIdx
++) {
87 langBund
= ures_getByIndex(rb
, lbIdx
, langBund
, err
);
88 if (U_FAILURE(*err
)) {
91 const char * langCode
= ures_getKey(langBund
);
92 if (uprv_strcmp(langCode
,"territoryF") == 0) {
95 if (strnlen(langCode
, UALANGDATA_CODELEN
+1) > UALANGDATA_CODELEN
) { // no uprv_strnlen
96 continue; // a code we cannot handle
99 UErrorCode localErr
= U_ZERO_ERROR
;
100 double userFraction
= 0.0;
101 UResourceBundle
*itemBund
= ures_getByKey(langBund
, "populationShareF", NULL
, &localErr
);
102 if (U_SUCCESS(localErr
)) {
103 int32_t intF
= ures_getInt(itemBund
, &localErr
);
104 if (U_SUCCESS(localErr
)) {
105 userFraction
= doubleFromIntF(intF
);
107 ures_close(itemBund
);
109 if (userFraction
< minimumFraction
) {
112 if (entries
!= NULL
) {
113 localErr
= U_ZERO_ERROR
;
114 UALanguageStatus langStatus
= UALANGSTATUS_UNSPECIFIED
;
116 const UChar
* ustrLangStatus
= ures_getStringByKey(langBund
, "officialStatus", &ulen
, &localErr
);
117 if (U_SUCCESS(localErr
)) {
118 int32_t cmp
= u_strcmp(ustrLangStatus
, ustrLangStatusOfficial
);
120 langStatus
= UALANGSTATUS_OFFICIAL
;
121 } else if (cmp
< 0 && u_strcmp(ustrLangStatus
, ustrLangStatusDefacto
) == 0) {
122 langStatus
= UALANGSTATUS_DEFACTO_OFFICIAL
;
123 } else if (u_strcmp(ustrLangStatus
, ustrLangStatusRegional
) == 0) {
124 langStatus
= UALANGSTATUS_REGIONAL_OFFICIAL
;
127 // Now we have all of the info for our next entry
128 if (entryCount
>= langEntriesMax
) {
129 int32_t newMax
= langEntriesMax
* kLangEntriesFactor
;
130 if (langEntries
== localLangEntries
) {
131 // first allocation, copy from local buf
132 langEntries
= (UALanguageEntry
*)uprv_malloc(newMax
*sizeof(UALanguageEntry
));
133 if (langEntries
== NULL
) {
134 *err
= U_MEMORY_ALLOCATION_ERROR
;
137 uprv_memcpy(langEntries
, localLangEntries
, entryCount
*sizeof(UALanguageEntry
));
139 langEntries
= (UALanguageEntry
*)uprv_realloc(langEntries
, newMax
*sizeof(UALanguageEntry
));
140 if (langEntries
== NULL
) {
141 *err
= U_MEMORY_ALLOCATION_ERROR
;
145 langEntriesMax
= newMax
;
147 uprv_strcpy(langEntries
[entryCount
].languageCode
, langCode
);
148 langEntries
[entryCount
].userFraction
= userFraction
;
149 langEntries
[entryCount
].status
= langStatus
;
153 ures_close(langBund
);
155 if (U_FAILURE(*err
)) {
156 if (langEntries
!= localLangEntries
) {
161 if (entries
!= NULL
) {
162 // sort langEntries, copy entries that fit to provided array
163 qsort(langEntries
, entryCount
, sizeof(UALanguageEntry
), compareLangEntries
);
164 if (entryCount
> entriesCapacity
) {
165 entryCount
= entriesCapacity
;
167 uprv_memcpy(entries
, langEntries
, entryCount
*sizeof(UALanguageEntry
));
168 if (langEntries
!= localLangEntries
) {
175 static const char * forceParent
[] = {
176 "en_150", "en_GB", // en for Europe
178 "en_BD", "en_GB", // en for Bangladesh
179 "en_BE", "en_150", // en for Belgium goes to en for Europe
184 "en_HK", "en_GB", // en for Hong Kong
192 "en_MY", "en_GB", // en for Malaysia
194 "en_PK", "en_GB", // en for Pakistan
205 U_CAPI
int32_t U_EXPORT2
206 ualoc_getAppleParent(const char* localeID
,
208 int32_t parentCapacity
,
213 UErrorCode tempStatus
;
214 char locbuf
[ULOC_FULLNAME_CAPACITY
+1];
215 char * foundDoubleUnderscore
;
217 if (U_FAILURE(*err
)) {
220 if ( (parent
==NULL
)? parentCapacity
!=0: parentCapacity
<0 ) {
221 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
224 len
= uloc_getBaseName(localeID
, locbuf
, ULOC_FULLNAME_CAPACITY
, err
); /* canonicalize and strip keywords */
225 if (U_FAILURE(*err
)) {
228 if (*err
== U_STRING_NOT_TERMINATED_WARNING
) {
229 locbuf
[ULOC_FULLNAME_CAPACITY
] = 0;
232 foundDoubleUnderscore
= uprv_strstr(locbuf
, "__"); /* __ comes from bad/missing subtag or variant */
233 if (foundDoubleUnderscore
!= NULL
) {
234 *foundDoubleUnderscore
= 0; /* terminate at the __ */
235 len
= uprv_strlen(locbuf
);
237 if (len
>= 2 && (uprv_strncmp(locbuf
, "en", 2) == 0 || uprv_strncmp(locbuf
, "zh", 2) == 0)) {
238 const char ** forceParentPtr
= forceParent
;
239 const char * testCurLoc
;
240 while ( (testCurLoc
= *forceParentPtr
++) != NULL
) {
241 int cmp
= uprv_strcmp(locbuf
, testCurLoc
);
244 len
= uprv_strlen(*forceParentPtr
);
245 if (len
< parentCapacity
) {
246 uprv_strcpy(parent
, *forceParentPtr
);
248 *err
= U_BUFFER_OVERFLOW_ERROR
;
257 tempStatus
= U_ZERO_ERROR
;
258 rb
= ures_openDirect(NULL
, locbuf
, &tempStatus
);
259 if (U_SUCCESS(tempStatus
)) {
260 const char * actualLocale
= ures_getLocaleByType(rb
, ULOC_ACTUAL_LOCALE
, &tempStatus
);
261 if (U_SUCCESS(tempStatus
) && uprv_strcmp(locbuf
, actualLocale
) != 0) {
262 // we have followed an alias
263 len
= uprv_strlen(actualLocale
);
264 if (len
< parentCapacity
) {
265 uprv_strcpy(parent
, actualLocale
);
267 *err
= U_BUFFER_OVERFLOW_ERROR
;
272 tempStatus
= U_ZERO_ERROR
;
273 const UChar
* parentUName
= ures_getStringByKey(rb
, "%%Parent", &len
, &tempStatus
);
274 if (U_SUCCESS(tempStatus
) && tempStatus
!= U_USING_FALLBACK_WARNING
) {
275 if (len
< parentCapacity
) {
276 u_UCharsToChars(parentUName
, parent
, len
+ 1);
278 *err
= U_BUFFER_OVERFLOW_ERROR
;
285 len
= uloc_getParent(locbuf
, parent
, parentCapacity
, err
);
286 if (U_SUCCESS(*err
) && len
== 0) {
288 if (len
< parentCapacity
) {
289 uprv_strcpy(parent
, "root");
291 *err
= U_BUFFER_OVERFLOW_ERROR
;
298 // Data and related functions for ualoc_localizationsToUse
// Extra alias -> locale pairs. initializeMapData puts each pair into gAliasMap
// (key = first column, value = second column) before adding the aliases read
// from the ICU "metadata" bundle.
// names are lowercase here because they are looked up after being processed by uloc_getBaseName
static const char * appleAliasMap[][2] = {
    { "arabic",     "ar"      },    // T2
    { "chinese",    "zh_Hans" },    // T0
    { "danish",     "da"      },    // T2
    { "dutch",      "nl"      },    // T1, still in use
    { "english",    "en"      },    // T0, still in use
    { "finnish",    "fi"      },    // T2
    { "french",     "fr"      },    // T0, still in use
    { "german",     "de"      },    // T0, still in use
    { "italian",    "it"      },    // T1, still in use
    { "japanese",   "ja"      },    // T0, still in use
    { "korean",     "ko"      },    // T1
    { "no_NO",      "nb_NO"   },    // special
    { "norwegian",  "nb"      },    // T2
    { "polish",     "pl"      },    // T2
    { "portuguese", "pt"      },    // T2
    { "russian",    "ru"      },    // T2
    { "spanish",    "es"      },    // T1, still in use
    { "swedish",    "sv"      },    // T2
    { "thai",       "th"      },    // T2
    { "turkish",    "tr"      },    // T2
    { "zh",         "zh_Hans" },    // special
};
// Number of rows in appleAliasMap.
enum {
    kAppleAliasMapCount = sizeof(appleAliasMap) / sizeof(appleAliasMap[0])
};
// Locale -> parent pairs. initializeMapData puts each pair into gParentMap
// (key = first column, value = second column) after the entries read from the
// "parentLocales" resource of supplementalData.
static const char * appleParentMap[][2] = {
    { "en_150", "en_GB"  },     // Apple custom parent
    { "en_AD",  "en_150" },     // Apple locale addition
    { "en_AL",  "en_150" },     // Apple locale addition
    { "en_AT",  "en_150" },     // Apple locale addition
    { "en_AU",  "en_GB"  },     // Apple custom parent
    { "en_BA",  "en_150" },     // Apple locale addition
    { "en_BD",  "en_GB"  },     // Apple custom parent
    { "en_BE",  "en_150" },     // Apple custom parent
    { "en_CH",  "en_150" },     // Apple locale addition
    { "en_CY",  "en_150" },     // Apple locale addition
    { "en_CZ",  "en_150" },     // Apple locale addition
    { "en_DE",  "en_150" },     // Apple locale addition
    { "en_DG",  "en_GB"  },
    { "en_DK",  "en_150" },     // Apple locale addition
    { "en_EE",  "en_150" },     // Apple locale addition
    { "en_ES",  "en_150" },     // Apple locale addition
    { "en_FI",  "en_150" },     // Apple locale addition
    { "en_FK",  "en_GB"  },
    { "en_FR",  "en_150" },     // Apple locale addition
    { "en_GG",  "en_GB"  },
    { "en_GI",  "en_GB"  },
    { "en_GR",  "en_150" },     // Apple locale addition
    { "en_HK",  "en_GB"  },     // Apple custom parent
    { "en_HR",  "en_150" },     // Apple locale addition
    { "en_HU",  "en_150" },     // Apple locale addition
    { "en_IE",  "en_GB"  },
    { "en_IL",  "en_001" },     // Apple locale addition
    { "en_IM",  "en_GB"  },
    { "en_IN",  "en_GB"  },     // Apple custom parent
    { "en_IO",  "en_GB"  },
    { "en_IS",  "en_150" },     // Apple locale addition
    { "en_IT",  "en_150" },     // Apple locale addition
    { "en_JE",  "en_GB"  },
    { "en_LT",  "en_150" },     // Apple locale addition
    { "en_LU",  "en_150" },     // Apple locale addition
    { "en_LV",  "en_150" },     // Apple locale addition
    { "en_ME",  "en_150" },     // Apple locale addition
    { "en_MO",  "en_GB"  },
    { "en_MT",  "en_GB"  },
    { "en_MY",  "en_GB"  },     // Apple custom parent
    { "en_NL",  "en_150" },     // Apple locale addition
    { "en_NO",  "en_150" },     // Apple locale addition
    { "en_NZ",  "en_GB"  },
    { "en_PK",  "en_GB"  },     // Apple custom parent
    { "en_PL",  "en_150" },     // Apple locale addition
    { "en_PT",  "en_150" },     // Apple locale addition
    { "en_RO",  "en_150" },     // Apple locale addition
    { "en_RU",  "en_150" },     // Apple locale addition
    { "en_SE",  "en_150" },     // Apple locale addition
    { "en_SG",  "en_GB"  },
    { "en_SH",  "en_GB"  },
    { "en_SI",  "en_150" },     // Apple locale addition
    { "en_SK",  "en_150" },     // Apple locale addition
    { "en_TR",  "en_150" },     // Apple locale addition
    { "en_VG",  "en_GB"  },
};
// Number of rows in appleParentMap.
enum {
    kAppleParentMapCount = sizeof(appleParentMap) / sizeof(appleParentMap[0])
};
386 // Might do something better for this, perhaps maximizing locales then stripping.
387 // Selected parents of available localizations, add as necessary.
388 static const char * locParentMap
[][2] = {
391 { "zh_Hans_CN", "zh_Hans" },
392 { "zh_Hant_TW", "zh_Hant" },
394 enum { kLocParentMapCount
= sizeof(locParentMap
)/sizeof(locParentMap
[0]) };
// Sizing for the data built by initializeMapData.
enum {
    // Bytes allocated for gStrings:
    // cannot expand; current actual usage 3610
    kStringsAllocSize = 4096,
    // Initial size passed to uhash_openSize for gParentMap:
    // can expand; current actual usage 161
    kParentMapInitCount = 161
};
402 static UBool U_CALLCONV
ualocale_cleanup(void);
407 static UInitOnce gUALocaleCacheInitOnce
= U_INITONCE_INITIALIZER
;
409 static int gMapDataState
= 0; // 0 = not initialized, 1 = initialized, -1 = failure
410 static char* gStrings
= NULL
;
411 static UHashtable
* gAliasMap
= NULL
;
412 static UHashtable
* gParentMap
= NULL
;
418 static UBool U_CALLCONV
ualocale_cleanup(void)
422 gUALocaleCacheInitOnce
.reset();
424 if (gMapDataState
> 0) {
425 uhash_close(gParentMap
);
427 uhash_close(gAliasMap
);
436 static void initializeMapData() {
439 UResourceBundle
* curBundle
;
443 int32_t entryIndex
, icuEntryCount
;
445 ucln_common_registerCleanup(UCLN_COMMON_LOCALE
, ualocale_cleanup
);
447 gStrings
= (char*)uprv_malloc(kStringsAllocSize
);
449 stringsPtr
= gStrings
;
450 stringsEnd
= gStrings
+ kStringsAllocSize
;
453 status
= U_ZERO_ERROR
;
457 curBundle
= ures_openDirect(NULL
, "metadata", &status
);
458 curBundle
= ures_getByKey(curBundle
, "alias", curBundle
, &status
);
459 curBundle
= ures_getByKey(curBundle
, "language", curBundle
, &status
); // language resource is URES_TABLE
460 if (U_SUCCESS(status
)) {
461 icuEntryCount
= ures_getSize(curBundle
); // currently 331
464 status
= U_ZERO_ERROR
;
465 gAliasMap
= uhash_openSize(uhash_hashIChars
, uhash_compareIChars
, uhash_compareIChars
,
466 kAppleAliasMapCount
+ icuEntryCount
, &status
);
467 // defaults to keyDeleter NULL
468 if (U_SUCCESS(status
)) {
469 for (entryIndex
= 0; entryIndex
< kAppleAliasMapCount
&& U_SUCCESS(status
); entryIndex
++) {
470 uhash_put(gAliasMap
, (void*)appleAliasMap
[entryIndex
][0], (void*)appleAliasMap
[entryIndex
][1], &status
);
472 status
= U_ZERO_ERROR
;
473 UResourceBundle
* aliasMapBundle
= NULL
;
474 for (entryIndex
= 0; entryIndex
< icuEntryCount
&& U_SUCCESS(status
); entryIndex
++) {
475 aliasMapBundle
= ures_getByIndex(curBundle
, entryIndex
, aliasMapBundle
, &status
);
476 if (U_FAILURE(status
)) {
479 const char * keyStr
= ures_getKey(aliasMapBundle
);
480 int32_t len
= uprv_strlen(keyStr
);
481 if (len
>= stringsEnd
- stringsPtr
) {
484 uprv_strcpy(stringsPtr
, keyStr
);
485 char * inLocStr
= stringsPtr
;
486 stringsPtr
+= len
+ 1;
488 len
= stringsEnd
- stringsPtr
- 1;
489 ures_getUTF8StringByKey(aliasMapBundle
, "replacement", stringsPtr
, &len
, TRUE
, &status
);
490 if (U_FAILURE(status
)) {
494 uhash_put(gAliasMap
, inLocStr
, stringsPtr
, &status
);
495 stringsPtr
+= len
+ 1;
497 ures_close(aliasMapBundle
);
499 ures_close(curBundle
);
501 gMapDataState
= -1; // failure
504 ures_close(curBundle
);
506 status
= U_ZERO_ERROR
;
507 gParentMap
= uhash_openSize(uhash_hashIChars
, uhash_compareIChars
, uhash_compareIChars
,
508 kParentMapInitCount
, &status
);
509 // defaults to keyDeleter NULL
510 if (U_SUCCESS(status
)) {
511 curBundle
= ures_openDirect(NULL
, "supplementalData", &status
);
512 curBundle
= ures_getByKey(curBundle
, "parentLocales", curBundle
, &status
); // parentLocales resource is URES_TABLE
513 if (U_SUCCESS(status
)) {
514 UResourceBundle
* parentMapBundle
= NULL
;
516 parentMapBundle
= ures_getNextResource(curBundle
, parentMapBundle
, &status
);
517 if (U_FAILURE(status
)) {
518 break; // no more parent bundles, normal exit
520 const char * keyStr
= ures_getKey(parentMapBundle
);
521 int32_t len
= uprv_strlen(keyStr
);
522 if (len
>= stringsEnd
- stringsPtr
) {
525 uprv_strcpy(stringsPtr
, keyStr
);
526 char * parentStr
= stringsPtr
;
527 stringsPtr
+= len
+ 1;
529 if (ures_getType(parentMapBundle
) == URES_STRING
) {
530 len
= stringsEnd
- stringsPtr
- 1;
531 ures_getUTF8String(parentMapBundle
, stringsPtr
, &len
, TRUE
, &status
);
532 if (U_FAILURE(status
)) {
536 uhash_put(gParentMap
, stringsPtr
, parentStr
, &status
);
537 stringsPtr
+= len
+ 1;
539 // should be URES_ARRAY
540 icuEntryCount
= ures_getSize(parentMapBundle
);
541 for (entryIndex
= 0; entryIndex
< icuEntryCount
&& U_SUCCESS(status
); entryIndex
++) {
542 len
= stringsEnd
- stringsPtr
- 1;
543 ures_getUTF8StringByIndex(parentMapBundle
, entryIndex
, stringsPtr
, &len
, TRUE
, &status
);
544 if (U_FAILURE(status
)) {
548 uhash_put(gParentMap
, stringsPtr
, parentStr
, &status
);
549 stringsPtr
+= len
+ 1;
553 ures_close(parentMapBundle
);
555 ures_close(curBundle
);
557 status
= U_ZERO_ERROR
;
558 for (entryIndex
= 0; entryIndex
< kAppleParentMapCount
&& U_SUCCESS(status
); entryIndex
++) {
559 uhash_put(gParentMap
, (void*)appleParentMap
[entryIndex
][0], (void*)appleParentMap
[entryIndex
][1], &status
);
562 uhash_close(gAliasMap
);
565 gMapDataState
= -1; // failure
569 //printf("# gStrings size %ld\n", stringsPtr - gStrings);
570 //printf("# gParentMap count %d\n", uhash_count(gParentMap));
576 // The following maps aliases, etc. Ensures 0-termination if no error.
577 static void ualoc_normalize(const char *locale
, char *normalized
, int32_t normalizedCapacity
, UErrorCode
*status
)
579 if (U_FAILURE(*status
)) {
582 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
584 const char *replacement
= NULL
;
585 if (gMapDataState
> 0) {
586 replacement
= (const char *)uhash_get(gAliasMap
, locale
);
588 if (replacement
== NULL
) {
589 replacement
= locale
;
591 int32_t len
= uprv_strlen(replacement
);
592 if (len
< normalizedCapacity
) { // allow for 0 termination
593 uprv_strcpy(normalized
, replacement
);
595 *status
= U_BUFFER_OVERFLOW_ERROR
;
599 static void ualoc_getParent(const char *locale
, char *parent
, int32_t parentCapacity
, UErrorCode
*status
)
601 if (U_FAILURE(*status
)) {
604 if (gMapDataState
> 0) {
605 const char *replacement
= (const char *)uhash_get(gParentMap
, locale
);
607 int32_t len
= uprv_strlen(replacement
);
608 if (len
< parentCapacity
) { // allow for 0 termination
609 uprv_strcpy(parent
, replacement
);
611 *status
= U_BUFFER_OVERFLOW_ERROR
;
616 uloc_getParent(locale
, parent
, parentCapacity
- 1, status
);
617 parent
[parentCapacity
- 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
620 // Might do something better for this, perhaps maximizing locales then stripping
621 const char * getLocParent(const char *locale
)
623 int32_t locParentIndex
;
624 for (locParentIndex
= 0; locParentIndex
< kLocParentMapCount
; locParentIndex
++) {
625 if (uprv_strcmp(locale
, locParentMap
[locParentIndex
][0]) == 0) {
626 return locParentMap
[locParentIndex
][1];
632 // this just checks if the *pointer* value is already in the array
633 static UBool
locInArray(const char* *localizationsToUse
, int32_t locsToUseCount
, const char *locToCheck
)
636 for (locIndex
= 0; locIndex
< locsToUseCount
; locIndex
++) {
637 if (locToCheck
== localizationsToUse
[locIndex
]) {
644 enum { kLangScriptRegMaxLen
= ULOC_LANG_CAPACITY
+ ULOC_SCRIPT_CAPACITY
+ ULOC_COUNTRY_CAPACITY
}; // currently 22
647 ualoc_localizationsToUse( const char* const *preferredLanguages
,
648 int32_t preferredLanguagesCount
,
649 const char* const *availableLocalizations
,
650 int32_t availableLocalizationsCount
,
651 const char* *localizationsToUse
,
652 int32_t localizationsToUseCapacity
,
655 if (U_FAILURE(*status
)) {
658 if (preferredLanguages
== NULL
|| availableLocalizations
== NULL
|| localizationsToUse
== NULL
) {
659 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
662 // get resource data, need to protect with mutex
663 if (gMapDataState
== 0) {
664 umtx_initOnce(gUALocaleCacheInitOnce
, initializeMapData
);
666 int32_t locsToUseCount
= 0;
667 int32_t prefLangIndex
, availLocIndex
= 0;
668 char (*availLocBase
)[kLangScriptRegMaxLen
+ 1] = NULL
;
669 char (*availLocNorm
)[kLangScriptRegMaxLen
+ 1] = NULL
;
670 UBool checkAvailLocParents
= FALSE
;
671 UBool foundMatch
= FALSE
;
673 // Part 1, find the best matching localization, if any
674 for (prefLangIndex
= 0; prefLangIndex
< preferredLanguagesCount
; prefLangIndex
++) {
675 char prefLangBaseName
[kLangScriptRegMaxLen
+ 1];
676 char prefLangNormName
[kLangScriptRegMaxLen
+ 1];
677 char prefLangParentName
[kLangScriptRegMaxLen
+ 1];
678 UErrorCode tmpStatus
= U_ZERO_ERROR
;
680 if (preferredLanguages
[prefLangIndex
] == NULL
) {
681 continue; // skip NULL preferredLanguages entry, go to next one
683 // use underscores, fix bad capitalization, delete any keywords
684 uloc_getBaseName(preferredLanguages
[prefLangIndex
], prefLangBaseName
, kLangScriptRegMaxLen
, &tmpStatus
);
685 if (U_FAILURE(tmpStatus
) || prefLangBaseName
[0] == 0 ||
686 uprv_strcmp(prefLangBaseName
, "root") == 0 || prefLangBaseName
[0] == '_') {
687 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
689 prefLangBaseName
[kLangScriptRegMaxLen
] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
690 //printf(" # prefLangBaseName %s\n", prefLangBaseName);
692 // if we have not already allocated and filled the array of
693 // base availableLocalizations, do so now.
694 if (availLocBase
== NULL
) {
695 availLocBase
= (char (*)[kLangScriptRegMaxLen
+ 1])uprv_malloc(availableLocalizationsCount
* (kLangScriptRegMaxLen
+ 1));
696 if (availLocBase
== NULL
) {
697 continue; // cannot further check this preferredLanguages entry, go to next one
699 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
700 tmpStatus
= U_ZERO_ERROR
;
701 uloc_getBaseName(availableLocalizations
[availLocIndex
], availLocBase
[availLocIndex
], kLangScriptRegMaxLen
, &tmpStatus
);
702 if (U_FAILURE(tmpStatus
) || uprv_strcmp(availLocBase
[availLocIndex
], "root") == 0 || availLocBase
[availLocIndex
][0] == '_') {
703 availLocBase
[availLocIndex
][0] = 0; // effectively remove this entry
705 availLocBase
[availLocIndex
][kLangScriptRegMaxLen
] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
709 // first compare base preferredLanguage to base versions of availableLocalizations names
710 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
711 if (uprv_strcmp(prefLangBaseName
, availLocBase
[availLocIndex
]) == 0) {
712 foundMatch
= TRUE
; // availLocIndex records where
717 //printf(" # matched actualLocName\n");
718 break; // found a loc for this preferredLanguages entry
721 // get normalized preferredLanguage
722 tmpStatus
= U_ZERO_ERROR
;
723 ualoc_normalize(prefLangBaseName
, prefLangNormName
, kLangScriptRegMaxLen
+ 1, &tmpStatus
);
724 if (U_FAILURE(tmpStatus
)) {
725 continue; // can't handle this preferredLanguages entry, go to next one
727 //printf(" # prefLangNormName %s\n", prefLangNormName);
728 // if we have not already allocated and filled the array of
729 // normalized availableLocalizations, do so now.
730 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK",
731 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
732 if (availLocNorm
== NULL
) {
733 availLocNorm
= (char (*)[kLangScriptRegMaxLen
+ 1])uprv_malloc(availableLocalizationsCount
* (kLangScriptRegMaxLen
+ 1));
734 if (availLocNorm
== NULL
) {
735 continue; // cannot further check this preferredLanguages entry, go to next one
737 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
738 tmpStatus
= U_ZERO_ERROR
;
739 ualoc_normalize(availLocBase
[availLocIndex
], availLocNorm
[availLocIndex
], kLangScriptRegMaxLen
+ 1, &tmpStatus
);
740 if (U_FAILURE(tmpStatus
)) {
741 availLocNorm
[availLocIndex
][0] = 0; // effectively remove this entry
742 } else if (getLocParent(availLocNorm
[availLocIndex
]) != NULL
) {
743 checkAvailLocParents
= TRUE
;
745 //printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
748 // now compare normalized preferredLanguage to normalized localization names
749 // if matches, copy *original* localization name
750 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
751 if (uprv_strcmp(prefLangNormName
, availLocNorm
[availLocIndex
]) == 0) {
752 foundMatch
= TRUE
; // availLocIndex records where
757 //printf(" # matched actualLocNormName\n");
758 break; // found a loc for this preferredLanguages entry
761 // now walk up the parent chain for preferredLanguage
762 // until we find a match or hit root
763 uprv_strcpy(prefLangBaseName
, prefLangNormName
);
764 while (!foundMatch
) {
765 tmpStatus
= U_ZERO_ERROR
;
766 ualoc_getParent(prefLangBaseName
, prefLangParentName
, kLangScriptRegMaxLen
+ 1, &tmpStatus
);
767 if (U_FAILURE(tmpStatus
) || uprv_strcmp(prefLangParentName
, "root") == 0 || prefLangParentName
[0] == 0) {
768 break; // reached root or cannot proceed further
770 //printf(" # prefLangParentName %s\n", prefLangParentName);
772 // now compare this preferredLanguage parent to normalized localization names
773 // if matches, copy *original* localization name
774 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
775 if (uprv_strcmp(prefLangParentName
, availLocNorm
[availLocIndex
]) == 0) {
776 foundMatch
= TRUE
; // availLocIndex records where
780 uprv_strcpy(prefLangBaseName
, prefLangParentName
);
783 break; // found a loc for this preferredLanguages entry
786 // last try, use parents of selected
787 if (checkAvailLocParents
) {
788 // now walk up the parent chain for preferredLanguage again
789 // checking against parents of selected availLocNorm entries
790 // but this time start with current prefLangNormName
791 uprv_strcpy(prefLangBaseName
, prefLangNormName
);
793 tmpStatus
= U_ZERO_ERROR
;
794 // now compare this preferredLanguage to normalized localization names
795 // parent if have one for this; if matches, copy *original* localization name
796 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
797 const char *availLocParent
= getLocParent(availLocNorm
[availLocIndex
]);
798 if (availLocParent
&& uprv_strcmp(prefLangBaseName
, availLocParent
) == 0) {
799 foundMatch
= TRUE
; // availLocIndex records where
806 ualoc_getParent(prefLangBaseName
, prefLangParentName
, kLangScriptRegMaxLen
+ 1, &tmpStatus
);
807 if (U_FAILURE(tmpStatus
) || uprv_strcmp(prefLangParentName
, "root") == 0 || prefLangParentName
[0] == 0) {
808 break; // reached root or cannot proceed further
810 uprv_strcpy(prefLangBaseName
, prefLangParentName
);
814 break; // found a loc for this preferredLanguages entry
818 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
820 // Here availLocIndex corresponds to the first matched localization
821 UErrorCode tmpStatus
= U_ZERO_ERROR
;
822 int32_t availLocMatchIndex
= availLocIndex
;
823 if (locsToUseCount
< localizationsToUseCapacity
) {
824 localizationsToUse
[locsToUseCount
++] = availableLocalizations
[availLocMatchIndex
];
826 // at this point we must have availLocBase, and minimally matched against that.
827 // if we have not already allocated and filled the array of
828 // normalized availableLocalizations, do so now, but don't require it
829 if (availLocNorm
== NULL
) {
830 availLocNorm
= (char (*)[kLangScriptRegMaxLen
+ 1])uprv_malloc(availableLocalizationsCount
* (kLangScriptRegMaxLen
+ 1));
831 if (availLocNorm
!= NULL
) {
832 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
833 tmpStatus
= U_ZERO_ERROR
;
834 ualoc_normalize(availLocBase
[availLocIndex
], availLocNorm
[availLocIndex
], kLangScriptRegMaxLen
+ 1, &tmpStatus
);
835 if (U_FAILURE(tmpStatus
)) {
836 availLocNorm
[availLocIndex
][0] = 0; // effectively remove this entry
842 // add normalized form of matching loc, if different and in availLocBase
843 if (locsToUseCount
< localizationsToUseCapacity
) {
844 tmpStatus
= U_ZERO_ERROR
;
845 char matchedLocNormName
[kLangScriptRegMaxLen
+ 1];
846 char matchedLocParentName
[kLangScriptRegMaxLen
+ 1];
847 // get normalized form of matching loc
848 if (availLocNorm
!= NULL
) {
849 uprv_strcpy(matchedLocNormName
, availLocNorm
[availLocMatchIndex
]);
851 ualoc_normalize(availLocBase
[availLocMatchIndex
], matchedLocNormName
, kLangScriptRegMaxLen
+ 1, &tmpStatus
);
853 if (U_SUCCESS(tmpStatus
)) {
854 // add normalized form of matching loc, if different and in availLocBase
855 if (uprv_strcmp(matchedLocNormName
, localizationsToUse
[0]) != 0) {
856 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
857 // from this point on, availLocIndex no longer corresponds to the matched localization.
858 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
859 if ( (uprv_strcmp(matchedLocNormName
, availLocBase
[availLocIndex
]) == 0
860 || (availLocNorm
!= NULL
&& uprv_strcmp(matchedLocNormName
, availLocNorm
[availLocIndex
]) == 0))
861 && !locInArray(localizationsToUse
, locsToUseCount
, availableLocalizations
[availLocIndex
])) {
862 localizationsToUse
[locsToUseCount
++] = availableLocalizations
[availLocIndex
];
868 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
869 while (locsToUseCount
< localizationsToUseCapacity
) {
870 ualoc_getParent(matchedLocNormName
, matchedLocParentName
, kLangScriptRegMaxLen
+ 1, &tmpStatus
);
871 if (U_FAILURE(tmpStatus
) || uprv_strcmp(matchedLocParentName
, "root") == 0 || matchedLocParentName
[0] == 0) {
872 break; // reached root or cannot proceed further
875 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
876 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
877 if ( (uprv_strcmp(matchedLocParentName
, availLocBase
[availLocIndex
]) == 0
878 || (availLocNorm
!= NULL
&& uprv_strcmp(matchedLocParentName
, availLocNorm
[availLocIndex
]) == 0))
879 && !locInArray(localizationsToUse
, locsToUseCount
, availableLocalizations
[availLocIndex
])) {
880 localizationsToUse
[locsToUseCount
++] = availableLocalizations
[availLocIndex
];
884 uprv_strcpy(matchedLocNormName
, matchedLocParentName
);
887 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
888 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
889 // matchedLocNormName again, comparing against parents of of selected availLocNorm entries.
890 // But this picks up too many matches that are not parents of the matched localization. So
891 // we just handle these specially.
892 if ( locsToUseCount
< localizationsToUseCapacity
893 && (uprv_strcmp(availLocBase
[availLocMatchIndex
], "zh_HK") == 0
894 || uprv_strcmp(availLocBase
[availLocMatchIndex
], "zh_MO") == 0) ) {
895 int32_t zhTW_matchIndex
= -1;
896 UBool zhHant_found
= FALSE
;
897 for (availLocIndex
= 0; availLocIndex
< availableLocalizationsCount
; availLocIndex
++) {
898 if ( zhTW_matchIndex
< 0 && uprv_strcmp("zh_TW", availLocBase
[availLocIndex
]) == 0 ) {
899 zhTW_matchIndex
= availLocIndex
;
901 if ( !zhHant_found
&& uprv_strcmp("zh_Hant", availLocBase
[availLocIndex
]) == 0 ) {
905 if (zhTW_matchIndex
>= 0 && !zhHant_found
906 && !locInArray(localizationsToUse
, locsToUseCount
, availableLocalizations
[zhTW_matchIndex
])) {
907 localizationsToUse
[locsToUseCount
++] = availableLocalizations
[zhTW_matchIndex
];
914 uprv_free(availLocNorm
);
915 uprv_free(availLocBase
);
916 return locsToUseCount
;