]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ualoc.cpp
ICU-551.30.tar.gz
[apple/icu.git] / icuSources / common / ualoc.cpp
1 /*
2 *****************************************************************************************
3 * Copyright (C) 2014-2015 Apple Inc. All Rights Reserved.
4 *****************************************************************************************
5 */
6
7 #include "unicode/utypes.h"
8 #include "unicode/ualoc.h"
9 #include "unicode/uloc.h"
10 #include "unicode/ures.h"
11 #include "unicode/putil.h"
12 #include "cstring.h"
13 #include "cmemory.h"
14 #include "uhash.h"
15 #include "umutex.h"
16 #include "ucln_cmn.h"
17 // the following has replacements for some math.h funcs etc
18 #include "putilimp.h"
19
20
21 // The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
22 // From its docs (adapted): [IntF is] a special integer that represents the number in
23 // normalized scientific notation.
24 // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
25 // offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
26 // 14660000000000 -> 1.46600E13 -> 63146600
27 // 0.0001 -> 1.00000E-4 -> 46100000
28 // -123.456 -> -1.23456E-2 -> -48123456
29 //
30 // Here to avoid an extra division we have the max coefficient as 999999 (instead of
31 // 9.99999) and instead offset the exponent by -55.
32 //
33 static double doubleFromIntF(int32_t intF) {
34 double coefficient = (double)(intF % 1000000);
35 int32_t exponent = (intF / 1000000) - 55;
36 return coefficient * uprv_pow10(exponent);
37 }
38
39 static int compareLangEntries(const void * entry1, const void * entry2) {
40 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
41 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
42 // want descending order
43 if (fraction1 > fraction2) return -1;
44 if (fraction1 < fraction2) return 1;
45 // userFractions the same, sort by languageCode
46 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
47 }
48
49 static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
50 static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
51 static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
52
53 enum {
54 kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
55 kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this
56 };
57
58 U_CAPI int32_t U_EXPORT2
59 ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
60 UALanguageEntry *entries, int32_t entriesCapacity,
61 UErrorCode *err)
62 {
63 if (U_FAILURE(*err)) {
64 return 0;
65 }
66 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
67 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
68 *err = U_ILLEGAL_ARGUMENT_ERROR;
69 return 0;
70 }
71 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
72 rb = ures_getByKey(rb, "territoryInfo", rb, err);
73 rb = ures_getByKey(rb, regionID, rb, err);
74 if (U_FAILURE(*err)) {
75 ures_close(rb);
76 return 0;
77 }
78
79 int32_t entryCount = 0;
80 UResourceBundle *langBund = NULL;
81 int32_t lbIdx, lbCount = ures_getSize(rb);
82 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
83 UALanguageEntry * langEntries = localLangEntries;
84 int32_t langEntriesMax = kLocalLangEntriesMax;
85
86 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
87 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
88 if (U_FAILURE(*err)) {
89 break;
90 }
91 const char * langCode = ures_getKey(langBund);
92 if (uprv_strcmp(langCode,"territoryF") == 0) {
93 continue;
94 }
95 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
96 continue; // a code we cannot handle
97 }
98
99 UErrorCode localErr = U_ZERO_ERROR;
100 double userFraction = 0.0;
101 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
102 if (U_SUCCESS(localErr)) {
103 int32_t intF = ures_getInt(itemBund, &localErr);
104 if (U_SUCCESS(localErr)) {
105 userFraction = doubleFromIntF(intF);
106 }
107 ures_close(itemBund);
108 }
109 if (userFraction < minimumFraction) {
110 continue;
111 }
112 if (entries != NULL) {
113 localErr = U_ZERO_ERROR;
114 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
115 int32_t ulen;
116 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
117 if (U_SUCCESS(localErr)) {
118 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
119 if (cmp == 0) {
120 langStatus = UALANGSTATUS_OFFICIAL;
121 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
122 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
123 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
124 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
125 }
126 }
127 // Now we have all of the info for our next entry
128 if (entryCount >= langEntriesMax) {
129 int32_t newMax = langEntriesMax * kLangEntriesFactor;
130 if (langEntries == localLangEntries) {
131 // first allocation, copy from local buf
132 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
133 if (langEntries == NULL) {
134 *err = U_MEMORY_ALLOCATION_ERROR;
135 break;
136 }
137 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
138 } else {
139 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
140 if (langEntries == NULL) {
141 *err = U_MEMORY_ALLOCATION_ERROR;
142 break;
143 }
144 }
145 langEntriesMax = newMax;
146 }
147 uprv_strcpy(langEntries[entryCount].languageCode, langCode);
148 langEntries[entryCount].userFraction = userFraction;
149 langEntries[entryCount].status = langStatus;
150 }
151 entryCount++;
152 }
153 ures_close(langBund);
154 ures_close(rb);
155 if (U_FAILURE(*err)) {
156 if (langEntries != localLangEntries) {
157 free(langEntries);
158 }
159 return 0;
160 }
161 if (entries != NULL) {
162 // sort langEntries, copy entries that fit to provided array
163 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
164 if (entryCount > entriesCapacity) {
165 entryCount = entriesCapacity;
166 }
167 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
168 if (langEntries != localLangEntries) {
169 free(langEntries);
170 }
171 }
172 return entryCount;
173 }
174
// Forced parents consulted by ualoc_getAppleParent for locales starting with "en" or "zh".
// Layout: a flat list of locale/parent string pairs, closed by a single NULL.
// The lookup gives up at the first locale key that compares greater than the locale
// being searched for, so the keys must stay in ascending uprv_strcmp order.
static const char * forceParent[] = {
    /* locale      parent  */
    "en_AU",      "en_GB",
    "en_BD",      "en_GB",  // en for Bangladesh
    "en_HK",      "en_GB",  // en for Hong Kong
    "en_IN",      "en_GB",
    "en_MY",      "en_GB",  // en for Malaysia
    "en_PK",      "en_GB",  // en for Pakistan
    "zh",         "zh_CN",
    "zh_CN",      "root",
    "zh_Hant",    "zh_TW",
    "zh_TW",      "root",
    NULL
};
188
189 U_CAPI int32_t U_EXPORT2
190 ualoc_getAppleParent(const char* localeID,
191 char * parent,
192 int32_t parentCapacity,
193 UErrorCode* err)
194 {
195 UResourceBundle *rb;
196 int32_t len;
197 UErrorCode tempStatus;
198 char locbuf[ULOC_FULLNAME_CAPACITY+1];
199 char * foundDoubleUnderscore;
200
201 if (U_FAILURE(*err)) {
202 return 0;
203 }
204 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
205 *err = U_ILLEGAL_ARGUMENT_ERROR;
206 return 0;
207 }
208 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
209 if (U_FAILURE(*err)) {
210 return 0;
211 }
212 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
213 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
214 *err = U_ZERO_ERROR;
215 }
216 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
217 if (foundDoubleUnderscore != NULL) {
218 *foundDoubleUnderscore = 0; /* terminate at the __ */
219 len = uprv_strlen(locbuf);
220 }
221 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
222 const char ** forceParentPtr = forceParent;
223 const char * testCurLoc;
224 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
225 int cmp = uprv_strcmp(locbuf, testCurLoc);
226 if (cmp <= 0) {
227 if (cmp == 0) {
228 len = uprv_strlen(*forceParentPtr);
229 if (len < parentCapacity) {
230 uprv_strcpy(parent, *forceParentPtr);
231 } else {
232 *err = U_BUFFER_OVERFLOW_ERROR;
233 }
234 return len;
235 }
236 break;
237 }
238 forceParentPtr++;
239 }
240 }
241 tempStatus = U_ZERO_ERROR;
242 rb = ures_openDirect(NULL, locbuf, &tempStatus);
243 if (U_SUCCESS(tempStatus)) {
244 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
245 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
246 // we have followed an alias
247 len = uprv_strlen(actualLocale);
248 if (len < parentCapacity) {
249 uprv_strcpy(parent, actualLocale);
250 } else {
251 *err = U_BUFFER_OVERFLOW_ERROR;
252 }
253 ures_close(rb);
254 return len;
255 }
256 tempStatus = U_ZERO_ERROR;
257 const UChar * parentUName = ures_getStringByKey(rb, "%%Parent", &len, &tempStatus);
258 if (U_SUCCESS(tempStatus) && tempStatus != U_USING_FALLBACK_WARNING) {
259 if (len < parentCapacity) {
260 u_UCharsToChars(parentUName, parent, len + 1);
261 } else {
262 *err = U_BUFFER_OVERFLOW_ERROR;
263 }
264 ures_close(rb);
265 return len;
266 }
267 ures_close(rb);
268 }
269 len = uloc_getParent(locbuf, parent, parentCapacity, err);
270 if (U_SUCCESS(*err) && len == 0) {
271 len = 4;
272 if (len < parentCapacity) {
273 uprv_strcpy(parent, "root");
274 } else {
275 *err = U_BUFFER_OVERFLOW_ERROR;
276 }
277 }
278 return len;
279 }
280
281 // =================
282 // Data and related functions for ualoc_localizationsToUse
283 // =================
284
// Apple-specific aliases, each row being { alias, replacement locale }.
// These rows are loaded into gAliasMap by initializeMapData.
// Aliases are spelled in lowercase because lookups happen on names that have
// already been processed by uloc_getBaseName.
static const char * appleAliasMap[][2] = {
    /* alias            replacement */
    { "arabic",         "ar"      },    // T2
    { "chinese",        "zh_Hans" },    // T0
    { "danish",         "da"      },    // T2
    { "dutch",          "nl"      },    // T1, still in use
    { "english",        "en"      },    // T0, still in use
    { "finnish",        "fi"      },    // T2
    { "french",         "fr"      },    // T0, still in use
    { "german",         "de"      },    // T0, still in use
    { "italian",        "it"      },    // T1, still in use
    { "japanese",       "ja"      },    // T0, still in use
    { "korean",         "ko"      },    // T1
    { "norwegian",      "nb"      },    // T2
    { "polish",         "pl"      },    // T2
    { "portuguese",     "pt"      },    // T2
    { "russian",        "ru"      },    // T2
    { "spanish",        "es"      },    // T1, still in use
    { "swedish",        "sv"      },    // T2
    { "thai",           "th"      },    // T2
    { "turkish",        "tr"      },    // T2
    { "zh",             "zh_Hans" },    // special
};
// number of rows in appleAliasMap
enum { kAppleAliasMapCount = sizeof(appleAliasMap)/sizeof(appleAliasMap[0]) };
309
// Apple-specific parents, each row being { locale, parent }.
// initializeMapData puts these rows into gParentMap after the entries read from
// the parentLocales resource.
// Row remarks: "custom parent" = Apple custom parent,
//              "addition"      = Apple locale addition.
static const char * appleParentMap[][2] = {
    /* locale       parent   */
    { "en_150",     "en_GB"  },     // custom parent
    { "en_AD",      "en_150" },     // addition
    { "en_AL",      "en_150" },     // addition
    { "en_AT",      "en_150" },     // addition
    { "en_AU",      "en_GB"  },     // custom parent
    { "en_BA",      "en_150" },     // addition
    { "en_BD",      "en_GB"  },     // custom parent
    { "en_CH",      "en_150" },     // addition
    { "en_CY",      "en_150" },     // addition
    { "en_CZ",      "en_150" },     // addition
    { "en_DE",      "en_150" },     // addition
    { "en_DK",      "en_150" },     // addition
    { "en_EE",      "en_150" },     // addition
    { "en_ES",      "en_150" },     // addition
    { "en_FI",      "en_150" },     // addition
    { "en_FR",      "en_150" },     // addition
    { "en_GR",      "en_150" },     // addition
    { "en_HK",      "en_GB"  },     // custom parent
    { "en_HR",      "en_150" },     // addition
    { "en_HU",      "en_150" },     // addition
    { "en_IL",      "en_001" },     // addition
    { "en_IN",      "en_GB"  },     // custom parent
    { "en_IS",      "en_150" },     // addition
    { "en_IT",      "en_150" },     // addition
    { "en_LT",      "en_150" },     // addition
    { "en_LU",      "en_150" },     // addition
    { "en_LV",      "en_150" },     // addition
    { "en_ME",      "en_150" },     // addition
    { "en_MY",      "en_GB"  },     // custom parent
    { "en_NL",      "en_150" },     // addition
    { "en_NO",      "en_150" },     // addition
    { "en_PK",      "en_GB"  },     // custom parent
    { "en_PL",      "en_150" },     // addition
    { "en_PT",      "en_150" },     // addition
    { "en_RO",      "en_150" },     // addition
    { "en_RU",      "en_150" },     // addition
    { "en_SE",      "en_150" },     // addition
    { "en_SI",      "en_150" },     // addition
    { "en_SK",      "en_150" },     // addition
    { "en_TR",      "en_150" },     // addition
};
// number of rows in appleParentMap
enum { kAppleParentMapCount = sizeof(appleParentMap)/sizeof(appleParentMap[0]) };
353
// Parents of selected available localizations, each row being { localization, parent };
// searched linearly by getLocParent. Add rows as necessary.
// Might do something better for this, perhaps maximizing locales then stripping.
static const char * locParentMap[][2] = {
    /* localization     parent    */
    { "pt_BR",          "pt"      },
    { "pt_PT",          "pt"      },
    { "zh_Hans_CN",     "zh_Hans" },
    { "zh_Hant_TW",     "zh_Hant" },
};
// number of rows in locParentMap
enum { kLocParentMapCount = sizeof(locParentMap)/sizeof(locParentMap[0]) };
363
364 enum {
365 kStringsAllocSize = 4096, // cannot expand; current actual usage 3610
366 kParentMapInitCount = 161 // can expand; current actual usage 161
367 };
368
369 U_CDECL_BEGIN
370 static UBool U_CALLCONV ualocale_cleanup(void);
371 U_CDECL_END
372
373 U_NAMESPACE_BEGIN
374
375 static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
376
377 static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
378 static char* gStrings = NULL;
379 static UHashtable* gAliasMap = NULL;
380 static UHashtable* gParentMap = NULL;
381
382 U_NAMESPACE_END
383
384 U_CDECL_BEGIN
385
386 static UBool U_CALLCONV ualocale_cleanup(void)
387 {
388 U_NAMESPACE_USE
389
390 gUALocaleCacheInitOnce.reset();
391
392 if (gMapDataState > 0) {
393 uhash_close(gParentMap);
394 gParentMap = NULL;
395 uhash_close(gAliasMap);
396 gAliasMap = NULL;
397 uprv_free(gStrings);
398 gStrings = NULL;
399 }
400 gMapDataState = 0;
401 return TRUE;
402 }
403
404 static void initializeMapData() {
405 U_NAMESPACE_USE
406
407 UResourceBundle * curBundle;
408 char* stringsPtr;
409 char* stringsEnd;
410 UErrorCode status;
411 int32_t entryIndex, icuEntryCount;
412
413 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
414
415 gStrings = (char*)uprv_malloc(kStringsAllocSize);
416 if (gStrings) {
417 stringsPtr = gStrings;
418 stringsEnd = gStrings + kStringsAllocSize;
419 }
420
421 status = U_ZERO_ERROR;
422 curBundle = NULL;
423 icuEntryCount = 0;
424 if (gStrings) {
425 curBundle = ures_openDirect(NULL, "metadata", &status);
426 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
427 curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
428 if (U_SUCCESS(status)) {
429 icuEntryCount = ures_getSize(curBundle); // currently 331
430 }
431 }
432 status = U_ZERO_ERROR;
433 gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
434 kAppleAliasMapCount + icuEntryCount, &status);
435 // defaults to keyDeleter NULL
436 if (U_SUCCESS(status)) {
437 for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
438 uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
439 }
440 status = U_ZERO_ERROR;
441 UResourceBundle * aliasMapBundle = NULL;
442 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
443 aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
444 if (U_FAILURE(status)) {
445 break; // error
446 }
447 const char * keyStr = ures_getKey(aliasMapBundle);
448 int32_t len = uprv_strlen(keyStr);
449 if (len >= stringsEnd - stringsPtr) {
450 break; // error
451 }
452 uprv_strcpy(stringsPtr, keyStr);
453 char * inLocStr = stringsPtr;
454 stringsPtr += len + 1;
455
456 len = stringsEnd - stringsPtr - 1;
457 ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
458 if (U_FAILURE(status)) {
459 break; // error
460 }
461 stringsPtr[len] = 0;
462 uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
463 stringsPtr += len + 1;
464 }
465 ures_close(aliasMapBundle);
466 } else {
467 ures_close(curBundle);
468 uprv_free(gStrings);
469 gMapDataState = -1; // failure
470 return;
471 }
472 ures_close(curBundle);
473
474 status = U_ZERO_ERROR;
475 gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
476 kParentMapInitCount, &status);
477 // defaults to keyDeleter NULL
478 if (U_SUCCESS(status)) {
479 curBundle = ures_openDirect(NULL, "supplementalData", &status);
480 curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
481 if (U_SUCCESS(status)) {
482 UResourceBundle * parentMapBundle = NULL;
483 while (TRUE) {
484 parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
485 if (U_FAILURE(status)) {
486 break; // no more parent bundles, normal exit
487 }
488 const char * keyStr = ures_getKey(parentMapBundle);
489 int32_t len = uprv_strlen(keyStr);
490 if (len >= stringsEnd - stringsPtr) {
491 break; // error
492 }
493 uprv_strcpy(stringsPtr, keyStr);
494 char * parentStr = stringsPtr;
495 stringsPtr += len + 1;
496
497 if (ures_getType(parentMapBundle) == URES_STRING) {
498 len = stringsEnd - stringsPtr - 1;
499 ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
500 if (U_FAILURE(status)) {
501 break; // error
502 }
503 stringsPtr[len] = 0;
504 uhash_put(gParentMap, stringsPtr, parentStr, &status);
505 stringsPtr += len + 1;
506 } else {
507 // should be URES_ARRAY
508 icuEntryCount = ures_getSize(parentMapBundle);
509 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
510 len = stringsEnd - stringsPtr - 1;
511 ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
512 if (U_FAILURE(status)) {
513 break;
514 }
515 stringsPtr[len] = 0;
516 uhash_put(gParentMap, stringsPtr, parentStr, &status);
517 stringsPtr += len + 1;
518 }
519 }
520 }
521 ures_close(parentMapBundle);
522 }
523 ures_close(curBundle);
524
525 status = U_ZERO_ERROR;
526 for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
527 uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
528 }
529 } else {
530 uhash_close(gAliasMap);
531 gAliasMap = NULL;
532 uprv_free(gStrings);
533 gMapDataState = -1; // failure
534 return;
535 }
536
537 //printf("# gStrings size %ld\n", stringsPtr - gStrings);
538 //printf("# gParentMap count %d\n", uhash_count(gParentMap));
539 gMapDataState = 1;
540 }
541
542 U_CDECL_END
543
544 // The following maps aliases, etc. Ensures 0-termination if no error.
545 static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
546 {
547 if (U_FAILURE(*status)) {
548 return;
549 }
550 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
551
552 const char *replacement = NULL;
553 if (gMapDataState > 0) {
554 replacement = (const char *)uhash_get(gAliasMap, locale);
555 }
556 if (replacement == NULL) {
557 replacement = locale;
558 }
559 int32_t len = uprv_strlen(replacement);
560 if (len < normalizedCapacity) { // allow for 0 termination
561 uprv_strcpy(normalized, replacement);
562 } else {
563 *status = U_BUFFER_OVERFLOW_ERROR;
564 }
565 }
566
567 static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
568 {
569 if (U_FAILURE(*status)) {
570 return;
571 }
572 if (gMapDataState > 0) {
573 const char *replacement = (const char *)uhash_get(gParentMap, locale);
574 if (replacement) {
575 int32_t len = uprv_strlen(replacement);
576 if (len < parentCapacity) { // allow for 0 termination
577 uprv_strcpy(parent, replacement);
578 } else {
579 *status = U_BUFFER_OVERFLOW_ERROR;
580 }
581 return;
582 }
583 }
584 uloc_getParent(locale, parent, parentCapacity - 1, status);
585 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
586 }
587
588 // Might do something better for this, perhaps maximizing locales then stripping
589 const char * getLocParent(const char *locale)
590 {
591 int32_t locParentIndex;
592 for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
593 if (uprv_strcmp(locale, locParentMap[locParentIndex][0]) == 0) {
594 return locParentMap[locParentIndex][1];
595 }
596 }
597 return NULL;
598 }
599
600 // this just checks if the *pointer* value is already in the array
601 static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
602 {
603 int32_t locIndex;
604 for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
605 if (locToCheck == localizationsToUse[locIndex]) {
606 return TRUE;
607 }
608 }
609 return FALSE;
610 }
611
612 enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
613
614 int32_t
615 ualoc_localizationsToUse( const char* const *preferredLanguages,
616 int32_t preferredLanguagesCount,
617 const char* const *availableLocalizations,
618 int32_t availableLocalizationsCount,
619 const char* *localizationsToUse,
620 int32_t localizationsToUseCapacity,
621 UErrorCode *status )
622 {
623 if (U_FAILURE(*status)) {
624 return -1;
625 }
626 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
627 *status = U_ILLEGAL_ARGUMENT_ERROR;
628 return -1;
629 }
630 // get resource data, need to protect with mutex
631 if (gMapDataState == 0) {
632 umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData);
633 }
634 int32_t locsToUseCount = 0;
635 int32_t prefLangIndex, availLocIndex = 0;
636 char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
637 char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
638 UBool checkAvailLocParents = FALSE;
639 UBool foundMatch = FALSE;
640
641 // Part 1, find the best matching localization, if any
642 for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
643 char prefLangBaseName[kLangScriptRegMaxLen + 1];
644 char prefLangNormName[kLangScriptRegMaxLen + 1];
645 char prefLangParentName[kLangScriptRegMaxLen + 1];
646 UErrorCode tmpStatus = U_ZERO_ERROR;
647
648 if (preferredLanguages[prefLangIndex] == NULL) {
649 continue; // skip NULL preferredLanguages entry, go to next one
650 }
651 // use underscores, fix bad capitalization, delete any keywords
652 uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
653 if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
654 uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
655 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
656 }
657 prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
658 //printf(" # prefLangBaseName %s\n", prefLangBaseName);
659
660 // if we have not already allocated and filled the array of
661 // base availableLocalizations, do so now.
662 if (availLocBase == NULL) {
663 availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
664 if (availLocBase == NULL) {
665 continue; // cannot further check this preferredLanguages entry, go to next one
666 }
667 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
668 tmpStatus = U_ZERO_ERROR;
669 uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
670 if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
671 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
672 } else {
673 availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
674 }
675 }
676 }
677 // first compare base preferredLanguage to base versions of availableLocalizations names
678 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
679 if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
680 foundMatch = TRUE; // availLocIndex records where
681 break;
682 }
683 }
684 if (foundMatch) {
685 //printf(" # matched actualLocName\n");
686 break; // found a loc for this preferredLanguages entry
687 }
688
689 // get normalized preferredLanguage
690 tmpStatus = U_ZERO_ERROR;
691 ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
692 if (U_FAILURE(tmpStatus)) {
693 continue; // can't handle this preferredLanguages entry, go to next one
694 }
695 //printf(" # prefLangNormName %s\n", prefLangNormName);
696 // if we have not already allocated and filled the array of
697 // normalized availableLocalizations, do so now.
698 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK",
699 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
700 if (availLocNorm == NULL) {
701 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
702 if (availLocNorm == NULL) {
703 continue; // cannot further check this preferredLanguages entry, go to next one
704 }
705 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
706 tmpStatus = U_ZERO_ERROR;
707 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
708 if (U_FAILURE(tmpStatus)) {
709 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
710 } else if (getLocParent(availLocNorm[availLocIndex]) != NULL) {
711 checkAvailLocParents = TRUE;
712 }
713 //printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
714 }
715 }
716 // now compare normalized preferredLanguage to normalized localization names
717 // if matches, copy *original* localization name
718 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
719 if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
720 foundMatch = TRUE; // availLocIndex records where
721 break;
722 }
723 }
724 if (foundMatch) {
725 //printf(" # matched actualLocNormName\n");
726 break; // found a loc for this preferredLanguages entry
727 }
728
729 // now walk up the parent chain for preferredLanguage
730 // until we find a match or hit root
731 uprv_strcpy(prefLangBaseName, prefLangNormName);
732 while (!foundMatch) {
733 tmpStatus = U_ZERO_ERROR;
734 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
735 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
736 break; // reached root or cannot proceed further
737 }
738 //printf(" # prefLangParentName %s\n", prefLangParentName);
739
740 // now compare this preferredLanguage parent to normalized localization names
741 // if matches, copy *original* localization name
742 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
743 if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
744 foundMatch = TRUE; // availLocIndex records where
745 break;
746 }
747 }
748 uprv_strcpy(prefLangBaseName, prefLangParentName);
749 }
750 if (foundMatch) {
751 break; // found a loc for this preferredLanguages entry
752 }
753
754 // last try, use parents of selected
755 if (checkAvailLocParents) {
756 // now walk up the parent chain for preferredLanguage again
757 // checking against parents of selected availLocNorm entries
758 // but this time start with current prefLangNormName
759 uprv_strcpy(prefLangBaseName, prefLangNormName);
760 while (TRUE) {
761 tmpStatus = U_ZERO_ERROR;
762 // now compare this preferredLanguage to normalized localization names
763 // parent if have one for this; if matches, copy *original* localization name
764 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
765 const char *availLocParent = getLocParent(availLocNorm[availLocIndex]);
766 if (availLocParent && uprv_strcmp(prefLangBaseName, availLocParent) == 0) {
767 foundMatch = TRUE; // availLocIndex records where
768 break;
769 }
770 }
771 if (foundMatch) {
772 break;
773 }
774 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
775 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
776 break; // reached root or cannot proceed further
777 }
778 uprv_strcpy(prefLangBaseName, prefLangParentName);
779 }
780 }
781 if (foundMatch) {
782 break; // found a loc for this preferredLanguages entry
783 }
784 }
785
786 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
787 if (foundMatch) {
788 // Here availLocIndex corresponds to the first matched localization
789 UErrorCode tmpStatus = U_ZERO_ERROR;
790 int32_t availLocMatchIndex = availLocIndex;
791 if (locsToUseCount < localizationsToUseCapacity) {
792 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
793 }
794 // at this point we must have availLocBase, and minimally matched against that.
795 // if we have not already allocated and filled the array of
796 // normalized availableLocalizations, do so now, but don't require it
797 if (availLocNorm == NULL) {
798 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
799 if (availLocNorm != NULL) {
800 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
801 tmpStatus = U_ZERO_ERROR;
802 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
803 if (U_FAILURE(tmpStatus)) {
804 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
805 }
806 }
807 }
808 }
809
810 // add normalized form of matching loc, if different and in availLocBase
811 if (locsToUseCount < localizationsToUseCapacity) {
812 tmpStatus = U_ZERO_ERROR;
813 char matchedLocNormName[kLangScriptRegMaxLen + 1];
814 char matchedLocParentName[kLangScriptRegMaxLen + 1];
815 // get normalized form of matching loc
816 if (availLocNorm != NULL) {
817 uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
818 } else {
819 ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
820 }
821 if (U_SUCCESS(tmpStatus)) {
822 // add normalized form of matching loc, if different and in availLocBase
823 if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
824 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
825 // from this point on, availLocIndex no longer corresponds to the matched localization.
826 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
827 if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
828 || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
829 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
830 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
831 break;
832 }
833 }
834 }
835
836 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
837 while (locsToUseCount < localizationsToUseCapacity) {
838 ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
839 if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
840 break; // reached root or cannot proceed further
841 }
842
843 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
844 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
845 if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
846 || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
847 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
848 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
849 break;
850 }
851 }
852 uprv_strcpy(matchedLocNormName, matchedLocParentName);
853 }
854
855 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
856 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
857 // matchedLocNormName again, comparing against parents of of selected availLocNorm entries.
858 // But this picks up too many matches that are not parents of the matched localization. So
859 // we just handle these specially.
860 if ( locsToUseCount < localizationsToUseCapacity
861 && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
862 || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
863 int32_t zhTW_matchIndex = -1;
864 UBool zhHant_found = FALSE;
865 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
866 if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
867 zhTW_matchIndex = availLocIndex;
868 }
869 if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
870 zhHant_found = TRUE;
871 }
872 }
873 if (zhTW_matchIndex >= 0 && !zhHant_found
874 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
875 localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
876 }
877 }
878 }
879 }
880 }
881
882 uprv_free(availLocNorm);
883 uprv_free(availLocBase);
884 return locsToUseCount;
885 }
886