]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ualoc.cpp
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / common / ualoc.cpp
1 /*
2 *****************************************************************************************
3 * Copyright (C) 2014-2016 Apple Inc. All Rights Reserved.
4 *****************************************************************************************
5 */
6
7 #include "unicode/utypes.h"
8 #include "unicode/ualoc.h"
9 #include "unicode/uloc.h"
10 #include "unicode/ures.h"
11 #include "unicode/putil.h"
12 #include "cstring.h"
13 #include "cmemory.h"
14 #include "uhash.h"
15 #include "umutex.h"
16 #include "ucln_cmn.h"
17 // the following has replacements for some math.h funcs etc
18 #include "putilimp.h"
19
20
21 // The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter.
22 // From its docs (adapted): [IntF is] a special integer that represents the number in
23 // normalized scientific notation.
24 // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
25 // offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g.
26 // 14660000000000 -> 1.46600E13 -> 63146600
27 // 0.0001 -> 1.00000E-4 -> 46100000
28 // -123.456 -> -1.23456E-2 -> -48123456
29 //
30 // Here to avoid an extra division we have the max coefficient as 999999 (instead of
31 // 9.99999) and instead offset the exponent by -55.
32 //
33 static double doubleFromIntF(int32_t intF) {
34 double coefficient = (double)(intF % 1000000);
35 int32_t exponent = (intF / 1000000) - 55;
36 return coefficient * uprv_pow10(exponent);
37 }
38
39 static int compareLangEntries(const void * entry1, const void * entry2) {
40 double fraction1 = ((const UALanguageEntry *)entry1)->userFraction;
41 double fraction2 = ((const UALanguageEntry *)entry2)->userFraction;
42 // want descending order
43 if (fraction1 > fraction2) return -1;
44 if (fraction1 < fraction2) return 1;
45 // userFractions the same, sort by languageCode
46 return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode);
47 }
48
49 static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official"
50 static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official"
51 static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional"
52
53 enum {
54 kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India
55 kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this
56 };
57
58 U_CAPI int32_t U_EXPORT2
59 ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction,
60 UALanguageEntry *entries, int32_t entriesCapacity,
61 UErrorCode *err)
62 {
63 if (U_FAILURE(*err)) {
64 return 0;
65 }
66 if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 ||
67 ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) {
68 *err = U_ILLEGAL_ARGUMENT_ERROR;
69 return 0;
70 }
71 UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err);
72 rb = ures_getByKey(rb, "territoryInfo", rb, err);
73 rb = ures_getByKey(rb, regionID, rb, err);
74 if (U_FAILURE(*err)) {
75 ures_close(rb);
76 return 0;
77 }
78
79 int32_t entryCount = 0;
80 UResourceBundle *langBund = NULL;
81 int32_t lbIdx, lbCount = ures_getSize(rb);
82 UALanguageEntry localLangEntries[kLocalLangEntriesMax];
83 UALanguageEntry * langEntries = localLangEntries;
84 int32_t langEntriesMax = kLocalLangEntriesMax;
85
86 for (lbIdx = 0; lbIdx < lbCount; lbIdx++) {
87 langBund = ures_getByIndex(rb, lbIdx, langBund, err);
88 if (U_FAILURE(*err)) {
89 break;
90 }
91 const char * langCode = ures_getKey(langBund);
92 if (uprv_strcmp(langCode,"territoryF") == 0) {
93 continue;
94 }
95 if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen
96 continue; // a code we cannot handle
97 }
98
99 UErrorCode localErr = U_ZERO_ERROR;
100 double userFraction = 0.0;
101 UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr);
102 if (U_SUCCESS(localErr)) {
103 int32_t intF = ures_getInt(itemBund, &localErr);
104 if (U_SUCCESS(localErr)) {
105 userFraction = doubleFromIntF(intF);
106 }
107 ures_close(itemBund);
108 }
109 if (userFraction < minimumFraction) {
110 continue;
111 }
112 if (entries != NULL) {
113 localErr = U_ZERO_ERROR;
114 UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED;
115 int32_t ulen;
116 const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr);
117 if (U_SUCCESS(localErr)) {
118 int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial);
119 if (cmp == 0) {
120 langStatus = UALANGSTATUS_OFFICIAL;
121 } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) {
122 langStatus = UALANGSTATUS_DEFACTO_OFFICIAL;
123 } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) {
124 langStatus = UALANGSTATUS_REGIONAL_OFFICIAL;
125 }
126 }
127 // Now we have all of the info for our next entry
128 if (entryCount >= langEntriesMax) {
129 int32_t newMax = langEntriesMax * kLangEntriesFactor;
130 if (langEntries == localLangEntries) {
131 // first allocation, copy from local buf
132 langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry));
133 if (langEntries == NULL) {
134 *err = U_MEMORY_ALLOCATION_ERROR;
135 break;
136 }
137 uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry));
138 } else {
139 langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry));
140 if (langEntries == NULL) {
141 *err = U_MEMORY_ALLOCATION_ERROR;
142 break;
143 }
144 }
145 langEntriesMax = newMax;
146 }
147 uprv_strcpy(langEntries[entryCount].languageCode, langCode);
148 langEntries[entryCount].userFraction = userFraction;
149 langEntries[entryCount].status = langStatus;
150 }
151 entryCount++;
152 }
153 ures_close(langBund);
154 ures_close(rb);
155 if (U_FAILURE(*err)) {
156 if (langEntries != localLangEntries) {
157 free(langEntries);
158 }
159 return 0;
160 }
161 if (entries != NULL) {
162 // sort langEntries, copy entries that fit to provided array
163 qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries);
164 if (entryCount > entriesCapacity) {
165 entryCount = entriesCapacity;
166 }
167 uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry));
168 if (langEntries != localLangEntries) {
169 free(langEntries);
170 }
171 }
172 return entryCount;
173 }
174
// Parent overrides consulted first by ualoc_getAppleParent.
// Layout: a flat list of (locale, parent) string pairs ended by a single NULL.
// The locale keys are in ascending uprv_strcmp order; the scan in
// ualoc_getAppleParent stops at the first key that compares greater than the
// locale being looked up, so new pairs must be inserted in sorted position.
static const char * forceParent[] = {
    /* locale       parent  */
    "en_150",     "en_GB",   // en for Europe
    "en_AU",      "en_GB",
    "en_BD",      "en_GB",   // en for Bangladesh
    "en_BE",      "en_150",  // en for Belgium goes to en for Europe
    "en_DG",      "en_GB",
    "en_FK",      "en_GB",
    "en_GG",      "en_GB",
    "en_GI",      "en_GB",
    "en_HK",      "en_GB",   // en for Hong Kong
    "en_IE",      "en_GB",
    "en_IM",      "en_GB",
    "en_IN",      "en_GB",
    "en_IO",      "en_GB",
    "en_JE",      "en_GB",
    "en_MO",      "en_GB",
    "en_MT",      "en_GB",
    "en_MY",      "en_GB",   // en for Malaysia
    "en_NZ",      "en_GB",
    "en_PK",      "en_GB",   // en for Pakistan
    "en_SG",      "en_GB",
    "en_SH",      "en_GB",
    "en_VG",      "en_GB",
    "zh",         "zh_CN",
    "zh_CN",      "root",
    "zh_Hant",    "zh_TW",
    "zh_TW",      "root",
    NULL
};
204
205 U_CAPI int32_t U_EXPORT2
206 ualoc_getAppleParent(const char* localeID,
207 char * parent,
208 int32_t parentCapacity,
209 UErrorCode* err)
210 {
211 UResourceBundle *rb;
212 int32_t len;
213 UErrorCode tempStatus;
214 char locbuf[ULOC_FULLNAME_CAPACITY+1];
215 char * foundDoubleUnderscore;
216
217 if (U_FAILURE(*err)) {
218 return 0;
219 }
220 if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) {
221 *err = U_ILLEGAL_ARGUMENT_ERROR;
222 return 0;
223 }
224 len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */
225 if (U_FAILURE(*err)) {
226 return 0;
227 }
228 if (*err == U_STRING_NOT_TERMINATED_WARNING) {
229 locbuf[ULOC_FULLNAME_CAPACITY] = 0;
230 *err = U_ZERO_ERROR;
231 }
232 foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */
233 if (foundDoubleUnderscore != NULL) {
234 *foundDoubleUnderscore = 0; /* terminate at the __ */
235 len = uprv_strlen(locbuf);
236 }
237 if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) {
238 const char ** forceParentPtr = forceParent;
239 const char * testCurLoc;
240 while ( (testCurLoc = *forceParentPtr++) != NULL ) {
241 int cmp = uprv_strcmp(locbuf, testCurLoc);
242 if (cmp <= 0) {
243 if (cmp == 0) {
244 len = uprv_strlen(*forceParentPtr);
245 if (len < parentCapacity) {
246 uprv_strcpy(parent, *forceParentPtr);
247 } else {
248 *err = U_BUFFER_OVERFLOW_ERROR;
249 }
250 return len;
251 }
252 break;
253 }
254 forceParentPtr++;
255 }
256 }
257 tempStatus = U_ZERO_ERROR;
258 rb = ures_openDirect(NULL, locbuf, &tempStatus);
259 if (U_SUCCESS(tempStatus)) {
260 const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus);
261 if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) {
262 // we have followed an alias
263 len = uprv_strlen(actualLocale);
264 if (len < parentCapacity) {
265 uprv_strcpy(parent, actualLocale);
266 } else {
267 *err = U_BUFFER_OVERFLOW_ERROR;
268 }
269 ures_close(rb);
270 return len;
271 }
272 tempStatus = U_ZERO_ERROR;
273 const UChar * parentUName = ures_getStringByKey(rb, "%%Parent", &len, &tempStatus);
274 if (U_SUCCESS(tempStatus) && tempStatus != U_USING_FALLBACK_WARNING) {
275 if (len < parentCapacity) {
276 u_UCharsToChars(parentUName, parent, len + 1);
277 } else {
278 *err = U_BUFFER_OVERFLOW_ERROR;
279 }
280 ures_close(rb);
281 return len;
282 }
283 ures_close(rb);
284 }
285 len = uloc_getParent(locbuf, parent, parentCapacity, err);
286 if (U_SUCCESS(*err) && len == 0) {
287 len = 4;
288 if (len < parentCapacity) {
289 uprv_strcpy(parent, "root");
290 } else {
291 *err = U_BUFFER_OVERFLOW_ERROR;
292 }
293 }
294 return len;
295 }
296
297 // =================
298 // Data and related functions for ualoc_localizationsToUse
299 // =================
300
// Apple-specific aliases, as { alias, replacement } pairs. initializeMapData
// puts these into gAliasMap ahead of the ICU metadata aliases.
// names are lowercase here because they are looked up after being processed by uloc_getBaseName
static const char * appleAliasMap[][2] = {
    /* alias           replacement */
    { "arabic",       "ar"      },   // T2
    { "chinese",      "zh_Hans" },   // T0
    { "danish",       "da"      },   // T2
    { "dutch",        "nl"      },   // T1, still in use
    { "english",      "en"      },   // T0, still in use
    { "finnish",      "fi"      },   // T2
    { "french",       "fr"      },   // T0, still in use
    { "german",       "de"      },   // T0, still in use
    { "italian",      "it"      },   // T1, still in use
    { "japanese",     "ja"      },   // T0, still in use
    { "korean",       "ko"      },   // T1
    { "no_NO",        "nb_NO"   },   // special
    { "norwegian",    "nb"      },   // T2
    { "polish",       "pl"      },   // T2
    { "portuguese",   "pt"      },   // T2
    { "russian",      "ru"      },   // T2
    { "spanish",      "es"      },   // T1, still in use
    { "swedish",      "sv"      },   // T2
    { "thai",         "th"      },   // T2
    { "turkish",      "tr"      },   // T2
    { "zh",           "zh_Hans" },   // special
};
// number of pairs in appleAliasMap
enum { kAppleAliasMapCount = sizeof(appleAliasMap)/sizeof(appleAliasMap[0]) };
326
// Apple-specific parents, as { locale, parent } pairs. initializeMapData puts
// these into gParentMap after the ICU parentLocales data has been loaded.
static const char * appleParentMap[][2] = {
    /* locale      parent   */
    { "en_150",   "en_GB"  },   // Apple custom parent
    { "en_AD",    "en_150" },   // Apple locale addition
    { "en_AL",    "en_150" },   // Apple locale addition
    { "en_AT",    "en_150" },   // Apple locale addition
    { "en_AU",    "en_GB"  },   // Apple custom parent
    { "en_BA",    "en_150" },   // Apple locale addition
    { "en_BD",    "en_GB"  },   // Apple custom parent
    { "en_BE",    "en_150" },   // Apple custom parent
    { "en_CH",    "en_150" },   // Apple locale addition
    { "en_CY",    "en_150" },   // Apple locale addition
    { "en_CZ",    "en_150" },   // Apple locale addition
    { "en_DE",    "en_150" },   // Apple locale addition
    { "en_DG",    "en_GB"  },
    { "en_DK",    "en_150" },   // Apple locale addition
    { "en_EE",    "en_150" },   // Apple locale addition
    { "en_ES",    "en_150" },   // Apple locale addition
    { "en_FI",    "en_150" },   // Apple locale addition
    { "en_FK",    "en_GB"  },
    { "en_FR",    "en_150" },   // Apple locale addition
    { "en_GG",    "en_GB"  },
    { "en_GI",    "en_GB"  },
    { "en_GR",    "en_150" },   // Apple locale addition
    { "en_HK",    "en_GB"  },   // Apple custom parent
    { "en_HR",    "en_150" },   // Apple locale addition
    { "en_HU",    "en_150" },   // Apple locale addition
    { "en_IE",    "en_GB"  },
    { "en_IL",    "en_001" },   // Apple locale addition
    { "en_IM",    "en_GB"  },
    { "en_IN",    "en_GB"  },   // Apple custom parent
    { "en_IO",    "en_GB"  },
    { "en_IS",    "en_150" },   // Apple locale addition
    { "en_IT",    "en_150" },   // Apple locale addition
    { "en_JE",    "en_GB"  },
    { "en_LT",    "en_150" },   // Apple locale addition
    { "en_LU",    "en_150" },   // Apple locale addition
    { "en_LV",    "en_150" },   // Apple locale addition
    { "en_ME",    "en_150" },   // Apple locale addition
    { "en_MO",    "en_GB"  },
    { "en_MT",    "en_GB"  },
    { "en_MY",    "en_GB"  },   // Apple custom parent
    { "en_NL",    "en_150" },   // Apple locale addition
    { "en_NO",    "en_150" },   // Apple locale addition
    { "en_NZ",    "en_GB"  },
    { "en_PK",    "en_GB"  },   // Apple custom parent
    { "en_PL",    "en_150" },   // Apple locale addition
    { "en_PT",    "en_150" },   // Apple locale addition
    { "en_RO",    "en_150" },   // Apple locale addition
    { "en_RU",    "en_150" },   // Apple locale addition
    { "en_SE",    "en_150" },   // Apple locale addition
    { "en_SG",    "en_GB"  },
    { "en_SH",    "en_GB"  },
    { "en_SI",    "en_150" },   // Apple locale addition
    { "en_SK",    "en_150" },   // Apple locale addition
    { "en_TR",    "en_150" },   // Apple locale addition
    { "en_VG",    "en_GB"  },
};
// number of pairs in appleParentMap
enum { kAppleParentMapCount = sizeof(appleParentMap)/sizeof(appleParentMap[0]) };
385
// Selected parents of available localizations, as { localization, parent }
// pairs searched linearly by getLocParent; add as necessary.
// Might do something better for this, perhaps maximizing locales then stripping.
static const char * locParentMap[][2] = {
    /* localization    parent    */
    { "pt_BR",        "pt"      },
    { "pt_PT",        "pt"      },
    { "zh_Hans_CN",   "zh_Hans" },
    { "zh_Hant_TW",   "zh_Hant" },
};
// number of pairs in locParentMap
enum { kLocParentMapCount = sizeof(locParentMap)/sizeof(locParentMap[0]) };
395
396 enum {
397 kStringsAllocSize = 4096, // cannot expand; current actual usage 3610
398 kParentMapInitCount = 161 // can expand; current actual usage 161
399 };
400
401 U_CDECL_BEGIN
402 static UBool U_CALLCONV ualocale_cleanup(void);
403 U_CDECL_END
404
405 U_NAMESPACE_BEGIN
406
407 static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER;
408
409 static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure
410 static char* gStrings = NULL;
411 static UHashtable* gAliasMap = NULL;
412 static UHashtable* gParentMap = NULL;
413
414 U_NAMESPACE_END
415
416 U_CDECL_BEGIN
417
418 static UBool U_CALLCONV ualocale_cleanup(void)
419 {
420 U_NAMESPACE_USE
421
422 gUALocaleCacheInitOnce.reset();
423
424 if (gMapDataState > 0) {
425 uhash_close(gParentMap);
426 gParentMap = NULL;
427 uhash_close(gAliasMap);
428 gAliasMap = NULL;
429 uprv_free(gStrings);
430 gStrings = NULL;
431 }
432 gMapDataState = 0;
433 return TRUE;
434 }
435
436 static void initializeMapData() {
437 U_NAMESPACE_USE
438
439 UResourceBundle * curBundle;
440 char* stringsPtr;
441 char* stringsEnd;
442 UErrorCode status;
443 int32_t entryIndex, icuEntryCount;
444
445 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup);
446
447 gStrings = (char*)uprv_malloc(kStringsAllocSize);
448 if (gStrings) {
449 stringsPtr = gStrings;
450 stringsEnd = gStrings + kStringsAllocSize;
451 }
452
453 status = U_ZERO_ERROR;
454 curBundle = NULL;
455 icuEntryCount = 0;
456 if (gStrings) {
457 curBundle = ures_openDirect(NULL, "metadata", &status);
458 curBundle = ures_getByKey(curBundle, "alias", curBundle, &status);
459 curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE
460 if (U_SUCCESS(status)) {
461 icuEntryCount = ures_getSize(curBundle); // currently 331
462 }
463 }
464 status = U_ZERO_ERROR;
465 gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
466 kAppleAliasMapCount + icuEntryCount, &status);
467 // defaults to keyDeleter NULL
468 if (U_SUCCESS(status)) {
469 for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) {
470 uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status);
471 }
472 status = U_ZERO_ERROR;
473 UResourceBundle * aliasMapBundle = NULL;
474 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
475 aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status);
476 if (U_FAILURE(status)) {
477 break; // error
478 }
479 const char * keyStr = ures_getKey(aliasMapBundle);
480 int32_t len = uprv_strlen(keyStr);
481 if (len >= stringsEnd - stringsPtr) {
482 break; // error
483 }
484 uprv_strcpy(stringsPtr, keyStr);
485 char * inLocStr = stringsPtr;
486 stringsPtr += len + 1;
487
488 len = stringsEnd - stringsPtr - 1;
489 ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status);
490 if (U_FAILURE(status)) {
491 break; // error
492 }
493 stringsPtr[len] = 0;
494 uhash_put(gAliasMap, inLocStr, stringsPtr, &status);
495 stringsPtr += len + 1;
496 }
497 ures_close(aliasMapBundle);
498 } else {
499 ures_close(curBundle);
500 uprv_free(gStrings);
501 gMapDataState = -1; // failure
502 return;
503 }
504 ures_close(curBundle);
505
506 status = U_ZERO_ERROR;
507 gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars,
508 kParentMapInitCount, &status);
509 // defaults to keyDeleter NULL
510 if (U_SUCCESS(status)) {
511 curBundle = ures_openDirect(NULL, "supplementalData", &status);
512 curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE
513 if (U_SUCCESS(status)) {
514 UResourceBundle * parentMapBundle = NULL;
515 while (TRUE) {
516 parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status);
517 if (U_FAILURE(status)) {
518 break; // no more parent bundles, normal exit
519 }
520 const char * keyStr = ures_getKey(parentMapBundle);
521 int32_t len = uprv_strlen(keyStr);
522 if (len >= stringsEnd - stringsPtr) {
523 break; // error
524 }
525 uprv_strcpy(stringsPtr, keyStr);
526 char * parentStr = stringsPtr;
527 stringsPtr += len + 1;
528
529 if (ures_getType(parentMapBundle) == URES_STRING) {
530 len = stringsEnd - stringsPtr - 1;
531 ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status);
532 if (U_FAILURE(status)) {
533 break; // error
534 }
535 stringsPtr[len] = 0;
536 uhash_put(gParentMap, stringsPtr, parentStr, &status);
537 stringsPtr += len + 1;
538 } else {
539 // should be URES_ARRAY
540 icuEntryCount = ures_getSize(parentMapBundle);
541 for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) {
542 len = stringsEnd - stringsPtr - 1;
543 ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status);
544 if (U_FAILURE(status)) {
545 break;
546 }
547 stringsPtr[len] = 0;
548 uhash_put(gParentMap, stringsPtr, parentStr, &status);
549 stringsPtr += len + 1;
550 }
551 }
552 }
553 ures_close(parentMapBundle);
554 }
555 ures_close(curBundle);
556
557 status = U_ZERO_ERROR;
558 for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) {
559 uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status);
560 }
561 } else {
562 uhash_close(gAliasMap);
563 gAliasMap = NULL;
564 uprv_free(gStrings);
565 gMapDataState = -1; // failure
566 return;
567 }
568
569 //printf("# gStrings size %ld\n", stringsPtr - gStrings);
570 //printf("# gParentMap count %d\n", uhash_count(gParentMap));
571 gMapDataState = 1;
572 }
573
574 U_CDECL_END
575
576 // The following maps aliases, etc. Ensures 0-termination if no error.
577 static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status)
578 {
579 if (U_FAILURE(*status)) {
580 return;
581 }
582 // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status);
583
584 const char *replacement = NULL;
585 if (gMapDataState > 0) {
586 replacement = (const char *)uhash_get(gAliasMap, locale);
587 }
588 if (replacement == NULL) {
589 replacement = locale;
590 }
591 int32_t len = uprv_strlen(replacement);
592 if (len < normalizedCapacity) { // allow for 0 termination
593 uprv_strcpy(normalized, replacement);
594 } else {
595 *status = U_BUFFER_OVERFLOW_ERROR;
596 }
597 }
598
599 static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status)
600 {
601 if (U_FAILURE(*status)) {
602 return;
603 }
604 if (gMapDataState > 0) {
605 const char *replacement = (const char *)uhash_get(gParentMap, locale);
606 if (replacement) {
607 int32_t len = uprv_strlen(replacement);
608 if (len < parentCapacity) { // allow for 0 termination
609 uprv_strcpy(parent, replacement);
610 } else {
611 *status = U_BUFFER_OVERFLOW_ERROR;
612 }
613 return;
614 }
615 }
616 uloc_getParent(locale, parent, parentCapacity - 1, status);
617 parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING
618 }
619
620 // Might do something better for this, perhaps maximizing locales then stripping
621 const char * getLocParent(const char *locale)
622 {
623 int32_t locParentIndex;
624 for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) {
625 if (uprv_strcmp(locale, locParentMap[locParentIndex][0]) == 0) {
626 return locParentMap[locParentIndex][1];
627 }
628 }
629 return NULL;
630 }
631
632 // this just checks if the *pointer* value is already in the array
633 static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck)
634 {
635 int32_t locIndex;
636 for (locIndex = 0; locIndex < locsToUseCount; locIndex++) {
637 if (locToCheck == localizationsToUse[locIndex]) {
638 return TRUE;
639 }
640 }
641 return FALSE;
642 }
643
644 enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22
645
646 int32_t
647 ualoc_localizationsToUse( const char* const *preferredLanguages,
648 int32_t preferredLanguagesCount,
649 const char* const *availableLocalizations,
650 int32_t availableLocalizationsCount,
651 const char* *localizationsToUse,
652 int32_t localizationsToUseCapacity,
653 UErrorCode *status )
654 {
655 if (U_FAILURE(*status)) {
656 return -1;
657 }
658 if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) {
659 *status = U_ILLEGAL_ARGUMENT_ERROR;
660 return -1;
661 }
662 // get resource data, need to protect with mutex
663 if (gMapDataState == 0) {
664 umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData);
665 }
666 int32_t locsToUseCount = 0;
667 int32_t prefLangIndex, availLocIndex = 0;
668 char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL;
669 char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL;
670 UBool checkAvailLocParents = FALSE;
671 UBool foundMatch = FALSE;
672
673 // Part 1, find the best matching localization, if any
674 for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) {
675 char prefLangBaseName[kLangScriptRegMaxLen + 1];
676 char prefLangNormName[kLangScriptRegMaxLen + 1];
677 char prefLangParentName[kLangScriptRegMaxLen + 1];
678 UErrorCode tmpStatus = U_ZERO_ERROR;
679
680 if (preferredLanguages[prefLangIndex] == NULL) {
681 continue; // skip NULL preferredLanguages entry, go to next one
682 }
683 // use underscores, fix bad capitalization, delete any keywords
684 uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus);
685 if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 ||
686 uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') {
687 continue; // can't handle this preferredLanguages entry or it is invalid, go to next one
688 }
689 prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
690 //printf(" # prefLangBaseName %s\n", prefLangBaseName);
691
692 // if we have not already allocated and filled the array of
693 // base availableLocalizations, do so now.
694 if (availLocBase == NULL) {
695 availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
696 if (availLocBase == NULL) {
697 continue; // cannot further check this preferredLanguages entry, go to next one
698 }
699 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
700 tmpStatus = U_ZERO_ERROR;
701 uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus);
702 if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') {
703 availLocBase[availLocIndex][0] = 0; // effectively remove this entry
704 } else {
705 availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING
706 }
707 }
708 }
709 // first compare base preferredLanguage to base versions of availableLocalizations names
710 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
711 if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) {
712 foundMatch = TRUE; // availLocIndex records where
713 break;
714 }
715 }
716 if (foundMatch) {
717 //printf(" # matched actualLocName\n");
718 break; // found a loc for this preferredLanguages entry
719 }
720
721 // get normalized preferredLanguage
722 tmpStatus = U_ZERO_ERROR;
723 ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
724 if (U_FAILURE(tmpStatus)) {
725 continue; // can't handle this preferredLanguages entry, go to next one
726 }
727 //printf(" # prefLangNormName %s\n", prefLangNormName);
728 // if we have not already allocated and filled the array of
729 // normalized availableLocalizations, do so now.
730 // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK",
731 // and fixes deprecated codes "iw" > "he", "in" > "id" etc.
732 if (availLocNorm == NULL) {
733 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
734 if (availLocNorm == NULL) {
735 continue; // cannot further check this preferredLanguages entry, go to next one
736 }
737 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
738 tmpStatus = U_ZERO_ERROR;
739 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
740 if (U_FAILURE(tmpStatus)) {
741 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
742 } else if (getLocParent(availLocNorm[availLocIndex]) != NULL) {
743 checkAvailLocParents = TRUE;
744 }
745 //printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]);
746 }
747 }
748 // now compare normalized preferredLanguage to normalized localization names
749 // if matches, copy *original* localization name
750 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
751 if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) {
752 foundMatch = TRUE; // availLocIndex records where
753 break;
754 }
755 }
756 if (foundMatch) {
757 //printf(" # matched actualLocNormName\n");
758 break; // found a loc for this preferredLanguages entry
759 }
760
761 // now walk up the parent chain for preferredLanguage
762 // until we find a match or hit root
763 uprv_strcpy(prefLangBaseName, prefLangNormName);
764 while (!foundMatch) {
765 tmpStatus = U_ZERO_ERROR;
766 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
767 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
768 break; // reached root or cannot proceed further
769 }
770 //printf(" # prefLangParentName %s\n", prefLangParentName);
771
772 // now compare this preferredLanguage parent to normalized localization names
773 // if matches, copy *original* localization name
774 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
775 if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) {
776 foundMatch = TRUE; // availLocIndex records where
777 break;
778 }
779 }
780 uprv_strcpy(prefLangBaseName, prefLangParentName);
781 }
782 if (foundMatch) {
783 break; // found a loc for this preferredLanguages entry
784 }
785
786 // last try, use parents of selected
787 if (checkAvailLocParents) {
788 // now walk up the parent chain for preferredLanguage again
789 // checking against parents of selected availLocNorm entries
790 // but this time start with current prefLangNormName
791 uprv_strcpy(prefLangBaseName, prefLangNormName);
792 while (TRUE) {
793 tmpStatus = U_ZERO_ERROR;
794 // now compare this preferredLanguage to normalized localization names
795 // parent if have one for this; if matches, copy *original* localization name
796 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
797 const char *availLocParent = getLocParent(availLocNorm[availLocIndex]);
798 if (availLocParent && uprv_strcmp(prefLangBaseName, availLocParent) == 0) {
799 foundMatch = TRUE; // availLocIndex records where
800 break;
801 }
802 }
803 if (foundMatch) {
804 break;
805 }
806 ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
807 if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) {
808 break; // reached root or cannot proceed further
809 }
810 uprv_strcpy(prefLangBaseName, prefLangParentName);
811 }
812 }
813 if (foundMatch) {
814 break; // found a loc for this preferredLanguages entry
815 }
816 }
817
818 // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations
819 if (foundMatch) {
820 // Here availLocIndex corresponds to the first matched localization
821 UErrorCode tmpStatus = U_ZERO_ERROR;
822 int32_t availLocMatchIndex = availLocIndex;
823 if (locsToUseCount < localizationsToUseCapacity) {
824 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex];
825 }
826 // at this point we must have availLocBase, and minimally matched against that.
827 // if we have not already allocated and filled the array of
828 // normalized availableLocalizations, do so now, but don't require it
829 if (availLocNorm == NULL) {
830 availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1));
831 if (availLocNorm != NULL) {
832 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
833 tmpStatus = U_ZERO_ERROR;
834 ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus);
835 if (U_FAILURE(tmpStatus)) {
836 availLocNorm[availLocIndex][0] = 0; // effectively remove this entry
837 }
838 }
839 }
840 }
841
842 // add normalized form of matching loc, if different and in availLocBase
843 if (locsToUseCount < localizationsToUseCapacity) {
844 tmpStatus = U_ZERO_ERROR;
845 char matchedLocNormName[kLangScriptRegMaxLen + 1];
846 char matchedLocParentName[kLangScriptRegMaxLen + 1];
847 // get normalized form of matching loc
848 if (availLocNorm != NULL) {
849 uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]);
850 } else {
851 ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus);
852 }
853 if (U_SUCCESS(tmpStatus)) {
854 // add normalized form of matching loc, if different and in availLocBase
855 if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) {
856 // normalization of matched localization is different, see if we have the normalization in availableLocalizations
857 // from this point on, availLocIndex no longer corresponds to the matched localization.
858 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
859 if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0
860 || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0))
861 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
862 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
863 break;
864 }
865 }
866 }
867
868 // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase
869 while (locsToUseCount < localizationsToUseCapacity) {
870 ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus);
871 if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) {
872 break; // reached root or cannot proceed further
873 }
874
875 // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them)
876 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
877 if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0
878 || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0))
879 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) {
880 localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex];
881 break;
882 }
883 }
884 uprv_strcpy(matchedLocNormName, matchedLocParentName);
885 }
886
887 // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization
888 // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from
889 // matchedLocNormName again, comparing against parents of of selected availLocNorm entries.
890 // But this picks up too many matches that are not parents of the matched localization. So
891 // we just handle these specially.
892 if ( locsToUseCount < localizationsToUseCapacity
893 && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0
894 || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) {
895 int32_t zhTW_matchIndex = -1;
896 UBool zhHant_found = FALSE;
897 for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) {
898 if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) {
899 zhTW_matchIndex = availLocIndex;
900 }
901 if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) {
902 zhHant_found = TRUE;
903 }
904 }
905 if (zhTW_matchIndex >= 0 && !zhHant_found
906 && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) {
907 localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex];
908 }
909 }
910 }
911 }
912 }
913
914 uprv_free(availLocNorm);
915 uprv_free(availLocBase);
916 return locsToUseCount;
917 }
918