]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ***************************************************************************************** | |
b331163b | 3 | * Copyright (C) 2014-2015 Apple Inc. All Rights Reserved. |
57a6839d A |
4 | ***************************************************************************************** |
5 | */ | |
6 | ||
7 | #include "unicode/utypes.h" | |
8 | #include "unicode/ualoc.h" | |
9 | #include "unicode/uloc.h" | |
10 | #include "unicode/ures.h" | |
11 | #include "unicode/putil.h" | |
12 | #include "cstring.h" | |
13 | #include "cmemory.h" | |
b331163b A |
14 | #include "uhash.h" |
15 | #include "umutex.h" | |
16 | #include "ucln_cmn.h" | |
57a6839d A |
17 | // the following has replacements for some math.h funcs etc |
18 | #include "putilimp.h" | |
19 | ||
20 | ||
21 | // The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter. | |
22 | // From its docs (adapted): [IntF is] a special integer that represents the number in | |
23 | // normalized scientific notation. | |
24 | // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent | |
25 | // offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g. | |
26 | // 14660000000000 -> 1.46600E13 -> 63146600 | |
27 | // 0.0001 -> 1.00000E-4 -> 46100000 | |
28 | // -123.456 -> -1.23456E-2 -> -48123456 | |
29 | // | |
30 | // Here to avoid an extra division we have the max coefficient as 999999 (instead of | |
31 | // 9.99999) and instead offset the exponent by -55. | |
32 | // | |
33 | static double doubleFromIntF(int32_t intF) { | |
34 | double coefficient = (double)(intF % 1000000); | |
35 | int32_t exponent = (intF / 1000000) - 55; | |
36 | return coefficient * uprv_pow10(exponent); | |
37 | } | |
38 | ||
39 | static int compareLangEntries(const void * entry1, const void * entry2) { | |
40 | double fraction1 = ((const UALanguageEntry *)entry1)->userFraction; | |
41 | double fraction2 = ((const UALanguageEntry *)entry2)->userFraction; | |
42 | // want descending order | |
43 | if (fraction1 > fraction2) return -1; | |
44 | if (fraction1 < fraction2) return 1; | |
45 | // userFractions the same, sort by languageCode | |
46 | return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode); | |
47 | } | |
48 | ||
49 | static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official" | |
50 | static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official" | |
51 | static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional" | |
52 | ||
// Sizing for the working array of language entries in ualoc_getLanguagesForRegion.
enum {
    // Size of the on-stack array; enough for most regions to minimumFraction 0.001 except India.
    kLocalLangEntriesMax = 26,
    // If we have to allocate, multiply the existing size by this.
    kLangEntriesFactor = 3
};
57 | ||
58 | U_CAPI int32_t U_EXPORT2 | |
59 | ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction, | |
60 | UALanguageEntry *entries, int32_t entriesCapacity, | |
61 | UErrorCode *err) | |
62 | { | |
63 | if (U_FAILURE(*err)) { | |
64 | return 0; | |
65 | } | |
66 | if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 || | |
67 | ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) { | |
68 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
69 | return 0; | |
70 | } | |
71 | UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err); | |
72 | rb = ures_getByKey(rb, "territoryInfo", rb, err); | |
73 | rb = ures_getByKey(rb, regionID, rb, err); | |
74 | if (U_FAILURE(*err)) { | |
75 | ures_close(rb); | |
76 | return 0; | |
77 | } | |
78 | ||
79 | int32_t entryCount = 0; | |
80 | UResourceBundle *langBund = NULL; | |
81 | int32_t lbIdx, lbCount = ures_getSize(rb); | |
82 | UALanguageEntry localLangEntries[kLocalLangEntriesMax]; | |
83 | UALanguageEntry * langEntries = localLangEntries; | |
84 | int32_t langEntriesMax = kLocalLangEntriesMax; | |
85 | ||
86 | for (lbIdx = 0; lbIdx < lbCount; lbIdx++) { | |
87 | langBund = ures_getByIndex(rb, lbIdx, langBund, err); | |
88 | if (U_FAILURE(*err)) { | |
89 | break; | |
90 | } | |
91 | const char * langCode = ures_getKey(langBund); | |
92 | if (uprv_strcmp(langCode,"territoryF") == 0) { | |
93 | continue; | |
94 | } | |
95 | if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen | |
96 | continue; // a code we cannot handle | |
97 | } | |
98 | ||
99 | UErrorCode localErr = U_ZERO_ERROR; | |
100 | double userFraction = 0.0; | |
101 | UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr); | |
102 | if (U_SUCCESS(localErr)) { | |
103 | int32_t intF = ures_getInt(itemBund, &localErr); | |
104 | if (U_SUCCESS(localErr)) { | |
105 | userFraction = doubleFromIntF(intF); | |
106 | } | |
107 | ures_close(itemBund); | |
108 | } | |
109 | if (userFraction < minimumFraction) { | |
110 | continue; | |
111 | } | |
112 | if (entries != NULL) { | |
113 | localErr = U_ZERO_ERROR; | |
114 | UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED; | |
115 | int32_t ulen; | |
116 | const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr); | |
117 | if (U_SUCCESS(localErr)) { | |
118 | int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial); | |
119 | if (cmp == 0) { | |
120 | langStatus = UALANGSTATUS_OFFICIAL; | |
121 | } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) { | |
122 | langStatus = UALANGSTATUS_DEFACTO_OFFICIAL; | |
123 | } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) { | |
124 | langStatus = UALANGSTATUS_REGIONAL_OFFICIAL; | |
125 | } | |
126 | } | |
127 | // Now we have all of the info for our next entry | |
128 | if (entryCount >= langEntriesMax) { | |
129 | int32_t newMax = langEntriesMax * kLangEntriesFactor; | |
130 | if (langEntries == localLangEntries) { | |
131 | // first allocation, copy from local buf | |
132 | langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry)); | |
133 | if (langEntries == NULL) { | |
134 | *err = U_MEMORY_ALLOCATION_ERROR; | |
135 | break; | |
136 | } | |
137 | uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry)); | |
138 | } else { | |
139 | langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry)); | |
140 | if (langEntries == NULL) { | |
141 | *err = U_MEMORY_ALLOCATION_ERROR; | |
142 | break; | |
143 | } | |
144 | } | |
145 | langEntriesMax = newMax; | |
146 | } | |
147 | uprv_strcpy(langEntries[entryCount].languageCode, langCode); | |
148 | langEntries[entryCount].userFraction = userFraction; | |
149 | langEntries[entryCount].status = langStatus; | |
150 | } | |
151 | entryCount++; | |
152 | } | |
153 | ures_close(langBund); | |
154 | ures_close(rb); | |
155 | if (U_FAILURE(*err)) { | |
156 | if (langEntries != localLangEntries) { | |
157 | free(langEntries); | |
158 | } | |
159 | return 0; | |
160 | } | |
161 | if (entries != NULL) { | |
162 | // sort langEntries, copy entries that fit to provided array | |
163 | qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries); | |
164 | if (entryCount > entriesCapacity) { | |
165 | entryCount = entriesCapacity; | |
166 | } | |
167 | uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry)); | |
168 | if (langEntries != localLangEntries) { | |
169 | free(langEntries); | |
170 | } | |
171 | } | |
172 | return entryCount; | |
173 | } | |
174 | ||
// Forced parents used by ualoc_getAppleParent, stored as a flat list of
// (locale, parent) string pairs terminated by a single NULL.
// The locale keys must stay in ascending uprv_strcmp order: the lookup stops at the
// first key that compares greater than the locale being searched for.
static const char * forceParent[] = {
    "en_AU",   "en_GB",
    "en_BD",   "en_GB",   // en for Bangladesh
    "en_HK",   "en_GB",   // en for Hong Kong
    "en_IN",   "en_GB",
    "en_MY",   "en_GB",   // en for Malaysia
    "en_PK",   "en_GB",   // en for Pakistan
    "zh",      "zh_CN",
    "zh_CN",   "root",
    "zh_Hant", "zh_TW",
    "zh_TW",   "root",
    NULL
};
188 | ||
189 | U_CAPI int32_t U_EXPORT2 | |
190 | ualoc_getAppleParent(const char* localeID, | |
191 | char * parent, | |
192 | int32_t parentCapacity, | |
193 | UErrorCode* err) | |
194 | { | |
195 | UResourceBundle *rb; | |
196 | int32_t len; | |
197 | UErrorCode tempStatus; | |
198 | char locbuf[ULOC_FULLNAME_CAPACITY+1]; | |
08b89b0a | 199 | char * foundDoubleUnderscore; |
57a6839d A |
200 | |
201 | if (U_FAILURE(*err)) { | |
202 | return 0; | |
203 | } | |
204 | if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) { | |
205 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
206 | return 0; | |
207 | } | |
08b89b0a | 208 | len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */ |
57a6839d A |
209 | if (U_FAILURE(*err)) { |
210 | return 0; | |
211 | } | |
212 | if (*err == U_STRING_NOT_TERMINATED_WARNING) { | |
213 | locbuf[ULOC_FULLNAME_CAPACITY] = 0; | |
214 | *err = U_ZERO_ERROR; | |
215 | } | |
08b89b0a A |
216 | foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */ |
217 | if (foundDoubleUnderscore != NULL) { | |
218 | *foundDoubleUnderscore = 0; /* terminate at the __ */ | |
219 | len = uprv_strlen(locbuf); | |
220 | } | |
b331163b | 221 | if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) { |
57a6839d A |
222 | const char ** forceParentPtr = forceParent; |
223 | const char * testCurLoc; | |
224 | while ( (testCurLoc = *forceParentPtr++) != NULL ) { | |
225 | int cmp = uprv_strcmp(locbuf, testCurLoc); | |
226 | if (cmp <= 0) { | |
227 | if (cmp == 0) { | |
228 | len = uprv_strlen(*forceParentPtr); | |
229 | if (len < parentCapacity) { | |
230 | uprv_strcpy(parent, *forceParentPtr); | |
231 | } else { | |
232 | *err = U_BUFFER_OVERFLOW_ERROR; | |
233 | } | |
234 | return len; | |
235 | } | |
236 | break; | |
237 | } | |
238 | forceParentPtr++; | |
239 | } | |
240 | } | |
241 | tempStatus = U_ZERO_ERROR; | |
242 | rb = ures_openDirect(NULL, locbuf, &tempStatus); | |
243 | if (U_SUCCESS(tempStatus)) { | |
244 | const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus); | |
245 | if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) { | |
246 | // we have followed an alias | |
247 | len = uprv_strlen(actualLocale); | |
248 | if (len < parentCapacity) { | |
249 | uprv_strcpy(parent, actualLocale); | |
250 | } else { | |
251 | *err = U_BUFFER_OVERFLOW_ERROR; | |
252 | } | |
253 | ures_close(rb); | |
254 | return len; | |
255 | } | |
256 | tempStatus = U_ZERO_ERROR; | |
257 | const UChar * parentUName = ures_getStringByKey(rb, "%%Parent", &len, &tempStatus); | |
258 | if (U_SUCCESS(tempStatus) && tempStatus != U_USING_FALLBACK_WARNING) { | |
259 | if (len < parentCapacity) { | |
260 | u_UCharsToChars(parentUName, parent, len + 1); | |
261 | } else { | |
262 | *err = U_BUFFER_OVERFLOW_ERROR; | |
263 | } | |
264 | ures_close(rb); | |
265 | return len; | |
266 | } | |
267 | ures_close(rb); | |
268 | } | |
269 | len = uloc_getParent(locbuf, parent, parentCapacity, err); | |
270 | if (U_SUCCESS(*err) && len == 0) { | |
271 | len = 4; | |
272 | if (len < parentCapacity) { | |
273 | uprv_strcpy(parent, "root"); | |
274 | } else { | |
275 | *err = U_BUFFER_OVERFLOW_ERROR; | |
276 | } | |
277 | } | |
278 | return len; | |
279 | } | |
280 | ||
b331163b A |
281 | // ================= |
282 | // Data and related functions for ualoc_localizationsToUse | |
283 | // ================= | |
284 | ||
// Alias names mapped to locale IDs; each row is { alias, replacement }.
// The rows are added to gAliasMap by initializeMapData.
// Names are lowercase here because they are looked up after being processed by uloc_getBaseName.
static const char * appleAliasMap[][2] = {
    { "arabic",     "ar"      },    // T2
    { "chinese",    "zh_Hans" },    // T0
    { "danish",     "da"      },    // T2
    { "dutch",      "nl"      },    // T1, still in use
    { "english",    "en"      },    // T0, still in use
    { "finnish",    "fi"      },    // T2
    { "french",     "fr"      },    // T0, still in use
    { "german",     "de"      },    // T0, still in use
    { "italian",    "it"      },    // T1, still in use
    { "japanese",   "ja"      },    // T0, still in use
    { "korean",     "ko"      },    // T1
    { "norwegian",  "nb"      },    // T2
    { "polish",     "pl"      },    // T2
    { "portuguese", "pt"      },    // T2
    { "russian",    "ru"      },    // T2
    { "spanish",    "es"      },    // T1, still in use
    { "swedish",    "sv"      },    // T2
    { "thai",       "th"      },    // T2
    { "turkish",    "tr"      },    // T2
    { "zh",         "zh_Hans" },    // special
};
// Number of rows in appleAliasMap.
enum { kAppleAliasMapCount = sizeof(appleAliasMap)/sizeof(appleAliasMap[0]) };
309 | ||
// Apple-specific parent assignments; each row is { locale, parent }.
// initializeMapData puts these rows into gParentMap after the parentLocales data from
// supplementalData has been loaded.
static const char * appleParentMap[][2] = {
    { "en_150", "en_GB"  },     // Apple custom parent
    { "en_AD",  "en_150" },     // Apple locale addition
    { "en_AL",  "en_150" },     // Apple locale addition
    { "en_AT",  "en_150" },     // Apple locale addition
    { "en_AU",  "en_GB"  },     // Apple custom parent
    { "en_BA",  "en_150" },     // Apple locale addition
    { "en_BD",  "en_GB"  },     // Apple custom parent
    { "en_CH",  "en_150" },     // Apple locale addition
    { "en_CY",  "en_150" },     // Apple locale addition
    { "en_CZ",  "en_150" },     // Apple locale addition
    { "en_DE",  "en_150" },     // Apple locale addition
    { "en_DK",  "en_150" },     // Apple locale addition
    { "en_EE",  "en_150" },     // Apple locale addition
    { "en_ES",  "en_150" },     // Apple locale addition
    { "en_FI",  "en_150" },     // Apple locale addition
    { "en_FR",  "en_150" },     // Apple locale addition
    { "en_GR",  "en_150" },     // Apple locale addition
    { "en_HK",  "en_GB"  },     // Apple custom parent
    { "en_HR",  "en_150" },     // Apple locale addition
    { "en_HU",  "en_150" },     // Apple locale addition
    { "en_IL",  "en_001" },     // Apple locale addition
    { "en_IN",  "en_GB"  },     // Apple custom parent
    { "en_IS",  "en_150" },     // Apple locale addition
    { "en_IT",  "en_150" },     // Apple locale addition
    { "en_LT",  "en_150" },     // Apple locale addition
    { "en_LU",  "en_150" },     // Apple locale addition
    { "en_LV",  "en_150" },     // Apple locale addition
    { "en_ME",  "en_150" },     // Apple locale addition
    { "en_MY",  "en_GB"  },     // Apple custom parent
    { "en_NL",  "en_150" },     // Apple locale addition
    { "en_NO",  "en_150" },     // Apple locale addition
    { "en_PK",  "en_GB"  },     // Apple custom parent
    { "en_PL",  "en_150" },     // Apple locale addition
    { "en_PT",  "en_150" },     // Apple locale addition
    { "en_RO",  "en_150" },     // Apple locale addition
    { "en_RU",  "en_150" },     // Apple locale addition
    { "en_SE",  "en_150" },     // Apple locale addition
    { "en_SI",  "en_150" },     // Apple locale addition
    { "en_SK",  "en_150" },     // Apple locale addition
    { "en_TR",  "en_150" },     // Apple locale addition
};
// Number of rows in appleParentMap.
enum { kAppleParentMapCount = sizeof(appleParentMap)/sizeof(appleParentMap[0]) };
353 | ||
// Selected parents of available localizations, searched linearly by getLocParent;
// each row is { localization, parent }. Add rows as necessary.
// Might do something better for this, perhaps maximizing locales then stripping.
static const char * locParentMap[][2] = {
    { "pt_BR",      "pt"      },
    { "pt_PT",      "pt"      },
    { "zh_Hans_CN", "zh_Hans" },
    { "zh_Hant_TW", "zh_Hant" },
};
// Number of rows in locParentMap.
enum { kLocParentMapCount = sizeof(locParentMap)/sizeof(locParentMap[0]) };
363 | ||
// Sizes used by initializeMapData.
enum {
    // Bytes allocated for gStrings; cannot expand. Current actual usage 3610.
    kStringsAllocSize = 4096,
    // Initial size requested for gParentMap; can expand. Current actual usage 161.
    kParentMapInitCount = 161
};
368 | ||
369 | U_CDECL_BEGIN | |
370 | static UBool U_CALLCONV ualocale_cleanup(void); | |
371 | U_CDECL_END | |
372 | ||
373 | U_NAMESPACE_BEGIN | |
374 | ||
375 | static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER; | |
376 | ||
377 | static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure | |
378 | static char* gStrings = NULL; | |
379 | static UHashtable* gAliasMap = NULL; | |
380 | static UHashtable* gParentMap = NULL; | |
381 | ||
382 | U_NAMESPACE_END | |
383 | ||
384 | U_CDECL_BEGIN | |
385 | ||
386 | static UBool U_CALLCONV ualocale_cleanup(void) | |
387 | { | |
388 | U_NAMESPACE_USE | |
389 | ||
390 | gUALocaleCacheInitOnce.reset(); | |
391 | ||
392 | if (gMapDataState > 0) { | |
393 | uhash_close(gParentMap); | |
394 | gParentMap = NULL; | |
395 | uhash_close(gAliasMap); | |
396 | gAliasMap = NULL; | |
397 | uprv_free(gStrings); | |
398 | gStrings = NULL; | |
399 | } | |
400 | gMapDataState = 0; | |
401 | return TRUE; | |
402 | } | |
403 | ||
404 | static void initializeMapData() { | |
405 | U_NAMESPACE_USE | |
406 | ||
407 | UResourceBundle * curBundle; | |
408 | char* stringsPtr; | |
409 | char* stringsEnd; | |
410 | UErrorCode status; | |
411 | int32_t entryIndex, icuEntryCount; | |
412 | ||
413 | ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup); | |
414 | ||
415 | gStrings = (char*)uprv_malloc(kStringsAllocSize); | |
416 | if (gStrings) { | |
417 | stringsPtr = gStrings; | |
418 | stringsEnd = gStrings + kStringsAllocSize; | |
419 | } | |
420 | ||
421 | status = U_ZERO_ERROR; | |
422 | curBundle = NULL; | |
423 | icuEntryCount = 0; | |
424 | if (gStrings) { | |
425 | curBundle = ures_openDirect(NULL, "metadata", &status); | |
426 | curBundle = ures_getByKey(curBundle, "alias", curBundle, &status); | |
427 | curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE | |
428 | if (U_SUCCESS(status)) { | |
429 | icuEntryCount = ures_getSize(curBundle); // currently 331 | |
430 | } | |
431 | } | |
432 | status = U_ZERO_ERROR; | |
433 | gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, | |
434 | kAppleAliasMapCount + icuEntryCount, &status); | |
435 | // defaults to keyDeleter NULL | |
436 | if (U_SUCCESS(status)) { | |
437 | for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) { | |
438 | uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status); | |
439 | } | |
440 | status = U_ZERO_ERROR; | |
441 | UResourceBundle * aliasMapBundle = NULL; | |
442 | for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { | |
443 | aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status); | |
444 | if (U_FAILURE(status)) { | |
445 | break; // error | |
446 | } | |
447 | const char * keyStr = ures_getKey(aliasMapBundle); | |
448 | int32_t len = uprv_strlen(keyStr); | |
449 | if (len >= stringsEnd - stringsPtr) { | |
450 | break; // error | |
451 | } | |
452 | uprv_strcpy(stringsPtr, keyStr); | |
453 | char * inLocStr = stringsPtr; | |
454 | stringsPtr += len + 1; | |
455 | ||
456 | len = stringsEnd - stringsPtr - 1; | |
457 | ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status); | |
458 | if (U_FAILURE(status)) { | |
459 | break; // error | |
460 | } | |
461 | stringsPtr[len] = 0; | |
462 | uhash_put(gAliasMap, inLocStr, stringsPtr, &status); | |
463 | stringsPtr += len + 1; | |
464 | } | |
465 | ures_close(aliasMapBundle); | |
466 | } else { | |
467 | ures_close(curBundle); | |
468 | uprv_free(gStrings); | |
469 | gMapDataState = -1; // failure | |
470 | return; | |
471 | } | |
472 | ures_close(curBundle); | |
473 | ||
474 | status = U_ZERO_ERROR; | |
475 | gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, | |
476 | kParentMapInitCount, &status); | |
477 | // defaults to keyDeleter NULL | |
478 | if (U_SUCCESS(status)) { | |
479 | curBundle = ures_openDirect(NULL, "supplementalData", &status); | |
480 | curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE | |
481 | if (U_SUCCESS(status)) { | |
482 | UResourceBundle * parentMapBundle = NULL; | |
483 | while (TRUE) { | |
484 | parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status); | |
485 | if (U_FAILURE(status)) { | |
486 | break; // no more parent bundles, normal exit | |
487 | } | |
488 | const char * keyStr = ures_getKey(parentMapBundle); | |
489 | int32_t len = uprv_strlen(keyStr); | |
490 | if (len >= stringsEnd - stringsPtr) { | |
491 | break; // error | |
492 | } | |
493 | uprv_strcpy(stringsPtr, keyStr); | |
494 | char * parentStr = stringsPtr; | |
495 | stringsPtr += len + 1; | |
496 | ||
497 | if (ures_getType(parentMapBundle) == URES_STRING) { | |
498 | len = stringsEnd - stringsPtr - 1; | |
499 | ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status); | |
500 | if (U_FAILURE(status)) { | |
501 | break; // error | |
502 | } | |
503 | stringsPtr[len] = 0; | |
504 | uhash_put(gParentMap, stringsPtr, parentStr, &status); | |
505 | stringsPtr += len + 1; | |
506 | } else { | |
507 | // should be URES_ARRAY | |
508 | icuEntryCount = ures_getSize(parentMapBundle); | |
509 | for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { | |
510 | len = stringsEnd - stringsPtr - 1; | |
511 | ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status); | |
512 | if (U_FAILURE(status)) { | |
513 | break; | |
514 | } | |
515 | stringsPtr[len] = 0; | |
516 | uhash_put(gParentMap, stringsPtr, parentStr, &status); | |
517 | stringsPtr += len + 1; | |
518 | } | |
519 | } | |
520 | } | |
521 | ures_close(parentMapBundle); | |
522 | } | |
523 | ures_close(curBundle); | |
524 | ||
525 | status = U_ZERO_ERROR; | |
526 | for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) { | |
527 | uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status); | |
528 | } | |
529 | } else { | |
530 | uhash_close(gAliasMap); | |
531 | gAliasMap = NULL; | |
532 | uprv_free(gStrings); | |
533 | gMapDataState = -1; // failure | |
534 | return; | |
535 | } | |
536 | ||
537 | //printf("# gStrings size %ld\n", stringsPtr - gStrings); | |
538 | //printf("# gParentMap count %d\n", uhash_count(gParentMap)); | |
539 | gMapDataState = 1; | |
540 | } | |
541 | ||
542 | U_CDECL_END | |
543 | ||
544 | // The following maps aliases, etc. Ensures 0-termination if no error. | |
545 | static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status) | |
546 | { | |
547 | if (U_FAILURE(*status)) { | |
548 | return; | |
549 | } | |
550 | // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status); | |
551 | ||
552 | const char *replacement = NULL; | |
553 | if (gMapDataState > 0) { | |
554 | replacement = (const char *)uhash_get(gAliasMap, locale); | |
555 | } | |
556 | if (replacement == NULL) { | |
557 | replacement = locale; | |
558 | } | |
559 | int32_t len = uprv_strlen(replacement); | |
560 | if (len < normalizedCapacity) { // allow for 0 termination | |
561 | uprv_strcpy(normalized, replacement); | |
562 | } else { | |
563 | *status = U_BUFFER_OVERFLOW_ERROR; | |
564 | } | |
565 | } | |
566 | ||
567 | static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status) | |
568 | { | |
569 | if (U_FAILURE(*status)) { | |
570 | return; | |
571 | } | |
572 | if (gMapDataState > 0) { | |
573 | const char *replacement = (const char *)uhash_get(gParentMap, locale); | |
574 | if (replacement) { | |
575 | int32_t len = uprv_strlen(replacement); | |
576 | if (len < parentCapacity) { // allow for 0 termination | |
577 | uprv_strcpy(parent, replacement); | |
578 | } else { | |
579 | *status = U_BUFFER_OVERFLOW_ERROR; | |
580 | } | |
581 | return; | |
582 | } | |
583 | } | |
584 | uloc_getParent(locale, parent, parentCapacity - 1, status); | |
585 | parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING | |
586 | } | |
587 | ||
588 | // Might do something better for this, perhaps maximizing locales then stripping | |
589 | const char * getLocParent(const char *locale) | |
590 | { | |
591 | int32_t locParentIndex; | |
592 | for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) { | |
593 | if (uprv_strcmp(locale, locParentMap[locParentIndex][0]) == 0) { | |
594 | return locParentMap[locParentIndex][1]; | |
595 | } | |
596 | } | |
597 | return NULL; | |
598 | } | |
599 | ||
600 | // this just checks if the *pointer* value is already in the array | |
601 | static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck) | |
602 | { | |
603 | int32_t locIndex; | |
604 | for (locIndex = 0; locIndex < locsToUseCount; locIndex++) { | |
605 | if (locToCheck == localizationsToUse[locIndex]) { | |
606 | return TRUE; | |
607 | } | |
608 | } | |
609 | return FALSE; | |
610 | } | |
611 | ||
612 | enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22 | |
613 | ||
614 | int32_t | |
615 | ualoc_localizationsToUse( const char* const *preferredLanguages, | |
616 | int32_t preferredLanguagesCount, | |
617 | const char* const *availableLocalizations, | |
618 | int32_t availableLocalizationsCount, | |
619 | const char* *localizationsToUse, | |
620 | int32_t localizationsToUseCapacity, | |
621 | UErrorCode *status ) | |
622 | { | |
623 | if (U_FAILURE(*status)) { | |
624 | return -1; | |
625 | } | |
626 | if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) { | |
627 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
628 | return -1; | |
629 | } | |
630 | // get resource data, need to protect with mutex | |
631 | if (gMapDataState == 0) { | |
632 | umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData); | |
633 | } | |
634 | int32_t locsToUseCount = 0; | |
635 | int32_t prefLangIndex, availLocIndex = 0; | |
636 | char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL; | |
637 | char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL; | |
638 | UBool checkAvailLocParents = FALSE; | |
639 | UBool foundMatch = FALSE; | |
640 | ||
641 | // Part 1, find the best matching localization, if any | |
642 | for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) { | |
643 | char prefLangBaseName[kLangScriptRegMaxLen + 1]; | |
644 | char prefLangNormName[kLangScriptRegMaxLen + 1]; | |
645 | char prefLangParentName[kLangScriptRegMaxLen + 1]; | |
646 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
647 | ||
648 | if (preferredLanguages[prefLangIndex] == NULL) { | |
649 | continue; // skip NULL preferredLanguages entry, go to next one | |
650 | } | |
651 | // use underscores, fix bad capitalization, delete any keywords | |
652 | uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus); | |
653 | if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 || | |
654 | uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') { | |
655 | continue; // can't handle this preferredLanguages entry or it is invalid, go to next one | |
656 | } | |
657 | prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
658 | //printf(" # prefLangBaseName %s\n", prefLangBaseName); | |
659 | ||
660 | // if we have not already allocated and filled the array of | |
661 | // base availableLocalizations, do so now. | |
662 | if (availLocBase == NULL) { | |
663 | availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
664 | if (availLocBase == NULL) { | |
665 | continue; // cannot further check this preferredLanguages entry, go to next one | |
666 | } | |
667 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
668 | tmpStatus = U_ZERO_ERROR; | |
669 | uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus); | |
670 | if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') { | |
671 | availLocBase[availLocIndex][0] = 0; // effectively remove this entry | |
672 | } else { | |
673 | availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
674 | } | |
675 | } | |
676 | } | |
677 | // first compare base preferredLanguage to base versions of availableLocalizations names | |
678 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
679 | if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) { | |
680 | foundMatch = TRUE; // availLocIndex records where | |
681 | break; | |
682 | } | |
683 | } | |
684 | if (foundMatch) { | |
685 | //printf(" # matched actualLocName\n"); | |
686 | break; // found a loc for this preferredLanguages entry | |
687 | } | |
688 | ||
689 | // get normalized preferredLanguage | |
690 | tmpStatus = U_ZERO_ERROR; | |
691 | ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
692 | if (U_FAILURE(tmpStatus)) { | |
693 | continue; // can't handle this preferredLanguages entry, go to next one | |
694 | } | |
695 | //printf(" # prefLangNormName %s\n", prefLangNormName); | |
696 | // if we have not already allocated and filled the array of | |
697 | // normalized availableLocalizations, do so now. | |
698 | // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK", | |
699 | // and fixes deprecated codes "iw" > "he", "in" > "id" etc. | |
700 | if (availLocNorm == NULL) { | |
701 | availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
702 | if (availLocNorm == NULL) { | |
703 | continue; // cannot further check this preferredLanguages entry, go to next one | |
704 | } | |
705 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
706 | tmpStatus = U_ZERO_ERROR; | |
707 | ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); | |
708 | if (U_FAILURE(tmpStatus)) { | |
709 | availLocNorm[availLocIndex][0] = 0; // effectively remove this entry | |
710 | } else if (getLocParent(availLocNorm[availLocIndex]) != NULL) { | |
711 | checkAvailLocParents = TRUE; | |
712 | } | |
713 | //printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]); | |
714 | } | |
715 | } | |
716 | // now compare normalized preferredLanguage to normalized localization names | |
717 | // if matches, copy *original* localization name | |
718 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
719 | if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) { | |
720 | foundMatch = TRUE; // availLocIndex records where | |
721 | break; | |
722 | } | |
723 | } | |
724 | if (foundMatch) { | |
725 | //printf(" # matched actualLocNormName\n"); | |
726 | break; // found a loc for this preferredLanguages entry | |
727 | } | |
728 | ||
729 | // now walk up the parent chain for preferredLanguage | |
730 | // until we find a match or hit root | |
731 | uprv_strcpy(prefLangBaseName, prefLangNormName); | |
732 | while (!foundMatch) { | |
733 | tmpStatus = U_ZERO_ERROR; | |
734 | ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
735 | if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { | |
736 | break; // reached root or cannot proceed further | |
737 | } | |
738 | //printf(" # prefLangParentName %s\n", prefLangParentName); | |
739 | ||
740 | // now compare this preferredLanguage parent to normalized localization names | |
741 | // if matches, copy *original* localization name | |
742 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
743 | if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) { | |
744 | foundMatch = TRUE; // availLocIndex records where | |
745 | break; | |
746 | } | |
747 | } | |
748 | uprv_strcpy(prefLangBaseName, prefLangParentName); | |
749 | } | |
750 | if (foundMatch) { | |
751 | break; // found a loc for this preferredLanguages entry | |
752 | } | |
753 | ||
754 | // last try, use parents of selected | |
755 | if (checkAvailLocParents) { | |
756 | // now walk up the parent chain for preferredLanguage again | |
757 | // checking against parents of selected availLocNorm entries | |
758 | // but this time start with current prefLangNormName | |
759 | uprv_strcpy(prefLangBaseName, prefLangNormName); | |
760 | while (TRUE) { | |
761 | tmpStatus = U_ZERO_ERROR; | |
762 | // now compare this preferredLanguage to the parent of each normalized localization | |
763 | // name, if it has one; if matches, copy *original* localization name | |
764 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
765 | const char *availLocParent = getLocParent(availLocNorm[availLocIndex]); | |
766 | if (availLocParent && uprv_strcmp(prefLangBaseName, availLocParent) == 0) { | |
767 | foundMatch = TRUE; // availLocIndex records where | |
768 | break; | |
769 | } | |
770 | } | |
771 | if (foundMatch) { | |
772 | break; | |
773 | } | |
774 | ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
775 | if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { | |
776 | break; // reached root or cannot proceed further | |
777 | } | |
778 | uprv_strcpy(prefLangBaseName, prefLangParentName); | |
779 | } | |
780 | } | |
781 | if (foundMatch) { | |
782 | break; // found a loc for this preferredLanguages entry | |
783 | } | |
784 | } | |
785 | ||
786 | // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations | |
787 | if (foundMatch) { | |
788 | // Here availLocIndex corresponds to the first matched localization | |
789 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
790 | int32_t availLocMatchIndex = availLocIndex; | |
791 | if (locsToUseCount < localizationsToUseCapacity) { | |
792 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex]; | |
793 | } | |
794 | // at this point we must have availLocBase, and minimally matched against that. | |
795 | // if we have not already allocated and filled the array of | |
796 | // normalized availableLocalizations, do so now, but don't require it | |
797 | if (availLocNorm == NULL) { | |
798 | availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
799 | if (availLocNorm != NULL) { | |
800 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
801 | tmpStatus = U_ZERO_ERROR; | |
802 | ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); | |
803 | if (U_FAILURE(tmpStatus)) { | |
804 | availLocNorm[availLocIndex][0] = 0; // effectively remove this entry | |
805 | } | |
806 | } | |
807 | } | |
808 | } | |
809 | ||
810 | // add normalized form of matching loc, if different and in availLocBase | |
811 | if (locsToUseCount < localizationsToUseCapacity) { | |
812 | tmpStatus = U_ZERO_ERROR; | |
813 | char matchedLocNormName[kLangScriptRegMaxLen + 1]; | |
814 | char matchedLocParentName[kLangScriptRegMaxLen + 1]; | |
815 | // get normalized form of matching loc | |
816 | if (availLocNorm != NULL) { | |
817 | uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]); | |
818 | } else { | |
819 | ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
820 | } | |
821 | if (U_SUCCESS(tmpStatus)) { | |
822 | // add normalized form of matching loc, if different and in availLocBase | |
823 | if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) { | |
824 | // normalization of matched localization is different, see if we have the normalization in availableLocalizations | |
825 | // from this point on, availLocIndex no longer corresponds to the matched localization. | |
826 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
827 | if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0 | |
828 | || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0)) | |
829 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { | |
830 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; | |
831 | break; | |
832 | } | |
833 | } | |
834 | } | |
835 | ||
836 | // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase | |
837 | while (locsToUseCount < localizationsToUseCapacity) { | |
838 | ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
839 | if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) { | |
840 | break; // reached root or cannot proceed further | |
841 | } | |
842 | ||
843 | // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them) | |
844 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
845 | if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0 | |
846 | || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0)) | |
847 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { | |
848 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; | |
849 | break; | |
850 | } | |
851 | } | |
852 | uprv_strcpy(matchedLocNormName, matchedLocParentName); | |
853 | } | |
854 | ||
855 | // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization | |
856 | // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from | |
857 | // matchedLocNormName again, comparing against parents of selected availLocNorm entries. | |
858 | // But this picks up too many matches that are not parents of the matched localization. So | |
859 | // we just handle these specially. | |
860 | if ( locsToUseCount < localizationsToUseCapacity | |
861 | && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0 | |
862 | || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) { | |
863 | int32_t zhTW_matchIndex = -1; | |
864 | UBool zhHant_found = FALSE; | |
865 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
866 | if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) { | |
867 | zhTW_matchIndex = availLocIndex; | |
868 | } | |
869 | if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) { | |
870 | zhHant_found = TRUE; | |
871 | } | |
872 | } | |
873 | if (zhTW_matchIndex >= 0 && !zhHant_found | |
874 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) { | |
875 | localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex]; | |
876 | } | |
877 | } | |
878 | } | |
879 | } | |
880 | } | |
881 | ||
882 | uprv_free(availLocNorm); | |
883 | uprv_free(availLocBase); | |
884 | return locsToUseCount; | |
885 | } | |
886 |