]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ***************************************************************************************** | |
a961784b | 3 | * Copyright (C) 2014-2016 Apple Inc. All Rights Reserved. |
57a6839d A |
4 | ***************************************************************************************** |
5 | */ | |
6 | ||
2ca993e8 A |
7 | #define DEBUG_UALOC 0 |
8 | #if DEBUG_UALOC | |
9 | #include <stdio.h> | |
10 | #endif | |
11 | #include <string.h> | |
57a6839d A |
12 | #include "unicode/utypes.h" |
13 | #include "unicode/ualoc.h" | |
14 | #include "unicode/uloc.h" | |
15 | #include "unicode/ures.h" | |
16 | #include "unicode/putil.h" | |
17 | #include "cstring.h" | |
18 | #include "cmemory.h" | |
b331163b A |
19 | #include "uhash.h" |
20 | #include "umutex.h" | |
21 | #include "ucln_cmn.h" | |
57a6839d A |
22 | // the following has replacements for some math.h funcs etc |
23 | #include "putilimp.h" | |
24 | ||
25 | ||
26 | // The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter. | |
27 | // From its docs (adapted): [IntF is] a special integer that represents the number in | |
28 | // normalized scientific notation. | |
29 | // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent | |
30 | // offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g. | |
31 | // 14660000000000 -> 1.46600E13 -> 63146600 | |
32 | // 0.0001 -> 1.00000E-4 -> 46100000 | |
//     -123.456 -> -1.23456E2 -> -52123456
34 | // | |
35 | // Here to avoid an extra division we have the max coefficient as 999999 (instead of | |
36 | // 9.99999) and instead offset the exponent by -55. | |
37 | // | |
38 | static double doubleFromIntF(int32_t intF) { | |
39 | double coefficient = (double)(intF % 1000000); | |
40 | int32_t exponent = (intF / 1000000) - 55; | |
41 | return coefficient * uprv_pow10(exponent); | |
42 | } | |
43 | ||
44 | static int compareLangEntries(const void * entry1, const void * entry2) { | |
45 | double fraction1 = ((const UALanguageEntry *)entry1)->userFraction; | |
46 | double fraction2 = ((const UALanguageEntry *)entry2)->userFraction; | |
47 | // want descending order | |
48 | if (fraction1 > fraction2) return -1; | |
49 | if (fraction1 < fraction2) return 1; | |
50 | // userFractions the same, sort by languageCode | |
51 | return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode); | |
52 | } | |
53 | ||
54 | static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official" | |
55 | static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official" | |
56 | static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional" | |
57 | ||
58 | enum { | |
59 | kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India | |
60 | kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this | |
61 | }; | |
62 | ||
63 | U_CAPI int32_t U_EXPORT2 | |
64 | ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction, | |
65 | UALanguageEntry *entries, int32_t entriesCapacity, | |
66 | UErrorCode *err) | |
67 | { | |
68 | if (U_FAILURE(*err)) { | |
69 | return 0; | |
70 | } | |
71 | if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 || | |
72 | ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) { | |
73 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
74 | return 0; | |
75 | } | |
76 | UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err); | |
77 | rb = ures_getByKey(rb, "territoryInfo", rb, err); | |
78 | rb = ures_getByKey(rb, regionID, rb, err); | |
79 | if (U_FAILURE(*err)) { | |
80 | ures_close(rb); | |
81 | return 0; | |
82 | } | |
83 | ||
84 | int32_t entryCount = 0; | |
85 | UResourceBundle *langBund = NULL; | |
86 | int32_t lbIdx, lbCount = ures_getSize(rb); | |
87 | UALanguageEntry localLangEntries[kLocalLangEntriesMax]; | |
88 | UALanguageEntry * langEntries = localLangEntries; | |
89 | int32_t langEntriesMax = kLocalLangEntriesMax; | |
90 | ||
91 | for (lbIdx = 0; lbIdx < lbCount; lbIdx++) { | |
92 | langBund = ures_getByIndex(rb, lbIdx, langBund, err); | |
93 | if (U_FAILURE(*err)) { | |
94 | break; | |
95 | } | |
96 | const char * langCode = ures_getKey(langBund); | |
97 | if (uprv_strcmp(langCode,"territoryF") == 0) { | |
98 | continue; | |
99 | } | |
100 | if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen | |
101 | continue; // a code we cannot handle | |
102 | } | |
103 | ||
104 | UErrorCode localErr = U_ZERO_ERROR; | |
105 | double userFraction = 0.0; | |
106 | UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr); | |
107 | if (U_SUCCESS(localErr)) { | |
108 | int32_t intF = ures_getInt(itemBund, &localErr); | |
109 | if (U_SUCCESS(localErr)) { | |
110 | userFraction = doubleFromIntF(intF); | |
111 | } | |
112 | ures_close(itemBund); | |
113 | } | |
114 | if (userFraction < minimumFraction) { | |
115 | continue; | |
116 | } | |
117 | if (entries != NULL) { | |
118 | localErr = U_ZERO_ERROR; | |
119 | UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED; | |
120 | int32_t ulen; | |
121 | const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr); | |
122 | if (U_SUCCESS(localErr)) { | |
123 | int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial); | |
124 | if (cmp == 0) { | |
125 | langStatus = UALANGSTATUS_OFFICIAL; | |
126 | } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) { | |
127 | langStatus = UALANGSTATUS_DEFACTO_OFFICIAL; | |
128 | } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) { | |
129 | langStatus = UALANGSTATUS_REGIONAL_OFFICIAL; | |
130 | } | |
131 | } | |
132 | // Now we have all of the info for our next entry | |
133 | if (entryCount >= langEntriesMax) { | |
134 | int32_t newMax = langEntriesMax * kLangEntriesFactor; | |
135 | if (langEntries == localLangEntries) { | |
136 | // first allocation, copy from local buf | |
137 | langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry)); | |
138 | if (langEntries == NULL) { | |
139 | *err = U_MEMORY_ALLOCATION_ERROR; | |
140 | break; | |
141 | } | |
142 | uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry)); | |
143 | } else { | |
144 | langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry)); | |
145 | if (langEntries == NULL) { | |
146 | *err = U_MEMORY_ALLOCATION_ERROR; | |
147 | break; | |
148 | } | |
149 | } | |
150 | langEntriesMax = newMax; | |
151 | } | |
152 | uprv_strcpy(langEntries[entryCount].languageCode, langCode); | |
153 | langEntries[entryCount].userFraction = userFraction; | |
154 | langEntries[entryCount].status = langStatus; | |
155 | } | |
156 | entryCount++; | |
157 | } | |
158 | ures_close(langBund); | |
159 | ures_close(rb); | |
160 | if (U_FAILURE(*err)) { | |
161 | if (langEntries != localLangEntries) { | |
162 | free(langEntries); | |
163 | } | |
164 | return 0; | |
165 | } | |
166 | if (entries != NULL) { | |
167 | // sort langEntries, copy entries that fit to provided array | |
168 | qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries); | |
169 | if (entryCount > entriesCapacity) { | |
170 | entryCount = entriesCapacity; | |
171 | } | |
172 | uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry)); | |
173 | if (langEntries != localLangEntries) { | |
174 | free(langEntries); | |
175 | } | |
176 | } | |
177 | return entryCount; | |
178 | } | |
179 | ||
// Forced parents consulted first by ualoc_getAppleParent for "en" and "zh"
// locales. Laid out as flat { child, parent } string pairs ending with a
// single NULL. The children are kept in ascending strcmp order because the
// lookup stops at the first child that compares greater than the locale
// being searched for.
static const char * forceParent[] = {
    /* child      parent   */
    "en_150",  "en_GB",   // en for Europe
    "en_AU",   "en_GB",
    "en_BD",   "en_GB",   // en for Bangladesh
    "en_BE",   "en_150",  // en for Belgium goes to en for Europe
    "en_DG",   "en_GB",
    "en_FK",   "en_GB",
    "en_GG",   "en_GB",
    "en_GI",   "en_GB",
    "en_HK",   "en_GB",   // en for Hong Kong
    "en_IE",   "en_GB",
    "en_IM",   "en_GB",
    "en_IN",   "en_GB",
    "en_IO",   "en_GB",
    "en_JE",   "en_GB",
    "en_JM",   "en_GB",
    "en_MO",   "en_GB",
    "en_MT",   "en_GB",
    "en_MV",   "en_GB",   // for Maldives
    "en_MY",   "en_GB",   // en for Malaysia
    "en_NZ",   "en_AU",
    "en_PK",   "en_GB",   // en for Pakistan
    "en_SG",   "en_GB",
    "en_SH",   "en_GB",
    "en_VG",   "en_GB",
    "zh",      "zh_CN",
    "zh_CN",   "root",
    "zh_Hant", "zh_TW",
    "zh_TW",   "root",
    NULL
};

// Size (excluding the terminating NUL) of the buffer used for child locale
// names read from the parentLocales data.
enum {
    kLocBaseNameMax = 16
};
213 | ||
57a6839d A |
214 | U_CAPI int32_t U_EXPORT2 |
215 | ualoc_getAppleParent(const char* localeID, | |
216 | char * parent, | |
217 | int32_t parentCapacity, | |
218 | UErrorCode* err) | |
219 | { | |
220 | UResourceBundle *rb; | |
221 | int32_t len; | |
222 | UErrorCode tempStatus; | |
223 | char locbuf[ULOC_FULLNAME_CAPACITY+1]; | |
08b89b0a | 224 | char * foundDoubleUnderscore; |
57a6839d A |
225 | |
226 | if (U_FAILURE(*err)) { | |
227 | return 0; | |
228 | } | |
229 | if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) { | |
230 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
231 | return 0; | |
232 | } | |
08b89b0a | 233 | len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */ |
57a6839d A |
234 | if (U_FAILURE(*err)) { |
235 | return 0; | |
236 | } | |
237 | if (*err == U_STRING_NOT_TERMINATED_WARNING) { | |
238 | locbuf[ULOC_FULLNAME_CAPACITY] = 0; | |
239 | *err = U_ZERO_ERROR; | |
240 | } | |
08b89b0a A |
241 | foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */ |
242 | if (foundDoubleUnderscore != NULL) { | |
243 | *foundDoubleUnderscore = 0; /* terminate at the __ */ | |
244 | len = uprv_strlen(locbuf); | |
245 | } | |
b331163b | 246 | if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) { |
57a6839d A |
247 | const char ** forceParentPtr = forceParent; |
248 | const char * testCurLoc; | |
249 | while ( (testCurLoc = *forceParentPtr++) != NULL ) { | |
250 | int cmp = uprv_strcmp(locbuf, testCurLoc); | |
251 | if (cmp <= 0) { | |
252 | if (cmp == 0) { | |
253 | len = uprv_strlen(*forceParentPtr); | |
254 | if (len < parentCapacity) { | |
255 | uprv_strcpy(parent, *forceParentPtr); | |
256 | } else { | |
257 | *err = U_BUFFER_OVERFLOW_ERROR; | |
258 | } | |
259 | return len; | |
260 | } | |
261 | break; | |
262 | } | |
263 | forceParentPtr++; | |
264 | } | |
265 | } | |
266 | tempStatus = U_ZERO_ERROR; | |
267 | rb = ures_openDirect(NULL, locbuf, &tempStatus); | |
268 | if (U_SUCCESS(tempStatus)) { | |
269 | const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus); | |
2ca993e8 | 270 | ures_close(rb); |
57a6839d A |
271 | if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) { |
272 | // we have followed an alias | |
273 | len = uprv_strlen(actualLocale); | |
274 | if (len < parentCapacity) { | |
275 | uprv_strcpy(parent, actualLocale); | |
276 | } else { | |
277 | *err = U_BUFFER_OVERFLOW_ERROR; | |
278 | } | |
57a6839d A |
279 | return len; |
280 | } | |
2ca993e8 A |
281 | } |
282 | tempStatus = U_ZERO_ERROR; | |
283 | rb = ures_openDirect(NULL, "supplementalData", &tempStatus); | |
284 | rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus); | |
285 | if (U_SUCCESS(tempStatus)) { | |
286 | UResourceBundle * parentMapBundle = NULL; | |
287 | int32_t childLen = 0; | |
288 | while (childLen == 0) { | |
289 | tempStatus = U_ZERO_ERROR; | |
290 | parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus); | |
291 | if (U_FAILURE(tempStatus)) { | |
292 | break; // no more parent bundles, normal exit | |
293 | } | |
294 | char childName[kLocBaseNameMax + 1]; | |
295 | childName[kLocBaseNameMax] = 0; | |
296 | const char * childPtr = NULL; | |
297 | if (ures_getType(parentMapBundle) == URES_STRING) { | |
298 | childLen = kLocBaseNameMax; | |
299 | childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus); | |
300 | if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { | |
301 | childLen = 0; | |
302 | } | |
303 | } else { // should be URES_ARRAY | |
304 | int32_t childCur, childCount = ures_getSize(parentMapBundle); | |
305 | for (childCur = 0; childCur < childCount && childLen == 0; childCur++) { | |
306 | tempStatus = U_ZERO_ERROR; | |
307 | childLen = kLocBaseNameMax; | |
308 | childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus); | |
309 | if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { | |
310 | childLen = 0; | |
311 | } | |
312 | } | |
313 | } | |
314 | } | |
315 | ures_close(rb); | |
316 | if (childLen > 0) { | |
317 | // parentMapBundle key is the parent we are looking for | |
318 | const char * keyStr = ures_getKey(parentMapBundle); | |
319 | len = uprv_strlen(keyStr); | |
57a6839d | 320 | if (len < parentCapacity) { |
2ca993e8 | 321 | uprv_strcpy(parent, keyStr); |
57a6839d A |
322 | } else { |
323 | *err = U_BUFFER_OVERFLOW_ERROR; | |
324 | } | |
2ca993e8 | 325 | ures_close(parentMapBundle); |
57a6839d A |
326 | return len; |
327 | } | |
2ca993e8 | 328 | ures_close(parentMapBundle); |
57a6839d | 329 | } |
2ca993e8 | 330 | |
57a6839d A |
331 | len = uloc_getParent(locbuf, parent, parentCapacity, err); |
332 | if (U_SUCCESS(*err) && len == 0) { | |
333 | len = 4; | |
334 | if (len < parentCapacity) { | |
335 | uprv_strcpy(parent, "root"); | |
336 | } else { | |
337 | *err = U_BUFFER_OVERFLOW_ERROR; | |
338 | } | |
339 | } | |
340 | return len; | |
341 | } | |
342 | ||
b331163b A |
343 | // ================= |
344 | // Data and related functions for ualoc_localizationsToUse | |
345 | // ================= | |
346 | ||
347 | static const char * appleAliasMap[][2] = { | |
348 | // names are lowercase here because they are looked up after being processed by uloc_getBaseName | |
349 | { "arabic", "ar" }, // T2 | |
350 | { "chinese", "zh_Hans" }, // T0 | |
351 | { "danish", "da" }, // T2 | |
352 | { "dutch", "nl" }, // T1, still in use | |
353 | { "english", "en" }, // T0, still in use | |
354 | { "finnish", "fi" }, // T2 | |
355 | { "french", "fr" }, // T0, still in use | |
356 | { "german", "de" }, // T0, still in use | |
357 | { "italian", "it" }, // T1, still in use | |
358 | { "japanese", "ja" }, // T0, still in use | |
359 | { "korean", "ko" }, // T1 | |
a961784b | 360 | { "no_NO", "nb_NO" }, // special |
b331163b A |
361 | { "norwegian", "nb" }, // T2 |
362 | { "polish", "pl" }, // T2 | |
363 | { "portuguese", "pt" }, // T2 | |
364 | { "russian", "ru" }, // T2 | |
365 | { "spanish", "es" }, // T1, still in use | |
366 | { "swedish", "sv" }, // T2 | |
367 | { "thai", "th" }, // T2 | |
368 | { "turkish", "tr" }, // T2 | |
369 | { "zh", "zh_Hans" }, // special | |
370 | }; | |
2ca993e8 | 371 | enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) }; |
b331163b A |
372 | |
373 | static const char * appleParentMap[][2] = { | |
374 | { "en_150", "en_GB" }, // Apple custom parent | |
375 | { "en_AD", "en_150" }, // Apple locale addition | |
376 | { "en_AL", "en_150" }, // Apple locale addition | |
377 | { "en_AT", "en_150" }, // Apple locale addition | |
378 | { "en_AU", "en_GB" }, // Apple custom parent | |
379 | { "en_BA", "en_150" }, // Apple locale addition | |
380 | { "en_BD", "en_GB" }, // Apple custom parent | |
a961784b | 381 | { "en_BE", "en_150" }, // Apple custom parent |
b331163b A |
382 | { "en_CH", "en_150" }, // Apple locale addition |
383 | { "en_CY", "en_150" }, // Apple locale addition | |
384 | { "en_CZ", "en_150" }, // Apple locale addition | |
385 | { "en_DE", "en_150" }, // Apple locale addition | |
a961784b | 386 | { "en_DG", "en_GB" }, |
b331163b A |
387 | { "en_DK", "en_150" }, // Apple locale addition |
388 | { "en_EE", "en_150" }, // Apple locale addition | |
389 | { "en_ES", "en_150" }, // Apple locale addition | |
390 | { "en_FI", "en_150" }, // Apple locale addition | |
a961784b | 391 | { "en_FK", "en_GB" }, |
b331163b | 392 | { "en_FR", "en_150" }, // Apple locale addition |
a961784b A |
393 | { "en_GG", "en_GB" }, |
394 | { "en_GI", "en_GB" }, | |
b331163b A |
395 | { "en_GR", "en_150" }, // Apple locale addition |
396 | { "en_HK", "en_GB" }, // Apple custom parent | |
397 | { "en_HR", "en_150" }, // Apple locale addition | |
398 | { "en_HU", "en_150" }, // Apple locale addition | |
a961784b | 399 | { "en_IE", "en_GB" }, |
b331163b | 400 | { "en_IL", "en_001" }, // Apple locale addition |
a961784b | 401 | { "en_IM", "en_GB" }, |
b331163b | 402 | { "en_IN", "en_GB" }, // Apple custom parent |
a961784b | 403 | { "en_IO", "en_GB" }, |
b331163b A |
404 | { "en_IS", "en_150" }, // Apple locale addition |
405 | { "en_IT", "en_150" }, // Apple locale addition | |
a961784b | 406 | { "en_JE", "en_GB" }, |
a62d09fc | 407 | { "en_JM", "en_GB" }, |
b331163b A |
408 | { "en_LT", "en_150" }, // Apple locale addition |
409 | { "en_LU", "en_150" }, // Apple locale addition | |
410 | { "en_LV", "en_150" }, // Apple locale addition | |
411 | { "en_ME", "en_150" }, // Apple locale addition | |
a961784b A |
412 | { "en_MO", "en_GB" }, |
413 | { "en_MT", "en_GB" }, | |
2ca993e8 | 414 | { "en_MV", "en_GB" }, |
b331163b A |
415 | { "en_MY", "en_GB" }, // Apple custom parent |
416 | { "en_NL", "en_150" }, // Apple locale addition | |
417 | { "en_NO", "en_150" }, // Apple locale addition | |
2ca993e8 | 418 | { "en_NZ", "en_AU" }, |
b331163b A |
419 | { "en_PK", "en_GB" }, // Apple custom parent |
420 | { "en_PL", "en_150" }, // Apple locale addition | |
421 | { "en_PT", "en_150" }, // Apple locale addition | |
422 | { "en_RO", "en_150" }, // Apple locale addition | |
423 | { "en_RU", "en_150" }, // Apple locale addition | |
424 | { "en_SE", "en_150" }, // Apple locale addition | |
a961784b A |
425 | { "en_SG", "en_GB" }, |
426 | { "en_SH", "en_GB" }, | |
b331163b A |
427 | { "en_SI", "en_150" }, // Apple locale addition |
428 | { "en_SK", "en_150" }, // Apple locale addition | |
429 | { "en_TR", "en_150" }, // Apple locale addition | |
a961784b | 430 | { "en_VG", "en_GB" }, |
b331163b | 431 | }; |
2ca993e8 A |
432 | enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) }; |
433 | ||
434 | typedef struct { | |
435 | const char * locale; | |
436 | const char * parent; | |
437 | int8_t distance; | |
438 | } LocParentAndDistance; | |
439 | ||
440 | static LocParentAndDistance locParentMap[] = { | |
441 | // The localizations listed in the first column are in | |
442 | // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.). | |
443 | // The distance is a rough measure of distance from | |
444 | // the localization to its parent, used as a weight. | |
445 | { "en_100", "en", 2 }, | |
446 | { "en_150", "en_GB", 1 }, | |
447 | { "en_AU", "en_GB", 1 }, | |
448 | { "en_GB", "en_100", 0 }, | |
449 | { "es_419", "es", 2 }, | |
450 | { "es_MX", "es_419", 0 }, | |
451 | { "pt_PT", "pt", 2 }, | |
452 | { "zh_Hans_CN", "zh_Hans", 0 }, | |
453 | { "zh_Hant_HK", "zh_Hant", 1 }, | |
454 | { "zh_Hant_TW", "zh_Hant", 0 }, | |
b331163b | 455 | }; |
2ca993e8 | 456 | enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 }; |
b331163b A |
457 | |
458 | enum { | |
459 | kStringsAllocSize = 4096, // cannot expand; current actual usage 3610 | |
460 | kParentMapInitCount = 161 // can expand; current actual usage 161 | |
461 | }; | |
462 | ||
463 | U_CDECL_BEGIN | |
464 | static UBool U_CALLCONV ualocale_cleanup(void); | |
465 | U_CDECL_END | |
466 | ||
467 | U_NAMESPACE_BEGIN | |
468 | ||
469 | static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER; | |
470 | ||
471 | static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure | |
472 | static char* gStrings = NULL; | |
473 | static UHashtable* gAliasMap = NULL; | |
474 | static UHashtable* gParentMap = NULL; | |
475 | ||
476 | U_NAMESPACE_END | |
477 | ||
478 | U_CDECL_BEGIN | |
479 | ||
480 | static UBool U_CALLCONV ualocale_cleanup(void) | |
481 | { | |
482 | U_NAMESPACE_USE | |
483 | ||
484 | gUALocaleCacheInitOnce.reset(); | |
485 | ||
486 | if (gMapDataState > 0) { | |
487 | uhash_close(gParentMap); | |
488 | gParentMap = NULL; | |
489 | uhash_close(gAliasMap); | |
490 | gAliasMap = NULL; | |
491 | uprv_free(gStrings); | |
492 | gStrings = NULL; | |
493 | } | |
494 | gMapDataState = 0; | |
495 | return TRUE; | |
496 | } | |
497 | ||
498 | static void initializeMapData() { | |
499 | U_NAMESPACE_USE | |
500 | ||
501 | UResourceBundle * curBundle; | |
502 | char* stringsPtr; | |
503 | char* stringsEnd; | |
504 | UErrorCode status; | |
505 | int32_t entryIndex, icuEntryCount; | |
506 | ||
507 | ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup); | |
508 | ||
509 | gStrings = (char*)uprv_malloc(kStringsAllocSize); | |
510 | if (gStrings) { | |
511 | stringsPtr = gStrings; | |
512 | stringsEnd = gStrings + kStringsAllocSize; | |
513 | } | |
514 | ||
515 | status = U_ZERO_ERROR; | |
516 | curBundle = NULL; | |
517 | icuEntryCount = 0; | |
518 | if (gStrings) { | |
519 | curBundle = ures_openDirect(NULL, "metadata", &status); | |
520 | curBundle = ures_getByKey(curBundle, "alias", curBundle, &status); | |
521 | curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE | |
522 | if (U_SUCCESS(status)) { | |
523 | icuEntryCount = ures_getSize(curBundle); // currently 331 | |
524 | } | |
525 | } | |
526 | status = U_ZERO_ERROR; | |
527 | gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, | |
528 | kAppleAliasMapCount + icuEntryCount, &status); | |
529 | // defaults to keyDeleter NULL | |
530 | if (U_SUCCESS(status)) { | |
531 | for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) { | |
532 | uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status); | |
533 | } | |
534 | status = U_ZERO_ERROR; | |
535 | UResourceBundle * aliasMapBundle = NULL; | |
536 | for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { | |
537 | aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status); | |
538 | if (U_FAILURE(status)) { | |
539 | break; // error | |
540 | } | |
541 | const char * keyStr = ures_getKey(aliasMapBundle); | |
542 | int32_t len = uprv_strlen(keyStr); | |
543 | if (len >= stringsEnd - stringsPtr) { | |
544 | break; // error | |
545 | } | |
546 | uprv_strcpy(stringsPtr, keyStr); | |
547 | char * inLocStr = stringsPtr; | |
548 | stringsPtr += len + 1; | |
549 | ||
550 | len = stringsEnd - stringsPtr - 1; | |
551 | ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status); | |
552 | if (U_FAILURE(status)) { | |
553 | break; // error | |
554 | } | |
555 | stringsPtr[len] = 0; | |
556 | uhash_put(gAliasMap, inLocStr, stringsPtr, &status); | |
557 | stringsPtr += len + 1; | |
558 | } | |
559 | ures_close(aliasMapBundle); | |
560 | } else { | |
561 | ures_close(curBundle); | |
562 | uprv_free(gStrings); | |
563 | gMapDataState = -1; // failure | |
564 | return; | |
565 | } | |
566 | ures_close(curBundle); | |
567 | ||
568 | status = U_ZERO_ERROR; | |
569 | gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, | |
570 | kParentMapInitCount, &status); | |
571 | // defaults to keyDeleter NULL | |
572 | if (U_SUCCESS(status)) { | |
573 | curBundle = ures_openDirect(NULL, "supplementalData", &status); | |
574 | curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE | |
575 | if (U_SUCCESS(status)) { | |
576 | UResourceBundle * parentMapBundle = NULL; | |
577 | while (TRUE) { | |
578 | parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status); | |
579 | if (U_FAILURE(status)) { | |
580 | break; // no more parent bundles, normal exit | |
581 | } | |
582 | const char * keyStr = ures_getKey(parentMapBundle); | |
583 | int32_t len = uprv_strlen(keyStr); | |
584 | if (len >= stringsEnd - stringsPtr) { | |
585 | break; // error | |
586 | } | |
587 | uprv_strcpy(stringsPtr, keyStr); | |
588 | char * parentStr = stringsPtr; | |
589 | stringsPtr += len + 1; | |
590 | ||
591 | if (ures_getType(parentMapBundle) == URES_STRING) { | |
592 | len = stringsEnd - stringsPtr - 1; | |
593 | ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status); | |
594 | if (U_FAILURE(status)) { | |
595 | break; // error | |
596 | } | |
597 | stringsPtr[len] = 0; | |
598 | uhash_put(gParentMap, stringsPtr, parentStr, &status); | |
599 | stringsPtr += len + 1; | |
600 | } else { | |
601 | // should be URES_ARRAY | |
602 | icuEntryCount = ures_getSize(parentMapBundle); | |
603 | for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { | |
604 | len = stringsEnd - stringsPtr - 1; | |
605 | ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status); | |
606 | if (U_FAILURE(status)) { | |
607 | break; | |
608 | } | |
609 | stringsPtr[len] = 0; | |
610 | uhash_put(gParentMap, stringsPtr, parentStr, &status); | |
611 | stringsPtr += len + 1; | |
612 | } | |
613 | } | |
614 | } | |
615 | ures_close(parentMapBundle); | |
616 | } | |
617 | ures_close(curBundle); | |
618 | ||
619 | status = U_ZERO_ERROR; | |
620 | for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) { | |
621 | uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status); | |
622 | } | |
623 | } else { | |
624 | uhash_close(gAliasMap); | |
625 | gAliasMap = NULL; | |
626 | uprv_free(gStrings); | |
627 | gMapDataState = -1; // failure | |
628 | return; | |
629 | } | |
630 | ||
2ca993e8 A |
631 | #if DEBUG_UALOC |
632 | printf("# gStrings size %ld\n", stringsPtr - gStrings); | |
633 | printf("# gParentMap count %d\n", uhash_count(gParentMap)); | |
634 | #endif | |
b331163b A |
635 | gMapDataState = 1; |
636 | } | |
637 | ||
638 | U_CDECL_END | |
639 | ||
640 | // The following maps aliases, etc. Ensures 0-termination if no error. | |
641 | static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status) | |
642 | { | |
643 | if (U_FAILURE(*status)) { | |
644 | return; | |
645 | } | |
646 | // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status); | |
647 | ||
648 | const char *replacement = NULL; | |
649 | if (gMapDataState > 0) { | |
650 | replacement = (const char *)uhash_get(gAliasMap, locale); | |
651 | } | |
652 | if (replacement == NULL) { | |
653 | replacement = locale; | |
654 | } | |
2ca993e8 | 655 | int32_t len = strnlen(replacement, normalizedCapacity); |
b331163b A |
656 | if (len < normalizedCapacity) { // allow for 0 termination |
657 | uprv_strcpy(normalized, replacement); | |
658 | } else { | |
659 | *status = U_BUFFER_OVERFLOW_ERROR; | |
660 | } | |
661 | } | |
662 | ||
663 | static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status) | |
664 | { | |
665 | if (U_FAILURE(*status)) { | |
666 | return; | |
667 | } | |
668 | if (gMapDataState > 0) { | |
669 | const char *replacement = (const char *)uhash_get(gParentMap, locale); | |
670 | if (replacement) { | |
671 | int32_t len = uprv_strlen(replacement); | |
672 | if (len < parentCapacity) { // allow for 0 termination | |
673 | uprv_strcpy(parent, replacement); | |
674 | } else { | |
675 | *status = U_BUFFER_OVERFLOW_ERROR; | |
676 | } | |
677 | return; | |
678 | } | |
679 | } | |
680 | uloc_getParent(locale, parent, parentCapacity - 1, status); | |
681 | parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING | |
682 | } | |
683 | ||
684 | // Might do something better for this, perhaps maximizing locales then stripping | |
2ca993e8 | 685 | static const char * getLocParent(const char *locale, int32_t* distance) |
b331163b A |
686 | { |
687 | int32_t locParentIndex; | |
688 | for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) { | |
2ca993e8 A |
689 | if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) { |
690 | *distance = locParentMap[locParentIndex].distance; | |
691 | return locParentMap[locParentIndex].parent; | |
b331163b A |
692 | } |
693 | } | |
694 | return NULL; | |
695 | } | |
696 | ||
697 | // this just checks if the *pointer* value is already in the array | |
698 | static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck) | |
699 | { | |
700 | int32_t locIndex; | |
701 | for (locIndex = 0; locIndex < locsToUseCount; locIndex++) { | |
702 | if (locToCheck == localizationsToUse[locIndex]) { | |
703 | return TRUE; | |
704 | } | |
705 | } | |
706 | return FALSE; | |
707 | } | |
708 | ||
709 | enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22 | |
710 | ||
711 | int32_t | |
712 | ualoc_localizationsToUse( const char* const *preferredLanguages, | |
713 | int32_t preferredLanguagesCount, | |
714 | const char* const *availableLocalizations, | |
715 | int32_t availableLocalizationsCount, | |
716 | const char* *localizationsToUse, | |
717 | int32_t localizationsToUseCapacity, | |
718 | UErrorCode *status ) | |
719 | { | |
720 | if (U_FAILURE(*status)) { | |
721 | return -1; | |
722 | } | |
723 | if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) { | |
724 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
725 | return -1; | |
726 | } | |
727 | // get resource data, need to protect with mutex | |
728 | if (gMapDataState == 0) { | |
729 | umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData); | |
730 | } | |
731 | int32_t locsToUseCount = 0; | |
732 | int32_t prefLangIndex, availLocIndex = 0; | |
2ca993e8 A |
733 | int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match |
734 | int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0; | |
b331163b A |
735 | char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL; |
736 | char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL; | |
b331163b A |
737 | UBool foundMatch = FALSE; |
738 | ||
2ca993e8 A |
739 | #if DEBUG_UALOC |
740 | if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) { | |
741 | printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n", | |
742 | preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]); | |
743 | } else { | |
744 | printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n", | |
745 | preferredLanguagesCount, availableLocalizationsCount); | |
746 | } | |
747 | #endif | |
748 | ||
b331163b A |
749 | // Part 1, find the best matching localization, if any |
750 | for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) { | |
751 | char prefLangBaseName[kLangScriptRegMaxLen + 1]; | |
752 | char prefLangNormName[kLangScriptRegMaxLen + 1]; | |
753 | char prefLangParentName[kLangScriptRegMaxLen + 1]; | |
754 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
755 | ||
756 | if (preferredLanguages[prefLangIndex] == NULL) { | |
757 | continue; // skip NULL preferredLanguages entry, go to next one | |
758 | } | |
759 | // use underscores, fix bad capitalization, delete any keywords | |
760 | uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus); | |
761 | if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 || | |
762 | uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') { | |
763 | continue; // can't handle this preferredLanguages entry or it is invalid, go to next one | |
764 | } | |
765 | prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
2ca993e8 A |
766 | #if DEBUG_UALOC |
767 | printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName); | |
768 | #endif | |
b331163b A |
769 | |
770 | // if we have not already allocated and filled the array of | |
771 | // base availableLocalizations, do so now. | |
772 | if (availLocBase == NULL) { | |
773 | availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
774 | if (availLocBase == NULL) { | |
775 | continue; // cannot further check this preferredLanguages entry, go to next one | |
776 | } | |
2ca993e8 A |
777 | #if DEBUG_UALOC |
778 | printf(" # allocate & fill availLocBase\n"); | |
779 | #endif | |
b331163b A |
780 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { |
781 | tmpStatus = U_ZERO_ERROR; | |
2ca993e8 A |
782 | if (availableLocalizations[availLocIndex] == NULL) { |
783 | availLocBase[availLocIndex][0] = 0; // effectively remove this entry | |
784 | continue; | |
785 | } | |
b331163b A |
786 | uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus); |
787 | if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') { | |
788 | availLocBase[availLocIndex][0] = 0; // effectively remove this entry | |
2ca993e8 | 789 | continue; |
b331163b | 790 | } |
2ca993e8 A |
791 | availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING |
792 | #if DEBUG_UALOC | |
793 | printf(" # add availLocBase %s\n", availLocBase[availLocIndex]); | |
794 | #endif | |
b331163b A |
795 | } |
796 | } | |
797 | // first compare base preferredLanguage to base versions of availableLocalizations names | |
798 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
799 | if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) { | |
800 | foundMatch = TRUE; // availLocIndex records where | |
2ca993e8 A |
801 | foundMatchPrefLangIndex = prefLangIndex; |
802 | #if DEBUG_UALOC | |
803 | printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]); | |
804 | #endif | |
b331163b A |
805 | break; |
806 | } | |
807 | } | |
808 | if (foundMatch) { | |
b331163b A |
809 | break; // found a loc for this preferredLanguages entry |
810 | } | |
811 | ||
812 | // get normalized preferredLanguage | |
813 | tmpStatus = U_ZERO_ERROR; | |
814 | ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
815 | if (U_FAILURE(tmpStatus)) { | |
816 | continue; // can't handle this preferredLanguages entry, go to next one | |
817 | } | |
2ca993e8 A |
818 | #if DEBUG_UALOC |
819 | printf(" # prefLangNormName %s\n", prefLangNormName); | |
820 | #endif | |
b331163b A |
821 | // if we have not already allocated and filled the array of |
822 | // normalized availableLocalizations, do so now. | |
823 | // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", "zh_HK" into "zh_Hant_HK", | |
824 | // and fixes deprecated codes "iw" > "he", "in" > "id" etc. | |
825 | if (availLocNorm == NULL) { | |
826 | availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
827 | if (availLocNorm == NULL) { | |
828 | continue; // cannot further check this preferredLanguages entry, go to next one | |
829 | } | |
2ca993e8 A |
830 | #if DEBUG_UALOC |
831 | printf(" # allocate & fill availLocNorm\n"); | |
832 | #endif | |
b331163b A |
833 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { |
834 | tmpStatus = U_ZERO_ERROR; | |
835 | ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); | |
836 | if (U_FAILURE(tmpStatus)) { | |
837 | availLocNorm[availLocIndex][0] = 0; // effectively remove this entry | |
2ca993e8 A |
838 | #if DEBUG_UALOC |
839 | } else { | |
840 | printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]); | |
841 | #endif | |
b331163b | 842 | } |
b331163b A |
843 | } |
844 | } | |
845 | // now compare normalized preferredLanguage to normalized localization names | |
846 | // if matches, copy *original* localization name | |
847 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
848 | if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) { | |
849 | foundMatch = TRUE; // availLocIndex records where | |
2ca993e8 A |
850 | foundMatchPrefLangIndex = prefLangIndex; |
851 | #if DEBUG_UALOC | |
852 | printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); | |
853 | #endif | |
b331163b A |
854 | break; |
855 | } | |
856 | } | |
857 | if (foundMatch) { | |
b331163b A |
858 | break; // found a loc for this preferredLanguages entry |
859 | } | |
860 | ||
861 | // now walk up the parent chain for preferredLanguage | |
862 | // until we find a match or hit root | |
863 | uprv_strcpy(prefLangBaseName, prefLangNormName); | |
864 | while (!foundMatch) { | |
865 | tmpStatus = U_ZERO_ERROR; | |
866 | ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
867 | if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { | |
868 | break; // reached root or cannot proceed further | |
869 | } | |
2ca993e8 A |
870 | #if DEBUG_UALOC |
871 | printf(" # prefLangParentName %s\n", prefLangParentName); | |
872 | #endif | |
b331163b A |
873 | |
874 | // now compare this preferredLanguage parent to normalized localization names | |
875 | // if matches, copy *original* localization name | |
876 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
877 | if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) { | |
878 | foundMatch = TRUE; // availLocIndex records where | |
2ca993e8 A |
879 | foundMatchPrefLangIndex = prefLangIndex; |
880 | #if DEBUG_UALOC | |
881 | printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); | |
882 | #endif | |
b331163b A |
883 | break; |
884 | } | |
885 | } | |
886 | uprv_strcpy(prefLangBaseName, prefLangParentName); | |
887 | } | |
888 | if (foundMatch) { | |
889 | break; // found a loc for this preferredLanguages entry | |
890 | } | |
891 | ||
2ca993e8 A |
892 | // last try, use parents of selected language to try for backup match |
893 | // if we have not already found one | |
894 | if (availLocIndexBackup < 0) { | |
b331163b A |
895 | // now walk up the parent chain for preferredLanguage again |
896 | // checking against parents of selected availLocNorm entries | |
897 | // but this time start with current prefLangNormName | |
898 | uprv_strcpy(prefLangBaseName, prefLangNormName); | |
2ca993e8 | 899 | int32_t minDistance = kMaxParentDistance; |
b331163b | 900 | while (TRUE) { |
b331163b A |
901 | // now compare this preferredLanguage to normalized localization names |
902 | // parent if have one for this; if matches, copy *original* localization name | |
2ca993e8 A |
903 | #if DEBUG_UALOC |
904 | printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName); | |
905 | #endif | |
b331163b | 906 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { |
2ca993e8 A |
907 | char availLocMinOrParent[kLangScriptRegMaxLen + 1]; |
908 | int32_t distance; | |
909 | // first check for special Apple parents of availLocNorm - | |
910 | // - the number of locales with such parents is small - | |
911 | // or if no such parent, then try stripping region. | |
912 | const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance); | |
913 | if (availLocParent) { | |
914 | #if DEBUG_UALOC | |
915 | printf(" # availLocAppleParentName %s\n", availLocParent); | |
916 | #endif | |
917 | if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) { | |
918 | availLocIndexBackup = availLocIndex; // records where the match occurred | |
919 | backupMatchPrefLangIndex = prefLangIndex; | |
920 | minDistance = distance; | |
921 | #if DEBUG_UALOC | |
922 | printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance); | |
923 | #endif | |
924 | continue; | |
925 | } | |
926 | } | |
927 | if (minDistance <= 1) { | |
928 | continue; // we can't get any closer in the rest of this iteration | |
929 | } | |
930 | if (availLocParent == NULL) { | |
931 | tmpStatus = U_ZERO_ERROR; | |
932 | int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
933 | if (U_SUCCESS(tmpStatus) && regLen > 1) { | |
934 | uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
935 | if (U_SUCCESS(tmpStatus)) { | |
936 | availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
937 | #if DEBUG_UALOC | |
938 | printf(" # availLocRegMaxName %s\n", availLocMinOrParent); | |
939 | #endif | |
940 | char availLocTemp[kLangScriptRegMaxLen + 1]; | |
941 | uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus); | |
942 | if (U_SUCCESS(tmpStatus)) { | |
943 | availLocTemp[kLangScriptRegMaxLen] = 0; | |
944 | uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
945 | if (U_SUCCESS(tmpStatus)) { | |
946 | availLocMinOrParent[kLangScriptRegMaxLen] = 0; | |
947 | #if DEBUG_UALOC | |
948 | printf(" # availLocNoRegParentName %s\n", availLocMinOrParent); | |
949 | #endif | |
950 | if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { | |
951 | availLocIndexBackup = availLocIndex; // records where the match occurred | |
952 | backupMatchPrefLangIndex = prefLangIndex; | |
953 | minDistance = 1; | |
954 | #if DEBUG_UALOC | |
955 | printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n"); | |
956 | #endif | |
957 | continue; | |
958 | } | |
959 | } | |
960 | } | |
961 | } | |
962 | } | |
963 | } | |
964 | // then check against minimized version of availLocNorm | |
965 | tmpStatus = U_ZERO_ERROR; | |
966 | uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
967 | if (U_FAILURE(tmpStatus)) { | |
968 | continue; | |
969 | } | |
970 | availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
971 | #if DEBUG_UALOC | |
972 | printf(" # availLocMinimized %s\n", availLocMinOrParent); | |
973 | #endif | |
974 | if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { | |
975 | availLocIndexBackup = availLocIndex; // records where the match occurred | |
976 | backupMatchPrefLangIndex = prefLangIndex; | |
977 | minDistance = 1; | |
978 | #if DEBUG_UALOC | |
979 | printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n"); | |
980 | #endif | |
b331163b A |
981 | } |
982 | } | |
2ca993e8 | 983 | if (availLocIndexBackup >= 0) { |
b331163b A |
984 | break; |
985 | } | |
2ca993e8 | 986 | tmpStatus = U_ZERO_ERROR; |
b331163b A |
987 | ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); |
988 | if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { | |
989 | break; // reached root or cannot proceed further | |
990 | } | |
991 | uprv_strcpy(prefLangBaseName, prefLangParentName); | |
992 | } | |
993 | } | |
2ca993e8 A |
994 | } |
995 | // If we have a backup match, decide what to do | |
996 | if (availLocIndexBackup >= 0) { | |
997 | if (!foundMatch) { | |
998 | // no main match, just use the backup | |
999 | availLocIndex = availLocIndexBackup; | |
1000 | foundMatch = TRUE; | |
1001 | #if DEBUG_UALOC | |
1002 | printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup); | |
1003 | #endif | |
1004 | } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && uprv_strncmp(availLocNorm[availLocIndexBackup], "pt_BR", ULOC_LANG_CAPACITY) != 0) { | |
1005 | // have a main match but backup match was higher in the prefs, use it if for a different language | |
1006 | #if DEBUG_UALOC | |
1007 | printf(" # have backup match higher in prefs, comparing its language and script to main match\n"); | |
1008 | #endif | |
1009 | char mainLang[ULOC_LANG_CAPACITY + 1]; | |
1010 | char backupLang[ULOC_LANG_CAPACITY + 1]; | |
1011 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
1012 | uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1013 | mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination | |
1014 | uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1015 | backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination | |
1016 | if (U_SUCCESS(tmpStatus)) { | |
1017 | if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { | |
1018 | // backup match has different language than main match | |
1019 | availLocIndex = availLocIndexBackup; | |
1020 | // foundMatch is already TRUE | |
1021 | #if DEBUG_UALOC | |
1022 | printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); | |
1023 | #endif | |
1024 | } else { | |
1025 | // backup match has same language as main match, check scripts too | |
1026 | char availLocMaximized[kLangScriptRegMaxLen + 1]; | |
1027 | ||
1028 | uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus); | |
1029 | availLocMaximized[kLangScriptRegMaxLen] = 0; | |
1030 | uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1031 | mainLang[ULOC_LANG_CAPACITY] = 0; | |
1032 | ||
1033 | uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus); | |
1034 | availLocMaximized[kLangScriptRegMaxLen] = 0; | |
1035 | uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1036 | backupLang[ULOC_LANG_CAPACITY] = 0; | |
1037 | ||
1038 | if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { | |
1039 | // backup match has different script than main match | |
1040 | availLocIndex = availLocIndexBackup; | |
1041 | // foundMatch is already TRUE | |
1042 | #if DEBUG_UALOC | |
1043 | printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); | |
1044 | #endif | |
1045 | } | |
1046 | } | |
1047 | } | |
b331163b A |
1048 | } |
1049 | } | |
1050 | ||
1051 | // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations | |
1052 | if (foundMatch) { | |
1053 | // Here availLocIndex corresponds to the first matched localization | |
1054 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
1055 | int32_t availLocMatchIndex = availLocIndex; | |
1056 | if (locsToUseCount < localizationsToUseCapacity) { | |
1057 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex]; | |
1058 | } | |
1059 | // at this point we must have availLocBase, and minimally matched against that. | |
1060 | // if we have not already allocated and filled the array of | |
1061 | // normalized availableLocalizations, do so now, but don't require it | |
1062 | if (availLocNorm == NULL) { | |
1063 | availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
1064 | if (availLocNorm != NULL) { | |
1065 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1066 | tmpStatus = U_ZERO_ERROR; | |
1067 | ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); | |
1068 | if (U_FAILURE(tmpStatus)) { | |
1069 | availLocNorm[availLocIndex][0] = 0; // effectively remove this entry | |
1070 | } | |
1071 | } | |
1072 | } | |
1073 | } | |
1074 | ||
1075 | // add normalized form of matching loc, if different and in availLocBase | |
1076 | if (locsToUseCount < localizationsToUseCapacity) { | |
1077 | tmpStatus = U_ZERO_ERROR; | |
1078 | char matchedLocNormName[kLangScriptRegMaxLen + 1]; | |
1079 | char matchedLocParentName[kLangScriptRegMaxLen + 1]; | |
1080 | // get normalized form of matching loc | |
1081 | if (availLocNorm != NULL) { | |
1082 | uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]); | |
1083 | } else { | |
1084 | ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
1085 | } | |
1086 | if (U_SUCCESS(tmpStatus)) { | |
1087 | // add normalized form of matching loc, if different and in availLocBase | |
1088 | if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) { | |
1089 | // normalization of matched localization is different, see if we have the normalization in availableLocalizations | |
1090 | // from this point on, availLocIndex no longer corresponds to the matched localization. | |
1091 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1092 | if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0 | |
1093 | || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0)) | |
1094 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { | |
1095 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; | |
1096 | break; | |
1097 | } | |
1098 | } | |
1099 | } | |
1100 | ||
1101 | // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase | |
1102 | while (locsToUseCount < localizationsToUseCapacity) { | |
1103 | ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
1104 | if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) { | |
1105 | break; // reached root or cannot proceed further | |
1106 | } | |
1107 | ||
1108 | // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them) | |
1109 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1110 | if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0 | |
1111 | || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0)) | |
1112 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { | |
1113 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; | |
1114 | break; | |
1115 | } | |
1116 | } | |
1117 | uprv_strcpy(matchedLocNormName, matchedLocParentName); | |
1118 | } | |
1119 | ||
1120 | // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization | |
1121 | // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from | |
1122 | // matchedLocNormName again, comparing against parents of selected availLocNorm entries. | |
1123 | // But this picks up too many matches that are not parents of the matched localization. So | |
1124 | // we just handle these specially. | |
1125 | if ( locsToUseCount < localizationsToUseCapacity | |
1126 | && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0 | |
1127 | || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) { | |
1128 | int32_t zhTW_matchIndex = -1; | |
1129 | UBool zhHant_found = FALSE; | |
1130 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1131 | if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) { | |
1132 | zhTW_matchIndex = availLocIndex; | |
1133 | } | |
1134 | if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) { | |
1135 | zhHant_found = TRUE; | |
1136 | } | |
1137 | } | |
1138 | if (zhTW_matchIndex >= 0 && !zhHant_found | |
1139 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) { | |
1140 | localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex]; | |
1141 | } | |
1142 | } | |
1143 | } | |
1144 | } | |
1145 | } | |
1146 | ||
1147 | uprv_free(availLocNorm); | |
1148 | uprv_free(availLocBase); | |
1149 | return locsToUseCount; | |
1150 | } | |
1151 |