]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ***************************************************************************************** | |
a961784b | 3 | * Copyright (C) 2014-2016 Apple Inc. All Rights Reserved. |
57a6839d A |
4 | ***************************************************************************************** |
5 | */ | |
6 | ||
2ca993e8 A |
7 | #define DEBUG_UALOC 0 |
8 | #if DEBUG_UALOC | |
9 | #include <stdio.h> | |
10 | #endif | |
11 | #include <string.h> | |
57a6839d A |
12 | #include "unicode/utypes.h" |
13 | #include "unicode/ualoc.h" | |
14 | #include "unicode/uloc.h" | |
15 | #include "unicode/ures.h" | |
16 | #include "unicode/putil.h" | |
f3c0d7a5 | 17 | #include "unicode/ustring.h" |
57a6839d A |
18 | #include "cstring.h" |
19 | #include "cmemory.h" | |
b331163b A |
20 | #include "uhash.h" |
21 | #include "umutex.h" | |
22 | #include "ucln_cmn.h" | |
57a6839d A |
23 | // the following has replacements for some math.h funcs etc |
24 | #include "putilimp.h" | |
25 | ||
26 | ||
27 | // The numeric values in territoryInfo are in "IntF" format from LDML2ICUConverter. | |
28 | // From its docs (adapted): [IntF is] a special integer that represents the number in | |
29 | // normalized scientific notation. | |
30 | // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent | |
31 | // offset by 50 and yyyyyy is the coefficient to 5 decimal places (range 1.0 to 9.99999), e.g. | |
32 | // 14660000000000 -> 1.46600E13 -> 63146600 | |
33 | // 0.0001 -> 1.00000E-4 -> 46100000 | |
34 | // -123.456 -> -1.23456E-2 -> -48123456 | |
35 | // | |
36 | // Here to avoid an extra division we have the max coefficient as 999999 (instead of | |
37 | // 9.99999) and instead offset the exponent by -55. | |
38 | // | |
39 | static double doubleFromIntF(int32_t intF) { | |
40 | double coefficient = (double)(intF % 1000000); | |
41 | int32_t exponent = (intF / 1000000) - 55; | |
42 | return coefficient * uprv_pow10(exponent); | |
43 | } | |
44 | ||
45 | static int compareLangEntries(const void * entry1, const void * entry2) { | |
46 | double fraction1 = ((const UALanguageEntry *)entry1)->userFraction; | |
47 | double fraction2 = ((const UALanguageEntry *)entry2)->userFraction; | |
48 | // want descending order | |
49 | if (fraction1 > fraction2) return -1; | |
50 | if (fraction1 < fraction2) return 1; | |
51 | // userFractions the same, sort by languageCode | |
52 | return uprv_strcmp(((const UALanguageEntry *)entry1)->languageCode,((const UALanguageEntry *)entry2)->languageCode); | |
53 | } | |
54 | ||
f3c0d7a5 A |
55 | // language codes to version with default script |
56 | // must be sorted by language code | |
57 | static const char * langToDefaultScript[] = { | |
58 | "az", "az_Latn", | |
59 | "bs", "bs_Latn", | |
60 | "iu", "iu_Cans", | |
61 | "kk", "kk_Arab", | |
62 | "ks", "ks_Arab", | |
63 | "ku", "ku_Latn", | |
64 | "ky", "ky_Cyrl", | |
65 | "mn", "mn_Cyrl", | |
66 | "ms", "ms_Latn", | |
67 | "pa", "pa_Guru", | |
68 | "rif", "rif_Tfng", | |
69 | "shi", "shi_Tfng", | |
70 | "sr", "sr_Cyrl", | |
71 | "tg", "tg_Cyrl", | |
72 | "tk", "tk_Latn", | |
73 | "ug", "ug_Arab", | |
74 | "uz", "uz_Latn", | |
75 | "vai", "vai_Vaii", | |
76 | "yue", "yue_Hant", | |
77 | "zh", "zh_Hans", | |
78 | NULL | |
79 | }; | |
80 | ||
81 | static const char * langCodeWithScriptIfAmbig(const char * langCode) { | |
82 | const char ** langToDefScriptPtr = langToDefaultScript; | |
83 | const char * testCurLoc; | |
84 | while ( (testCurLoc = *langToDefScriptPtr++) != NULL ) { | |
85 | int cmp = uprv_strcmp(langCode, testCurLoc); | |
86 | if (cmp <= 0) { | |
87 | if (cmp == 0) { | |
88 | return *langToDefScriptPtr; | |
89 | } | |
90 | break; | |
91 | } | |
92 | langToDefScriptPtr++; | |
93 | } | |
94 | return langCode; | |
95 | } | |
96 | ||
57a6839d A |
97 | static const UChar ustrLangStatusDefacto[] = {0x64,0x65,0x5F,0x66,0x61,0x63,0x74,0x6F,0x5F,0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"de_facto_official" |
98 | static const UChar ustrLangStatusOfficial[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0}; //"official" | |
99 | static const UChar ustrLangStatusRegional[] = {0x6F,0x66,0x66,0x69,0x63,0x69,0x61,0x6C,0x5F,0x72,0x65,0x67,0x69,0x6F,0x6E,0x61,0x6C,0}; //"official_regional" | |
100 | ||
101 | enum { | |
102 | kLocalLangEntriesMax = 26, // enough for most regions to minimumFraction 0.001 except India | |
103 | kLangEntriesFactor = 3 // if we have to allocate, multiply existing size by this | |
104 | }; | |
105 | ||
106 | U_CAPI int32_t U_EXPORT2 | |
107 | ualoc_getLanguagesForRegion(const char *regionID, double minimumFraction, | |
108 | UALanguageEntry *entries, int32_t entriesCapacity, | |
109 | UErrorCode *err) | |
110 | { | |
111 | if (U_FAILURE(*err)) { | |
112 | return 0; | |
113 | } | |
114 | if ( regionID == NULL || minimumFraction < 0.0 || minimumFraction > 1.0 || | |
115 | ((entries==NULL)? entriesCapacity!=0: entriesCapacity<0) ) { | |
116 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
117 | return 0; | |
118 | } | |
119 | UResourceBundle *rb = ures_openDirect(NULL, "supplementalData", err); | |
120 | rb = ures_getByKey(rb, "territoryInfo", rb, err); | |
121 | rb = ures_getByKey(rb, regionID, rb, err); | |
122 | if (U_FAILURE(*err)) { | |
123 | ures_close(rb); | |
124 | return 0; | |
125 | } | |
126 | ||
127 | int32_t entryCount = 0; | |
128 | UResourceBundle *langBund = NULL; | |
129 | int32_t lbIdx, lbCount = ures_getSize(rb); | |
130 | UALanguageEntry localLangEntries[kLocalLangEntriesMax]; | |
131 | UALanguageEntry * langEntries = localLangEntries; | |
132 | int32_t langEntriesMax = kLocalLangEntriesMax; | |
133 | ||
134 | for (lbIdx = 0; lbIdx < lbCount; lbIdx++) { | |
135 | langBund = ures_getByIndex(rb, lbIdx, langBund, err); | |
136 | if (U_FAILURE(*err)) { | |
137 | break; | |
138 | } | |
139 | const char * langCode = ures_getKey(langBund); | |
140 | if (uprv_strcmp(langCode,"territoryF") == 0) { | |
141 | continue; | |
142 | } | |
143 | if (strnlen(langCode, UALANGDATA_CODELEN+1) > UALANGDATA_CODELEN) { // no uprv_strnlen | |
144 | continue; // a code we cannot handle | |
145 | } | |
146 | ||
147 | UErrorCode localErr = U_ZERO_ERROR; | |
148 | double userFraction = 0.0; | |
149 | UResourceBundle *itemBund = ures_getByKey(langBund, "populationShareF", NULL, &localErr); | |
150 | if (U_SUCCESS(localErr)) { | |
151 | int32_t intF = ures_getInt(itemBund, &localErr); | |
152 | if (U_SUCCESS(localErr)) { | |
153 | userFraction = doubleFromIntF(intF); | |
154 | } | |
155 | ures_close(itemBund); | |
156 | } | |
157 | if (userFraction < minimumFraction) { | |
158 | continue; | |
159 | } | |
160 | if (entries != NULL) { | |
161 | localErr = U_ZERO_ERROR; | |
162 | UALanguageStatus langStatus = UALANGSTATUS_UNSPECIFIED; | |
163 | int32_t ulen; | |
164 | const UChar * ustrLangStatus = ures_getStringByKey(langBund, "officialStatus", &ulen, &localErr); | |
165 | if (U_SUCCESS(localErr)) { | |
166 | int32_t cmp = u_strcmp(ustrLangStatus, ustrLangStatusOfficial); | |
167 | if (cmp == 0) { | |
168 | langStatus = UALANGSTATUS_OFFICIAL; | |
169 | } else if (cmp < 0 && u_strcmp(ustrLangStatus, ustrLangStatusDefacto) == 0) { | |
170 | langStatus = UALANGSTATUS_DEFACTO_OFFICIAL; | |
171 | } else if (u_strcmp(ustrLangStatus, ustrLangStatusRegional) == 0) { | |
172 | langStatus = UALANGSTATUS_REGIONAL_OFFICIAL; | |
173 | } | |
174 | } | |
175 | // Now we have all of the info for our next entry | |
176 | if (entryCount >= langEntriesMax) { | |
177 | int32_t newMax = langEntriesMax * kLangEntriesFactor; | |
178 | if (langEntries == localLangEntries) { | |
179 | // first allocation, copy from local buf | |
180 | langEntries = (UALanguageEntry*)uprv_malloc(newMax*sizeof(UALanguageEntry)); | |
181 | if (langEntries == NULL) { | |
182 | *err = U_MEMORY_ALLOCATION_ERROR; | |
183 | break; | |
184 | } | |
185 | uprv_memcpy(langEntries, localLangEntries, entryCount*sizeof(UALanguageEntry)); | |
186 | } else { | |
187 | langEntries = (UALanguageEntry*)uprv_realloc(langEntries, newMax*sizeof(UALanguageEntry)); | |
188 | if (langEntries == NULL) { | |
189 | *err = U_MEMORY_ALLOCATION_ERROR; | |
190 | break; | |
191 | } | |
192 | } | |
193 | langEntriesMax = newMax; | |
194 | } | |
f3c0d7a5 | 195 | uprv_strcpy(langEntries[entryCount].languageCode, langCodeWithScriptIfAmbig(langCode)); |
57a6839d A |
196 | langEntries[entryCount].userFraction = userFraction; |
197 | langEntries[entryCount].status = langStatus; | |
198 | } | |
199 | entryCount++; | |
200 | } | |
201 | ures_close(langBund); | |
202 | ures_close(rb); | |
203 | if (U_FAILURE(*err)) { | |
204 | if (langEntries != localLangEntries) { | |
205 | free(langEntries); | |
206 | } | |
207 | return 0; | |
208 | } | |
209 | if (entries != NULL) { | |
210 | // sort langEntries, copy entries that fit to provided array | |
211 | qsort(langEntries, entryCount, sizeof(UALanguageEntry), compareLangEntries); | |
212 | if (entryCount > entriesCapacity) { | |
213 | entryCount = entriesCapacity; | |
214 | } | |
215 | uprv_memcpy(entries, langEntries, entryCount*sizeof(UALanguageEntry)); | |
216 | if (langEntries != localLangEntries) { | |
217 | free(langEntries); | |
218 | } | |
219 | } | |
220 | return entryCount; | |
221 | } | |
222 | ||
57a6839d | 223 | static const char * forceParent[] = { |
a961784b | 224 | "en_150", "en_GB", // en for Europe |
b331163b | 225 | "en_AU", "en_GB", |
a961784b A |
226 | "en_BD", "en_GB", // en for Bangladesh |
227 | "en_BE", "en_150", // en for Belgium goes to en for Europe | |
228 | "en_DG", "en_GB", | |
229 | "en_FK", "en_GB", | |
230 | "en_GG", "en_GB", | |
231 | "en_GI", "en_GB", | |
232 | "en_HK", "en_GB", // en for Hong Kong | |
233 | "en_IE", "en_GB", | |
234 | "en_IM", "en_GB", | |
b331163b | 235 | "en_IN", "en_GB", |
a961784b A |
236 | "en_IO", "en_GB", |
237 | "en_JE", "en_GB", | |
a62d09fc | 238 | "en_JM", "en_GB", |
a961784b A |
239 | "en_MO", "en_GB", |
240 | "en_MT", "en_GB", | |
2ca993e8 | 241 | "en_MV", "en_GB", // for Maldives |
a961784b | 242 | "en_MY", "en_GB", // en for Malaysia |
2ca993e8 | 243 | "en_NZ", "en_AU", |
a961784b A |
244 | "en_PK", "en_GB", // en for Pakistan |
245 | "en_SG", "en_GB", | |
246 | "en_SH", "en_GB", | |
247 | "en_VG", "en_GB", | |
f3c0d7a5 A |
248 | "yue", "yue_CN", // yue_CN has 71M users (5.2% of 1.37G), yue_HK has 6.5M (90% of 7.17M) |
249 | "yue_CN", "root", | |
250 | "yue_HK", "root", | |
251 | "yue_Hans","yue_CN", | |
252 | "yue_Hant","yue_HK", | |
57a6839d A |
253 | "zh", "zh_CN", |
254 | "zh_CN", "root", | |
255 | "zh_Hant", "zh_TW", | |
256 | "zh_TW", "root", | |
257 | NULL | |
258 | }; | |
259 | ||
2ca993e8 A |
260 | enum { kLocBaseNameMax = 16 }; |
261 | ||
57a6839d A |
262 | U_CAPI int32_t U_EXPORT2 |
263 | ualoc_getAppleParent(const char* localeID, | |
264 | char * parent, | |
265 | int32_t parentCapacity, | |
266 | UErrorCode* err) | |
267 | { | |
268 | UResourceBundle *rb; | |
269 | int32_t len; | |
270 | UErrorCode tempStatus; | |
271 | char locbuf[ULOC_FULLNAME_CAPACITY+1]; | |
08b89b0a | 272 | char * foundDoubleUnderscore; |
57a6839d A |
273 | |
274 | if (U_FAILURE(*err)) { | |
275 | return 0; | |
276 | } | |
277 | if ( (parent==NULL)? parentCapacity!=0: parentCapacity<0 ) { | |
278 | *err = U_ILLEGAL_ARGUMENT_ERROR; | |
279 | return 0; | |
280 | } | |
08b89b0a | 281 | len = uloc_getBaseName(localeID, locbuf, ULOC_FULLNAME_CAPACITY, err); /* canonicalize and strip keywords */ |
57a6839d A |
282 | if (U_FAILURE(*err)) { |
283 | return 0; | |
284 | } | |
285 | if (*err == U_STRING_NOT_TERMINATED_WARNING) { | |
286 | locbuf[ULOC_FULLNAME_CAPACITY] = 0; | |
287 | *err = U_ZERO_ERROR; | |
288 | } | |
08b89b0a A |
289 | foundDoubleUnderscore = uprv_strstr(locbuf, "__"); /* __ comes from bad/missing subtag or variant */ |
290 | if (foundDoubleUnderscore != NULL) { | |
291 | *foundDoubleUnderscore = 0; /* terminate at the __ */ | |
292 | len = uprv_strlen(locbuf); | |
293 | } | |
b331163b | 294 | if (len >= 2 && (uprv_strncmp(locbuf, "en", 2) == 0 || uprv_strncmp(locbuf, "zh", 2) == 0)) { |
57a6839d A |
295 | const char ** forceParentPtr = forceParent; |
296 | const char * testCurLoc; | |
297 | while ( (testCurLoc = *forceParentPtr++) != NULL ) { | |
298 | int cmp = uprv_strcmp(locbuf, testCurLoc); | |
299 | if (cmp <= 0) { | |
300 | if (cmp == 0) { | |
301 | len = uprv_strlen(*forceParentPtr); | |
302 | if (len < parentCapacity) { | |
303 | uprv_strcpy(parent, *forceParentPtr); | |
304 | } else { | |
305 | *err = U_BUFFER_OVERFLOW_ERROR; | |
306 | } | |
307 | return len; | |
308 | } | |
309 | break; | |
310 | } | |
311 | forceParentPtr++; | |
312 | } | |
313 | } | |
314 | tempStatus = U_ZERO_ERROR; | |
315 | rb = ures_openDirect(NULL, locbuf, &tempStatus); | |
316 | if (U_SUCCESS(tempStatus)) { | |
317 | const char * actualLocale = ures_getLocaleByType(rb, ULOC_ACTUAL_LOCALE, &tempStatus); | |
2ca993e8 | 318 | ures_close(rb); |
57a6839d A |
319 | if (U_SUCCESS(tempStatus) && uprv_strcmp(locbuf, actualLocale) != 0) { |
320 | // we have followed an alias | |
321 | len = uprv_strlen(actualLocale); | |
322 | if (len < parentCapacity) { | |
323 | uprv_strcpy(parent, actualLocale); | |
324 | } else { | |
325 | *err = U_BUFFER_OVERFLOW_ERROR; | |
326 | } | |
57a6839d A |
327 | return len; |
328 | } | |
2ca993e8 A |
329 | } |
330 | tempStatus = U_ZERO_ERROR; | |
331 | rb = ures_openDirect(NULL, "supplementalData", &tempStatus); | |
332 | rb = ures_getByKey(rb, "parentLocales", rb, &tempStatus); | |
333 | if (U_SUCCESS(tempStatus)) { | |
334 | UResourceBundle * parentMapBundle = NULL; | |
335 | int32_t childLen = 0; | |
336 | while (childLen == 0) { | |
337 | tempStatus = U_ZERO_ERROR; | |
338 | parentMapBundle = ures_getNextResource(rb, parentMapBundle, &tempStatus); | |
339 | if (U_FAILURE(tempStatus)) { | |
340 | break; // no more parent bundles, normal exit | |
341 | } | |
342 | char childName[kLocBaseNameMax + 1]; | |
343 | childName[kLocBaseNameMax] = 0; | |
344 | const char * childPtr = NULL; | |
345 | if (ures_getType(parentMapBundle) == URES_STRING) { | |
346 | childLen = kLocBaseNameMax; | |
347 | childPtr = ures_getUTF8String(parentMapBundle, childName, &childLen, FALSE, &tempStatus); | |
348 | if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { | |
349 | childLen = 0; | |
350 | } | |
351 | } else { // should be URES_ARRAY | |
352 | int32_t childCur, childCount = ures_getSize(parentMapBundle); | |
353 | for (childCur = 0; childCur < childCount && childLen == 0; childCur++) { | |
354 | tempStatus = U_ZERO_ERROR; | |
355 | childLen = kLocBaseNameMax; | |
356 | childPtr = ures_getUTF8StringByIndex(parentMapBundle, childCur, childName, &childLen, FALSE, &tempStatus); | |
357 | if (U_FAILURE(tempStatus) || uprv_strncmp(locbuf, childPtr, kLocBaseNameMax) != 0) { | |
358 | childLen = 0; | |
359 | } | |
360 | } | |
361 | } | |
362 | } | |
363 | ures_close(rb); | |
364 | if (childLen > 0) { | |
365 | // parentMapBundle key is the parent we are looking for | |
366 | const char * keyStr = ures_getKey(parentMapBundle); | |
367 | len = uprv_strlen(keyStr); | |
57a6839d | 368 | if (len < parentCapacity) { |
2ca993e8 | 369 | uprv_strcpy(parent, keyStr); |
57a6839d A |
370 | } else { |
371 | *err = U_BUFFER_OVERFLOW_ERROR; | |
372 | } | |
2ca993e8 | 373 | ures_close(parentMapBundle); |
57a6839d A |
374 | return len; |
375 | } | |
2ca993e8 | 376 | ures_close(parentMapBundle); |
57a6839d | 377 | } |
2ca993e8 | 378 | |
57a6839d A |
379 | len = uloc_getParent(locbuf, parent, parentCapacity, err); |
380 | if (U_SUCCESS(*err) && len == 0) { | |
381 | len = 4; | |
382 | if (len < parentCapacity) { | |
383 | uprv_strcpy(parent, "root"); | |
384 | } else { | |
385 | *err = U_BUFFER_OVERFLOW_ERROR; | |
386 | } | |
387 | } | |
388 | return len; | |
389 | } | |
390 | ||
b331163b A |
391 | // ================= |
392 | // Data and related functions for ualoc_localizationsToUse | |
393 | // ================= | |
394 | ||
395 | static const char * appleAliasMap[][2] = { | |
396 | // names are lowercase here because they are looked up after being processed by uloc_getBaseName | |
397 | { "arabic", "ar" }, // T2 | |
398 | { "chinese", "zh_Hans" }, // T0 | |
399 | { "danish", "da" }, // T2 | |
400 | { "dutch", "nl" }, // T1, still in use | |
401 | { "english", "en" }, // T0, still in use | |
402 | { "finnish", "fi" }, // T2 | |
403 | { "french", "fr" }, // T0, still in use | |
404 | { "german", "de" }, // T0, still in use | |
405 | { "italian", "it" }, // T1, still in use | |
406 | { "japanese", "ja" }, // T0, still in use | |
407 | { "korean", "ko" }, // T1 | |
a961784b | 408 | { "no_NO", "nb_NO" }, // special |
b331163b A |
409 | { "norwegian", "nb" }, // T2 |
410 | { "polish", "pl" }, // T2 | |
411 | { "portuguese", "pt" }, // T2 | |
412 | { "russian", "ru" }, // T2 | |
413 | { "spanish", "es" }, // T1, still in use | |
414 | { "swedish", "sv" }, // T2 | |
415 | { "thai", "th" }, // T2 | |
416 | { "turkish", "tr" }, // T2 | |
f3c0d7a5 | 417 | { "yue", "yue_Hans"}, // special |
b331163b A |
418 | { "zh", "zh_Hans" }, // special |
419 | }; | |
2ca993e8 | 420 | enum { kAppleAliasMapCount = UPRV_LENGTHOF(appleAliasMap) }; |
b331163b A |
421 | |
422 | static const char * appleParentMap[][2] = { | |
423 | { "en_150", "en_GB" }, // Apple custom parent | |
424 | { "en_AD", "en_150" }, // Apple locale addition | |
f3c0d7a5 A |
425 | { "en_AG", "en_GB" }, // Antigua & Barbuda |
426 | { "en_AI", "en_GB" }, // Anguilla | |
b331163b A |
427 | { "en_AL", "en_150" }, // Apple locale addition |
428 | { "en_AT", "en_150" }, // Apple locale addition | |
429 | { "en_AU", "en_GB" }, // Apple custom parent | |
430 | { "en_BA", "en_150" }, // Apple locale addition | |
f3c0d7a5 | 431 | { "en_BB", "en_GB" }, // Barbados |
b331163b | 432 | { "en_BD", "en_GB" }, // Apple custom parent |
a961784b | 433 | { "en_BE", "en_150" }, // Apple custom parent |
f3c0d7a5 A |
434 | { "en_BM", "en_GB" }, // Bermuda |
435 | { "en_BS", "en_GB" }, // Bahamas | |
436 | { "en_BW", "en_GB" }, // Botswana | |
437 | { "en_BZ", "en_GB" }, // Belize | |
438 | { "en_CC", "en_AU" }, // Cocos (Keeling) Islands | |
b331163b | 439 | { "en_CH", "en_150" }, // Apple locale addition |
f3c0d7a5 A |
440 | { "en_CK", "en_AU" }, // Cook Islands (maybe to en_NZ instead?) |
441 | { "en_CX", "en_AU" }, // Christmas Island | |
b331163b A |
442 | { "en_CY", "en_150" }, // Apple locale addition |
443 | { "en_CZ", "en_150" }, // Apple locale addition | |
444 | { "en_DE", "en_150" }, // Apple locale addition | |
a961784b | 445 | { "en_DG", "en_GB" }, |
b331163b | 446 | { "en_DK", "en_150" }, // Apple locale addition |
f3c0d7a5 | 447 | { "en_DM", "en_GB" }, // Dominica |
b331163b A |
448 | { "en_EE", "en_150" }, // Apple locale addition |
449 | { "en_ES", "en_150" }, // Apple locale addition | |
450 | { "en_FI", "en_150" }, // Apple locale addition | |
f3c0d7a5 | 451 | { "en_FJ", "en_GB" }, // Fiji |
a961784b | 452 | { "en_FK", "en_GB" }, |
b331163b | 453 | { "en_FR", "en_150" }, // Apple locale addition |
f3c0d7a5 | 454 | { "en_GD", "en_GB" }, // Grenada |
a961784b | 455 | { "en_GG", "en_GB" }, |
f3c0d7a5 | 456 | { "en_GH", "en_GB" }, // Ghana |
a961784b | 457 | { "en_GI", "en_GB" }, |
f3c0d7a5 | 458 | { "en_GM", "en_GB" }, // Gambia |
b331163b | 459 | { "en_GR", "en_150" }, // Apple locale addition |
f3c0d7a5 | 460 | { "en_GY", "en_GB" }, // Guyana |
b331163b A |
461 | { "en_HK", "en_GB" }, // Apple custom parent |
462 | { "en_HR", "en_150" }, // Apple locale addition | |
463 | { "en_HU", "en_150" }, // Apple locale addition | |
a961784b | 464 | { "en_IE", "en_GB" }, |
b331163b | 465 | { "en_IL", "en_001" }, // Apple locale addition |
a961784b | 466 | { "en_IM", "en_GB" }, |
b331163b | 467 | { "en_IN", "en_GB" }, // Apple custom parent |
a961784b | 468 | { "en_IO", "en_GB" }, |
b331163b A |
469 | { "en_IS", "en_150" }, // Apple locale addition |
470 | { "en_IT", "en_150" }, // Apple locale addition | |
a961784b | 471 | { "en_JE", "en_GB" }, |
a62d09fc | 472 | { "en_JM", "en_GB" }, |
f3c0d7a5 A |
473 | { "en_KE", "en_GB" }, // Kenya |
474 | { "en_KI", "en_GB" }, // Kiribati | |
475 | { "en_KN", "en_GB" }, // St. Kitts & Nevis | |
476 | { "en_KY", "en_GB" }, // Cayman Islands | |
477 | { "en_LC", "en_GB" }, // St. Lucia | |
478 | { "en_LS", "en_GB" }, // Lesotho | |
b331163b A |
479 | { "en_LT", "en_150" }, // Apple locale addition |
480 | { "en_LU", "en_150" }, // Apple locale addition | |
481 | { "en_LV", "en_150" }, // Apple locale addition | |
482 | { "en_ME", "en_150" }, // Apple locale addition | |
a961784b | 483 | { "en_MO", "en_GB" }, |
f3c0d7a5 | 484 | { "en_MS", "en_GB" }, // Montserrat |
a961784b | 485 | { "en_MT", "en_GB" }, |
f3c0d7a5 | 486 | { "en_MU", "en_GB" }, // Mauritius |
2ca993e8 | 487 | { "en_MV", "en_GB" }, |
f3c0d7a5 | 488 | { "en_MW", "en_GB" }, // Malawi |
b331163b | 489 | { "en_MY", "en_GB" }, // Apple custom parent |
f3c0d7a5 A |
490 | { "en_NA", "en_GB" }, // Namibia |
491 | { "en_NF", "en_AU" }, // Norfolk Island | |
492 | { "en_NG", "en_GB" }, // Nigeria | |
b331163b A |
493 | { "en_NL", "en_150" }, // Apple locale addition |
494 | { "en_NO", "en_150" }, // Apple locale addition | |
f3c0d7a5 A |
495 | { "en_NR", "en_AU" }, // Nauru |
496 | { "en_NU", "en_AU" }, // Niue (maybe to en_NZ instead?) | |
2ca993e8 | 497 | { "en_NZ", "en_AU" }, |
f3c0d7a5 | 498 | { "en_PG", "en_AU" }, // Papua New Guinea |
b331163b A |
499 | { "en_PK", "en_GB" }, // Apple custom parent |
500 | { "en_PL", "en_150" }, // Apple locale addition | |
f3c0d7a5 | 501 | { "en_PN", "en_GB" }, // Pitcairn Islands |
b331163b A |
502 | { "en_PT", "en_150" }, // Apple locale addition |
503 | { "en_RO", "en_150" }, // Apple locale addition | |
504 | { "en_RU", "en_150" }, // Apple locale addition | |
f3c0d7a5 A |
505 | { "en_SB", "en_GB" }, // Solomon Islands |
506 | { "en_SC", "en_GB" }, // Seychelles | |
507 | { "en_SD", "en_GB" }, // Sudan | |
b331163b | 508 | { "en_SE", "en_150" }, // Apple locale addition |
a961784b A |
509 | { "en_SG", "en_GB" }, |
510 | { "en_SH", "en_GB" }, | |
b331163b A |
511 | { "en_SI", "en_150" }, // Apple locale addition |
512 | { "en_SK", "en_150" }, // Apple locale addition | |
f3c0d7a5 A |
513 | { "en_SL", "en_GB" }, // Sierra Leone |
514 | { "en_SS", "en_GB" }, // South Sudan | |
515 | { "en_SZ", "en_GB" }, // Swaziland | |
516 | { "en_TC", "en_GB" }, // Tristan da Cunha | |
517 | { "en_TO", "en_GB" }, // Tonga | |
518 | { "en_TT", "en_GB" }, // Trinidad & Tobago | |
519 | { "en_TV", "en_GB" }, // Tuvalu | |
520 | { "en_TZ", "en_GB" }, // Tanzania | |
521 | { "en_UG", "en_GB" }, // Uganda | |
522 | { "en_VC", "en_GB" }, // St. Vincent & Grenadines | |
a961784b | 523 | { "en_VG", "en_GB" }, |
f3c0d7a5 A |
524 | { "en_VU", "en_GB" }, // Vanuatu |
525 | { "en_WS", "en_AU" }, // Samoa (maybe to en_NZ instead?) | |
526 | { "en_ZA", "en_GB" }, // South Africa | |
527 | { "en_ZM", "en_GB" }, // Zambia | |
528 | { "en_ZW", "en_GB" }, // Zimbabwe | |
b331163b | 529 | }; |
2ca993e8 A |
530 | enum { kAppleParentMapCount = UPRV_LENGTHOF(appleParentMap) }; |
531 | ||
532 | typedef struct { | |
533 | const char * locale; | |
534 | const char * parent; | |
535 | int8_t distance; | |
536 | } LocParentAndDistance; | |
537 | ||
538 | static LocParentAndDistance locParentMap[] = { | |
539 | // The localizations listed in the first column are in | |
540 | // normalized form (e.g. zh_CN -> zh_Hans_CN, etc.). | |
541 | // The distance is a rough measure of distance from | |
542 | // the localization to its parent, used as a weight. | |
f3c0d7a5 | 543 | { "en_001", "en", 2 }, |
2ca993e8 A |
544 | { "en_150", "en_GB", 1 }, |
545 | { "en_AU", "en_GB", 1 }, | |
f3c0d7a5 A |
546 | { "en_GB", "en_001", 0 }, |
547 | { "en_US", "en", 0 }, | |
2ca993e8 A |
548 | { "es_419", "es", 2 }, |
549 | { "es_MX", "es_419", 0 }, | |
550 | { "pt_PT", "pt", 2 }, | |
f3c0d7a5 A |
551 | { "yue_Hans_CN","yue_Hans",0 }, |
552 | { "yue_Hant_HK","yue_Hant",0 }, | |
2ca993e8 A |
553 | { "zh_Hans_CN", "zh_Hans", 0 }, |
554 | { "zh_Hant_HK", "zh_Hant", 1 }, | |
555 | { "zh_Hant_TW", "zh_Hant", 0 }, | |
b331163b | 556 | }; |
2ca993e8 | 557 | enum { kLocParentMapCount = UPRV_LENGTHOF(locParentMap), kMaxParentDistance = 8 }; |
b331163b A |
558 | |
559 | enum { | |
f3c0d7a5 A |
560 | kStringsAllocSize = 4480, // cannot expand; current actual usage 4150 |
561 | kParentMapInitCount = 205 // can expand; current actual usage 205 | |
b331163b A |
562 | }; |
563 | ||
564 | U_CDECL_BEGIN | |
565 | static UBool U_CALLCONV ualocale_cleanup(void); | |
566 | U_CDECL_END | |
567 | ||
568 | U_NAMESPACE_BEGIN | |
569 | ||
570 | static UInitOnce gUALocaleCacheInitOnce = U_INITONCE_INITIALIZER; | |
571 | ||
572 | static int gMapDataState = 0; // 0 = not initialized, 1 = initialized, -1 = failure | |
573 | static char* gStrings = NULL; | |
574 | static UHashtable* gAliasMap = NULL; | |
575 | static UHashtable* gParentMap = NULL; | |
576 | ||
577 | U_NAMESPACE_END | |
578 | ||
579 | U_CDECL_BEGIN | |
580 | ||
581 | static UBool U_CALLCONV ualocale_cleanup(void) | |
582 | { | |
583 | U_NAMESPACE_USE | |
584 | ||
585 | gUALocaleCacheInitOnce.reset(); | |
586 | ||
587 | if (gMapDataState > 0) { | |
588 | uhash_close(gParentMap); | |
589 | gParentMap = NULL; | |
590 | uhash_close(gAliasMap); | |
591 | gAliasMap = NULL; | |
592 | uprv_free(gStrings); | |
593 | gStrings = NULL; | |
594 | } | |
595 | gMapDataState = 0; | |
596 | return TRUE; | |
597 | } | |
598 | ||
599 | static void initializeMapData() { | |
600 | U_NAMESPACE_USE | |
601 | ||
602 | UResourceBundle * curBundle; | |
603 | char* stringsPtr; | |
604 | char* stringsEnd; | |
605 | UErrorCode status; | |
606 | int32_t entryIndex, icuEntryCount; | |
607 | ||
608 | ucln_common_registerCleanup(UCLN_COMMON_LOCALE, ualocale_cleanup); | |
609 | ||
610 | gStrings = (char*)uprv_malloc(kStringsAllocSize); | |
611 | if (gStrings) { | |
612 | stringsPtr = gStrings; | |
613 | stringsEnd = gStrings + kStringsAllocSize; | |
614 | } | |
615 | ||
616 | status = U_ZERO_ERROR; | |
617 | curBundle = NULL; | |
618 | icuEntryCount = 0; | |
619 | if (gStrings) { | |
620 | curBundle = ures_openDirect(NULL, "metadata", &status); | |
621 | curBundle = ures_getByKey(curBundle, "alias", curBundle, &status); | |
622 | curBundle = ures_getByKey(curBundle, "language", curBundle, &status); // language resource is URES_TABLE | |
623 | if (U_SUCCESS(status)) { | |
624 | icuEntryCount = ures_getSize(curBundle); // currently 331 | |
625 | } | |
626 | } | |
627 | status = U_ZERO_ERROR; | |
628 | gAliasMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, | |
629 | kAppleAliasMapCount + icuEntryCount, &status); | |
630 | // defaults to keyDeleter NULL | |
631 | if (U_SUCCESS(status)) { | |
632 | for (entryIndex = 0; entryIndex < kAppleAliasMapCount && U_SUCCESS(status); entryIndex++) { | |
633 | uhash_put(gAliasMap, (void*)appleAliasMap[entryIndex][0], (void*)appleAliasMap[entryIndex][1], &status); | |
634 | } | |
635 | status = U_ZERO_ERROR; | |
636 | UResourceBundle * aliasMapBundle = NULL; | |
637 | for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { | |
638 | aliasMapBundle = ures_getByIndex(curBundle, entryIndex, aliasMapBundle, &status); | |
639 | if (U_FAILURE(status)) { | |
640 | break; // error | |
641 | } | |
642 | const char * keyStr = ures_getKey(aliasMapBundle); | |
643 | int32_t len = uprv_strlen(keyStr); | |
644 | if (len >= stringsEnd - stringsPtr) { | |
645 | break; // error | |
646 | } | |
647 | uprv_strcpy(stringsPtr, keyStr); | |
648 | char * inLocStr = stringsPtr; | |
649 | stringsPtr += len + 1; | |
650 | ||
651 | len = stringsEnd - stringsPtr - 1; | |
652 | ures_getUTF8StringByKey(aliasMapBundle, "replacement", stringsPtr, &len, TRUE, &status); | |
653 | if (U_FAILURE(status)) { | |
654 | break; // error | |
655 | } | |
656 | stringsPtr[len] = 0; | |
657 | uhash_put(gAliasMap, inLocStr, stringsPtr, &status); | |
658 | stringsPtr += len + 1; | |
659 | } | |
660 | ures_close(aliasMapBundle); | |
661 | } else { | |
662 | ures_close(curBundle); | |
663 | uprv_free(gStrings); | |
664 | gMapDataState = -1; // failure | |
665 | return; | |
666 | } | |
667 | ures_close(curBundle); | |
668 | ||
669 | status = U_ZERO_ERROR; | |
670 | gParentMap = uhash_openSize(uhash_hashIChars, uhash_compareIChars, uhash_compareIChars, | |
671 | kParentMapInitCount, &status); | |
672 | // defaults to keyDeleter NULL | |
673 | if (U_SUCCESS(status)) { | |
674 | curBundle = ures_openDirect(NULL, "supplementalData", &status); | |
675 | curBundle = ures_getByKey(curBundle, "parentLocales", curBundle, &status); // parentLocales resource is URES_TABLE | |
676 | if (U_SUCCESS(status)) { | |
677 | UResourceBundle * parentMapBundle = NULL; | |
678 | while (TRUE) { | |
679 | parentMapBundle = ures_getNextResource(curBundle, parentMapBundle, &status); | |
680 | if (U_FAILURE(status)) { | |
681 | break; // no more parent bundles, normal exit | |
682 | } | |
683 | const char * keyStr = ures_getKey(parentMapBundle); | |
684 | int32_t len = uprv_strlen(keyStr); | |
685 | if (len >= stringsEnd - stringsPtr) { | |
686 | break; // error | |
687 | } | |
688 | uprv_strcpy(stringsPtr, keyStr); | |
689 | char * parentStr = stringsPtr; | |
690 | stringsPtr += len + 1; | |
691 | ||
692 | if (ures_getType(parentMapBundle) == URES_STRING) { | |
693 | len = stringsEnd - stringsPtr - 1; | |
694 | ures_getUTF8String(parentMapBundle, stringsPtr, &len, TRUE, &status); | |
695 | if (U_FAILURE(status)) { | |
696 | break; // error | |
697 | } | |
698 | stringsPtr[len] = 0; | |
699 | uhash_put(gParentMap, stringsPtr, parentStr, &status); | |
700 | stringsPtr += len + 1; | |
701 | } else { | |
702 | // should be URES_ARRAY | |
703 | icuEntryCount = ures_getSize(parentMapBundle); | |
704 | for (entryIndex = 0; entryIndex < icuEntryCount && U_SUCCESS(status); entryIndex++) { | |
705 | len = stringsEnd - stringsPtr - 1; | |
706 | ures_getUTF8StringByIndex(parentMapBundle, entryIndex, stringsPtr, &len, TRUE, &status); | |
707 | if (U_FAILURE(status)) { | |
708 | break; | |
709 | } | |
710 | stringsPtr[len] = 0; | |
711 | uhash_put(gParentMap, stringsPtr, parentStr, &status); | |
712 | stringsPtr += len + 1; | |
713 | } | |
714 | } | |
715 | } | |
716 | ures_close(parentMapBundle); | |
717 | } | |
718 | ures_close(curBundle); | |
719 | ||
720 | status = U_ZERO_ERROR; | |
721 | for (entryIndex = 0; entryIndex < kAppleParentMapCount && U_SUCCESS(status); entryIndex++) { | |
722 | uhash_put(gParentMap, (void*)appleParentMap[entryIndex][0], (void*)appleParentMap[entryIndex][1], &status); | |
723 | } | |
724 | } else { | |
725 | uhash_close(gAliasMap); | |
726 | gAliasMap = NULL; | |
727 | uprv_free(gStrings); | |
728 | gMapDataState = -1; // failure | |
729 | return; | |
730 | } | |
731 | ||
2ca993e8 A |
732 | #if DEBUG_UALOC |
733 | printf("# gStrings size %ld\n", stringsPtr - gStrings); | |
734 | printf("# gParentMap count %d\n", uhash_count(gParentMap)); | |
735 | #endif | |
b331163b A |
736 | gMapDataState = 1; |
737 | } | |
738 | ||
739 | U_CDECL_END | |
740 | ||
741 | // The following maps aliases, etc. Ensures 0-termination if no error. | |
742 | static void ualoc_normalize(const char *locale, char *normalized, int32_t normalizedCapacity, UErrorCode *status) | |
743 | { | |
744 | if (U_FAILURE(*status)) { | |
745 | return; | |
746 | } | |
747 | // uloc_minimizeSubtags(locale, normalized, normalizedCapacity, status); | |
748 | ||
749 | const char *replacement = NULL; | |
750 | if (gMapDataState > 0) { | |
751 | replacement = (const char *)uhash_get(gAliasMap, locale); | |
752 | } | |
753 | if (replacement == NULL) { | |
754 | replacement = locale; | |
755 | } | |
2ca993e8 | 756 | int32_t len = strnlen(replacement, normalizedCapacity); |
b331163b A |
757 | if (len < normalizedCapacity) { // allow for 0 termination |
758 | uprv_strcpy(normalized, replacement); | |
759 | } else { | |
760 | *status = U_BUFFER_OVERFLOW_ERROR; | |
761 | } | |
762 | } | |
763 | ||
764 | static void ualoc_getParent(const char *locale, char *parent, int32_t parentCapacity, UErrorCode *status) | |
765 | { | |
766 | if (U_FAILURE(*status)) { | |
767 | return; | |
768 | } | |
769 | if (gMapDataState > 0) { | |
770 | const char *replacement = (const char *)uhash_get(gParentMap, locale); | |
771 | if (replacement) { | |
772 | int32_t len = uprv_strlen(replacement); | |
773 | if (len < parentCapacity) { // allow for 0 termination | |
774 | uprv_strcpy(parent, replacement); | |
775 | } else { | |
776 | *status = U_BUFFER_OVERFLOW_ERROR; | |
777 | } | |
778 | return; | |
779 | } | |
780 | } | |
781 | uloc_getParent(locale, parent, parentCapacity - 1, status); | |
782 | parent[parentCapacity - 1] = 0; // ensure 0 termination in case of U_STRING_NOT_TERMINATED_WARNING | |
783 | } | |
784 | ||
785 | // Might do something better for this, perhaps maximizing locales then stripping | |
2ca993e8 | 786 | static const char * getLocParent(const char *locale, int32_t* distance) |
b331163b A |
787 | { |
788 | int32_t locParentIndex; | |
789 | for (locParentIndex = 0; locParentIndex < kLocParentMapCount; locParentIndex++) { | |
2ca993e8 A |
790 | if (uprv_strcmp(locale, locParentMap[locParentIndex].locale) == 0) { |
791 | *distance = locParentMap[locParentIndex].distance; | |
792 | return locParentMap[locParentIndex].parent; | |
b331163b A |
793 | } |
794 | } | |
f3c0d7a5 A |
795 | if (gMapDataState > 0) { |
796 | const char *replacement = (const char *)uhash_get(gParentMap, locale); | |
797 | if (replacement) { | |
798 | *distance = 1; | |
799 | return replacement; | |
800 | } | |
801 | } | |
b331163b A |
802 | return NULL; |
803 | } | |
804 | ||
805 | // this just checks if the *pointer* value is already in the array | |
806 | static UBool locInArray(const char* *localizationsToUse, int32_t locsToUseCount, const char *locToCheck) | |
807 | { | |
808 | int32_t locIndex; | |
809 | for (locIndex = 0; locIndex < locsToUseCount; locIndex++) { | |
810 | if (locToCheck == localizationsToUse[locIndex]) { | |
811 | return TRUE; | |
812 | } | |
813 | } | |
814 | return FALSE; | |
815 | } | |
816 | ||
817 | enum { kLangScriptRegMaxLen = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // currently 22 | |
818 | ||
819 | int32_t | |
820 | ualoc_localizationsToUse( const char* const *preferredLanguages, | |
821 | int32_t preferredLanguagesCount, | |
822 | const char* const *availableLocalizations, | |
823 | int32_t availableLocalizationsCount, | |
824 | const char* *localizationsToUse, | |
825 | int32_t localizationsToUseCapacity, | |
826 | UErrorCode *status ) | |
827 | { | |
828 | if (U_FAILURE(*status)) { | |
829 | return -1; | |
830 | } | |
831 | if (preferredLanguages == NULL || availableLocalizations == NULL || localizationsToUse == NULL) { | |
832 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
833 | return -1; | |
834 | } | |
835 | // get resource data, need to protect with mutex | |
836 | if (gMapDataState == 0) { | |
837 | umtx_initOnce(gUALocaleCacheInitOnce, initializeMapData); | |
838 | } | |
839 | int32_t locsToUseCount = 0; | |
840 | int32_t prefLangIndex, availLocIndex = 0; | |
2ca993e8 A |
841 | int32_t availLocIndexBackup = -1; // if >= 0, contains index of backup match |
842 | int32_t foundMatchPrefLangIndex = 0, backupMatchPrefLangIndex = 0; | |
b331163b A |
843 | char (*availLocBase)[kLangScriptRegMaxLen + 1] = NULL; |
844 | char (*availLocNorm)[kLangScriptRegMaxLen + 1] = NULL; | |
b331163b | 845 | UBool foundMatch = FALSE; |
f3c0d7a5 | 846 | UBool backupMatchPrefLang_pt_PT = FALSE; |
b331163b | 847 | |
2ca993e8 A |
848 | #if DEBUG_UALOC |
849 | if (preferredLanguagesCount > 0 && availableLocalizationsCount > 0) { | |
850 | printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: %s, ..., availableLocalizations %d: %s, ...\n", | |
851 | preferredLanguagesCount, preferredLanguages[0], availableLocalizationsCount, availableLocalizations[0]); | |
852 | } else { | |
853 | printf("\n # ualoc_localizationsToUse start, preferredLanguages %d: ..., availableLocalizations %d: ...\n", | |
854 | preferredLanguagesCount, availableLocalizationsCount); | |
855 | } | |
856 | #endif | |
857 | ||
b331163b A |
858 | // Part 1, find the best matching localization, if any |
859 | for (prefLangIndex = 0; prefLangIndex < preferredLanguagesCount; prefLangIndex++) { | |
860 | char prefLangBaseName[kLangScriptRegMaxLen + 1]; | |
861 | char prefLangNormName[kLangScriptRegMaxLen + 1]; | |
862 | char prefLangParentName[kLangScriptRegMaxLen + 1]; | |
863 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
864 | ||
865 | if (preferredLanguages[prefLangIndex] == NULL) { | |
866 | continue; // skip NULL preferredLanguages entry, go to next one | |
867 | } | |
868 | // use underscores, fix bad capitalization, delete any keywords | |
869 | uloc_getBaseName(preferredLanguages[prefLangIndex], prefLangBaseName, kLangScriptRegMaxLen, &tmpStatus); | |
870 | if (U_FAILURE(tmpStatus) || prefLangBaseName[0] == 0 || | |
871 | uprv_strcmp(prefLangBaseName, "root") == 0 || prefLangBaseName[0] == '_') { | |
872 | continue; // can't handle this preferredLanguages entry or it is invalid, go to next one | |
873 | } | |
874 | prefLangBaseName[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
2ca993e8 A |
875 | #if DEBUG_UALOC |
876 | printf(" # loop: try prefLangBaseName %s\n", prefLangBaseName); | |
877 | #endif | |
b331163b A |
878 | |
879 | // if we have not already allocated and filled the array of | |
880 | // base availableLocalizations, do so now. | |
881 | if (availLocBase == NULL) { | |
882 | availLocBase = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
883 | if (availLocBase == NULL) { | |
884 | continue; // cannot further check this preferredLanguages entry, go to next one | |
885 | } | |
2ca993e8 A |
886 | #if DEBUG_UALOC |
887 | printf(" # allocate & fill availLocBase\n"); | |
888 | #endif | |
b331163b A |
889 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { |
890 | tmpStatus = U_ZERO_ERROR; | |
2ca993e8 A |
891 | if (availableLocalizations[availLocIndex] == NULL) { |
892 | availLocBase[availLocIndex][0] = 0; // effectively remove this entry | |
893 | continue; | |
894 | } | |
b331163b A |
895 | uloc_getBaseName(availableLocalizations[availLocIndex], availLocBase[availLocIndex], kLangScriptRegMaxLen, &tmpStatus); |
896 | if (U_FAILURE(tmpStatus) || uprv_strcmp(availLocBase[availLocIndex], "root") == 0 || availLocBase[availLocIndex][0] == '_') { | |
897 | availLocBase[availLocIndex][0] = 0; // effectively remove this entry | |
2ca993e8 | 898 | continue; |
b331163b | 899 | } |
2ca993e8 A |
900 | availLocBase[availLocIndex][kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING |
901 | #if DEBUG_UALOC | |
902 | printf(" # add availLocBase %s\n", availLocBase[availLocIndex]); | |
903 | #endif | |
b331163b A |
904 | } |
905 | } | |
906 | // first compare base preferredLanguage to base versions of availableLocalizations names | |
907 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
908 | if (uprv_strcmp(prefLangBaseName, availLocBase[availLocIndex]) == 0) { | |
909 | foundMatch = TRUE; // availLocIndex records where | |
2ca993e8 A |
910 | foundMatchPrefLangIndex = prefLangIndex; |
911 | #if DEBUG_UALOC | |
912 | printf(" # FOUND: matched availLocBase %s -> actualLoc %s\n", availLocBase[availLocIndex], availableLocalizations[availLocIndex]); | |
913 | #endif | |
b331163b A |
914 | break; |
915 | } | |
916 | } | |
917 | if (foundMatch) { | |
b331163b A |
918 | break; // found a loc for this preferredLanguages entry |
919 | } | |
920 | ||
921 | // get normalized preferredLanguage | |
922 | tmpStatus = U_ZERO_ERROR; | |
923 | ualoc_normalize(prefLangBaseName, prefLangNormName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
924 | if (U_FAILURE(tmpStatus)) { | |
925 | continue; // can't handle this preferredLanguages entry, go to next one | |
926 | } | |
2ca993e8 A |
927 | #if DEBUG_UALOC |
928 | printf(" # prefLangNormName %s\n", prefLangNormName); | |
929 | #endif | |
b331163b A |
930 | // if we have not already allocated and filled the array of |
931 | // normalized availableLocalizations, do so now. | |
932 | // Note: ualoc_normalize turns "zh_TW" into "zh_Hant_TW", zh_HK" into "zh_Hant_HK", | |
933 | // and fixes deprecated codes "iw" > "he", "in" > "id" etc. | |
934 | if (availLocNorm == NULL) { | |
935 | availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
936 | if (availLocNorm == NULL) { | |
937 | continue; // cannot further check this preferredLanguages entry, go to next one | |
938 | } | |
2ca993e8 A |
939 | #if DEBUG_UALOC |
940 | printf(" # allocate & fill availLocNorm\n"); | |
941 | #endif | |
b331163b A |
942 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { |
943 | tmpStatus = U_ZERO_ERROR; | |
944 | ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); | |
945 | if (U_FAILURE(tmpStatus)) { | |
946 | availLocNorm[availLocIndex][0] = 0; // effectively remove this entry | |
2ca993e8 A |
947 | #if DEBUG_UALOC |
948 | } else { | |
949 | printf(" # actualLoc %-11s -> norm %s\n", availableLocalizations[availLocIndex], availLocNorm[availLocIndex]); | |
950 | #endif | |
b331163b | 951 | } |
b331163b A |
952 | } |
953 | } | |
954 | // now compare normalized preferredLanguage to normalized localization names | |
955 | // if matches, copy *original* localization name | |
956 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
957 | if (uprv_strcmp(prefLangNormName, availLocNorm[availLocIndex]) == 0) { | |
958 | foundMatch = TRUE; // availLocIndex records where | |
2ca993e8 A |
959 | foundMatchPrefLangIndex = prefLangIndex; |
960 | #if DEBUG_UALOC | |
961 | printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); | |
962 | #endif | |
b331163b A |
963 | break; |
964 | } | |
965 | } | |
966 | if (foundMatch) { | |
b331163b A |
967 | break; // found a loc for this preferredLanguages entry |
968 | } | |
969 | ||
970 | // now walk up the parent chain for preferredLanguage | |
971 | // until we find a match or hit root | |
972 | uprv_strcpy(prefLangBaseName, prefLangNormName); | |
973 | while (!foundMatch) { | |
974 | tmpStatus = U_ZERO_ERROR; | |
975 | ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
976 | if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { | |
977 | break; // reached root or cannot proceed further | |
978 | } | |
2ca993e8 A |
979 | #if DEBUG_UALOC |
980 | printf(" # prefLangParentName %s\n", prefLangParentName); | |
981 | #endif | |
b331163b A |
982 | |
983 | // now compare this preferredLanguage parent to normalized localization names | |
984 | // if matches, copy *original* localization name | |
985 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
986 | if (uprv_strcmp(prefLangParentName, availLocNorm[availLocIndex]) == 0) { | |
987 | foundMatch = TRUE; // availLocIndex records where | |
2ca993e8 A |
988 | foundMatchPrefLangIndex = prefLangIndex; |
989 | #if DEBUG_UALOC | |
990 | printf(" # FOUND: matched availLocNorm %s -> actualLoc %s\n", availLocNorm[availLocIndex], availableLocalizations[availLocIndex]); | |
991 | #endif | |
b331163b A |
992 | break; |
993 | } | |
994 | } | |
995 | uprv_strcpy(prefLangBaseName, prefLangParentName); | |
996 | } | |
997 | if (foundMatch) { | |
998 | break; // found a loc for this preferredLanguages entry | |
999 | } | |
1000 | ||
2ca993e8 A |
1001 | // last try, use parents of selected language to try for backup match |
1002 | // if we have not already found one | |
1003 | if (availLocIndexBackup < 0) { | |
b331163b A |
1004 | // now walk up the parent chain for preferredLanguage again |
1005 | // checking against parents of selected availLocNorm entries | |
1006 | // but this time start with current prefLangNormName | |
1007 | uprv_strcpy(prefLangBaseName, prefLangNormName); | |
2ca993e8 | 1008 | int32_t minDistance = kMaxParentDistance; |
b331163b | 1009 | while (TRUE) { |
b331163b A |
1010 | // now compare this preferredLanguage to normalized localization names |
1011 | // parent if have one for this; if matches, copy *original* localization name | |
2ca993e8 A |
1012 | #if DEBUG_UALOC |
1013 | printf(" # BACKUP: trying prefLangBaseName %s\n", prefLangBaseName); | |
1014 | #endif | |
b331163b | 1015 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { |
2ca993e8 A |
1016 | char availLocMinOrParent[kLangScriptRegMaxLen + 1]; |
1017 | int32_t distance; | |
1018 | // first check for special Apple parents of availLocNorm - | |
1019 | // - the number of locales with such parents is small - | |
1020 | // or if not such parent, then try stripping region. | |
1021 | const char *availLocParent = getLocParent(availLocNorm[availLocIndex], &distance); | |
1022 | if (availLocParent) { | |
1023 | #if DEBUG_UALOC | |
1024 | printf(" # availLocAppleParentName %s\n", availLocParent); | |
1025 | #endif | |
1026 | if (uprv_strcmp(prefLangBaseName, availLocParent) == 0 && distance < minDistance) { | |
1027 | availLocIndexBackup = availLocIndex; // records where the match occurred | |
1028 | backupMatchPrefLangIndex = prefLangIndex; | |
1029 | minDistance = distance; | |
1030 | #if DEBUG_UALOC | |
1031 | printf(" # BACKUP: LocAppleParent matched prefLangNormName with distance %d\n", distance); | |
1032 | #endif | |
1033 | continue; | |
1034 | } | |
1035 | } | |
1036 | if (minDistance <= 1) { | |
1037 | continue; // we can't get any closer in the rest of this iteration | |
1038 | } | |
1039 | if (availLocParent == NULL) { | |
1040 | tmpStatus = U_ZERO_ERROR; | |
1041 | int32_t regLen = uloc_getCountry(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
1042 | if (U_SUCCESS(tmpStatus) && regLen > 1) { | |
1043 | uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
1044 | if (U_SUCCESS(tmpStatus)) { | |
1045 | availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
1046 | #if DEBUG_UALOC | |
1047 | printf(" # availLocRegMaxName %s\n", availLocMinOrParent); | |
1048 | #endif | |
1049 | char availLocTemp[kLangScriptRegMaxLen + 1]; | |
1050 | uloc_getParent(availLocMinOrParent, availLocTemp, kLangScriptRegMaxLen, &tmpStatus); | |
1051 | if (U_SUCCESS(tmpStatus)) { | |
1052 | availLocTemp[kLangScriptRegMaxLen] = 0; | |
1053 | uloc_minimizeSubtags(availLocTemp, availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
1054 | if (U_SUCCESS(tmpStatus)) { | |
1055 | availLocMinOrParent[kLangScriptRegMaxLen] = 0; | |
1056 | #if DEBUG_UALOC | |
1057 | printf(" # availLocNoRegParentName %s\n", availLocMinOrParent); | |
1058 | #endif | |
1059 | if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { | |
1060 | availLocIndexBackup = availLocIndex; // records where the match occurred | |
1061 | backupMatchPrefLangIndex = prefLangIndex; | |
1062 | minDistance = 1; | |
f3c0d7a5 | 1063 | backupMatchPrefLang_pt_PT = (uprv_strcmp(prefLangNormName, "pt_PT") == 0); |
2ca993e8 A |
1064 | #if DEBUG_UALOC |
1065 | printf(" # BACKUP: LocNoRegParent matched prefLangNormName with distance 1\n"); | |
1066 | #endif | |
1067 | continue; | |
1068 | } | |
1069 | } | |
1070 | } | |
1071 | } | |
1072 | } | |
1073 | } | |
1074 | // then check against minimized version of availLocNorm | |
1075 | tmpStatus = U_ZERO_ERROR; | |
1076 | uloc_minimizeSubtags(availLocNorm[availLocIndex], availLocMinOrParent, kLangScriptRegMaxLen, &tmpStatus); | |
1077 | if (U_FAILURE(tmpStatus)) { | |
1078 | continue; | |
1079 | } | |
1080 | availLocMinOrParent[kLangScriptRegMaxLen] = 0; // ensure 0 termination, could have U_STRING_NOT_TERMINATED_WARNING | |
1081 | #if DEBUG_UALOC | |
1082 | printf(" # availLocMinimized %s\n", availLocMinOrParent); | |
1083 | #endif | |
1084 | if (uprv_strcmp(prefLangBaseName, availLocMinOrParent) == 0) { | |
1085 | availLocIndexBackup = availLocIndex; // records where the match occurred | |
1086 | backupMatchPrefLangIndex = prefLangIndex; | |
1087 | minDistance = 1; | |
1088 | #if DEBUG_UALOC | |
1089 | printf(" # BACKUP: LocMinimized matched prefLangNormName with distance 1\n"); | |
1090 | #endif | |
b331163b A |
1091 | } |
1092 | } | |
2ca993e8 | 1093 | if (availLocIndexBackup >= 0) { |
b331163b A |
1094 | break; |
1095 | } | |
2ca993e8 | 1096 | tmpStatus = U_ZERO_ERROR; |
b331163b A |
1097 | ualoc_getParent(prefLangBaseName, prefLangParentName, kLangScriptRegMaxLen + 1, &tmpStatus); |
1098 | if (U_FAILURE(tmpStatus) || uprv_strcmp(prefLangParentName, "root") == 0 || prefLangParentName[0] == 0) { | |
1099 | break; // reached root or cannot proceed further | |
1100 | } | |
1101 | uprv_strcpy(prefLangBaseName, prefLangParentName); | |
1102 | } | |
1103 | } | |
2ca993e8 A |
1104 | } |
1105 | // If we have a backup match, decide what to do | |
1106 | if (availLocIndexBackup >= 0) { | |
1107 | if (!foundMatch) { | |
1108 | // no main match, just use the backup | |
1109 | availLocIndex = availLocIndexBackup; | |
1110 | foundMatch = TRUE; | |
1111 | #if DEBUG_UALOC | |
1112 | printf(" # no main match, have backup => use availLocIndexBackup %d\n", availLocIndexBackup); | |
1113 | #endif | |
f3c0d7a5 | 1114 | } else if (backupMatchPrefLangIndex < foundMatchPrefLangIndex && (!backupMatchPrefLang_pt_PT || uprv_strcmp(availLocNorm[availLocIndexBackup], "pt_BR") != 0)) { |
2ca993e8 A |
1115 | // have a main match but backup match was higher in the prefs, use it if for a different language |
1116 | #if DEBUG_UALOC | |
1117 | printf(" # have backup match higher in prefs, comparing its language and script to main match\n"); | |
1118 | #endif | |
1119 | char mainLang[ULOC_LANG_CAPACITY + 1]; | |
1120 | char backupLang[ULOC_LANG_CAPACITY + 1]; | |
1121 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
1122 | uloc_getLanguage(availLocNorm[availLocIndex], mainLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1123 | mainLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination | |
1124 | uloc_getLanguage(availLocNorm[availLocIndexBackup], backupLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1125 | backupLang[ULOC_LANG_CAPACITY] = 0; // ensure zero termination | |
1126 | if (U_SUCCESS(tmpStatus)) { | |
1127 | if (uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { | |
1128 | // backup match has different language than main match | |
1129 | availLocIndex = availLocIndexBackup; | |
1130 | // foundMatch is already TRUE | |
1131 | #if DEBUG_UALOC | |
1132 | printf(" # main match but backup is for a different lang higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); | |
1133 | #endif | |
1134 | } else { | |
1135 | // backup match has same language as main match, check scripts too | |
1136 | char availLocMaximized[kLangScriptRegMaxLen + 1]; | |
1137 | ||
1138 | uloc_addLikelySubtags(availLocNorm[availLocIndex], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus); | |
1139 | availLocMaximized[kLangScriptRegMaxLen] = 0; | |
1140 | uloc_getScript(availLocMaximized, mainLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1141 | mainLang[ULOC_LANG_CAPACITY] = 0; | |
1142 | ||
1143 | uloc_addLikelySubtags(availLocNorm[availLocIndexBackup], availLocMaximized, kLangScriptRegMaxLen, &tmpStatus); | |
1144 | availLocMaximized[kLangScriptRegMaxLen] = 0; | |
1145 | uloc_getScript(availLocMaximized, backupLang, ULOC_LANG_CAPACITY, &tmpStatus); | |
1146 | backupLang[ULOC_LANG_CAPACITY] = 0; | |
1147 | ||
1148 | if (U_SUCCESS(tmpStatus) && uprv_strncmp(mainLang, backupLang, ULOC_LANG_CAPACITY) != 0) { | |
1149 | // backup match has different script than main match | |
1150 | availLocIndex = availLocIndexBackup; | |
1151 | // foundMatch is already TRUE | |
1152 | #if DEBUG_UALOC | |
1153 | printf(" # main match but backup is for a different script higher in prefs => use availLocIndexBackup %d\n", availLocIndexBackup); | |
1154 | #endif | |
1155 | } | |
1156 | } | |
1157 | } | |
b331163b A |
1158 | } |
1159 | } | |
1160 | ||
1161 | // Part 2, if we found a matching localization, then walk up its parent tree to find any fallback matches in availableLocalizations | |
1162 | if (foundMatch) { | |
1163 | // Here availLocIndex corresponds to the first matched localization | |
1164 | UErrorCode tmpStatus = U_ZERO_ERROR; | |
1165 | int32_t availLocMatchIndex = availLocIndex; | |
1166 | if (locsToUseCount < localizationsToUseCapacity) { | |
1167 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocMatchIndex]; | |
1168 | } | |
1169 | // at this point we must have availLocBase, and minimally matched against that. | |
1170 | // if we have not already allocated and filled the array of | |
1171 | // normalized availableLocalizations, do so now, but don't require it | |
1172 | if (availLocNorm == NULL) { | |
1173 | availLocNorm = (char (*)[kLangScriptRegMaxLen + 1])uprv_malloc(availableLocalizationsCount * (kLangScriptRegMaxLen + 1)); | |
1174 | if (availLocNorm != NULL) { | |
1175 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1176 | tmpStatus = U_ZERO_ERROR; | |
1177 | ualoc_normalize(availLocBase[availLocIndex], availLocNorm[availLocIndex], kLangScriptRegMaxLen + 1, &tmpStatus); | |
1178 | if (U_FAILURE(tmpStatus)) { | |
1179 | availLocNorm[availLocIndex][0] = 0; // effectively remove this entry | |
1180 | } | |
1181 | } | |
1182 | } | |
1183 | } | |
1184 | ||
1185 | // add normalized form of matching loc, if different and in availLocBase | |
1186 | if (locsToUseCount < localizationsToUseCapacity) { | |
1187 | tmpStatus = U_ZERO_ERROR; | |
1188 | char matchedLocNormName[kLangScriptRegMaxLen + 1]; | |
1189 | char matchedLocParentName[kLangScriptRegMaxLen + 1]; | |
1190 | // get normalized form of matching loc | |
1191 | if (availLocNorm != NULL) { | |
1192 | uprv_strcpy(matchedLocNormName, availLocNorm[availLocMatchIndex]); | |
1193 | } else { | |
1194 | ualoc_normalize(availLocBase[availLocMatchIndex], matchedLocNormName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
1195 | } | |
1196 | if (U_SUCCESS(tmpStatus)) { | |
1197 | // add normalized form of matching loc, if different and in availLocBase | |
1198 | if (uprv_strcmp(matchedLocNormName, localizationsToUse[0]) != 0) { | |
1199 | // normalization of matched localization is different, see if we have the normalization in availableLocalizations | |
1200 | // from this point on, availLocIndex no longer corresponds to the matched localization. | |
1201 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1202 | if ( (uprv_strcmp(matchedLocNormName, availLocBase[availLocIndex]) == 0 | |
1203 | || (availLocNorm != NULL && uprv_strcmp(matchedLocNormName, availLocNorm[availLocIndex]) == 0)) | |
1204 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { | |
1205 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; | |
1206 | break; | |
1207 | } | |
1208 | } | |
1209 | } | |
1210 | ||
1211 | // now walk up the parent chain from matchedLocNormName, adding parents if they are in availLocBase | |
1212 | while (locsToUseCount < localizationsToUseCapacity) { | |
1213 | ualoc_getParent(matchedLocNormName, matchedLocParentName, kLangScriptRegMaxLen + 1, &tmpStatus); | |
1214 | if (U_FAILURE(tmpStatus) || uprv_strcmp(matchedLocParentName, "root") == 0 || matchedLocParentName[0] == 0) { | |
1215 | break; // reached root or cannot proceed further | |
1216 | } | |
1217 | ||
1218 | // now compare this matchedLocParentName parent to base localization names (and norm ones if we have them) | |
1219 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1220 | if ( (uprv_strcmp(matchedLocParentName, availLocBase[availLocIndex]) == 0 | |
1221 | || (availLocNorm != NULL && uprv_strcmp(matchedLocParentName, availLocNorm[availLocIndex]) == 0)) | |
1222 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[availLocIndex])) { | |
1223 | localizationsToUse[locsToUseCount++] = availableLocalizations[availLocIndex]; | |
1224 | break; | |
1225 | } | |
1226 | } | |
1227 | uprv_strcpy(matchedLocNormName, matchedLocParentName); | |
1228 | } | |
1229 | ||
1230 | // The above still fails to include "zh_TW" if it is in availLocBase and the matched localization | |
1231 | // base name is "zh_HK" or "zh_MO". One option would be to walk up the parent chain from | |
1232 | // matchedLocNormName again, comparing against parents of of selected availLocNorm entries. | |
1233 | // But this picks up too many matches that are not parents of the matched localization. So | |
1234 | // we just handle these specially. | |
1235 | if ( locsToUseCount < localizationsToUseCapacity | |
1236 | && (uprv_strcmp(availLocBase[availLocMatchIndex], "zh_HK") == 0 | |
1237 | || uprv_strcmp(availLocBase[availLocMatchIndex], "zh_MO") == 0) ) { | |
1238 | int32_t zhTW_matchIndex = -1; | |
1239 | UBool zhHant_found = FALSE; | |
1240 | for (availLocIndex = 0; availLocIndex < availableLocalizationsCount; availLocIndex++) { | |
1241 | if ( zhTW_matchIndex < 0 && uprv_strcmp("zh_TW", availLocBase[availLocIndex]) == 0 ) { | |
1242 | zhTW_matchIndex = availLocIndex; | |
1243 | } | |
1244 | if ( !zhHant_found && uprv_strcmp("zh_Hant", availLocBase[availLocIndex]) == 0 ) { | |
1245 | zhHant_found = TRUE; | |
1246 | } | |
1247 | } | |
1248 | if (zhTW_matchIndex >= 0 && !zhHant_found | |
1249 | && !locInArray(localizationsToUse, locsToUseCount, availableLocalizations[zhTW_matchIndex])) { | |
1250 | localizationsToUse[locsToUseCount++] = availableLocalizations[zhTW_matchIndex]; | |
1251 | } | |
1252 | } | |
1253 | } | |
1254 | } | |
1255 | } | |
1256 | ||
1257 | uprv_free(availLocNorm); | |
1258 | uprv_free(availLocBase); | |
1259 | return locsToUseCount; | |
1260 | } | |
1261 |