]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/locmap.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / locmap.c
1 /*
2 **********************************************************************
3 * Copyright (C) 1996-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * Provides functionality for mapping between
8 * LCID and Posix IDs or ICU locale to codepage
9 *
10 * Note: All classes and code in this file are
11 * intended for internal use only.
12 *
13 * Methods of interest:
14 * uint32_t uprv_convertToLCID(const char *langID, const char *posixID, UErrorCode *status);
15 * const char* uprv_convertToPosix(uint32_t hostid, UErrorCode *status);
16 *
17 * Kathleen Wilson, 4/30/96
18 *
19 * Date Name Description
20 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
21 * setId() method and safety check against
22 * MAX_ID_LENGTH.
23 * 04/23/99 stephen Added C wrapper for convertToPosix.
24 * 09/18/00 george Removed the memory leaks.
25 * 08/23/01 george Convert to C
26 */
27
28 #include "locmap.h"
29 #include "cstring.h"
30
31 /*
32 * Note:
33 * The mapping from Win32 locale ID numbers to POSIX locale strings should
34 * be the faster one.
35 *
36 * Many LCID values come from winnt.h
37 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
38 */
39
40 /*
41 ////////////////////////////////////////////////
42 //
43 // Internal Classes for LCID <--> POSIX Mapping
44 //
45 /////////////////////////////////////////////////
46 */
47
48 typedef struct ILcidPosixElement
49 {
50 const uint32_t hostID;
51 const char * const posixID;
52 } ILcidPosixElement;
53
54 typedef struct ILcidPosixMap
55 {
56 const uint32_t numRegions;
57 const struct ILcidPosixElement* const regionMaps;
58 } ILcidPosixMap;
59
60
61 /*
62 /////////////////////////////////////////////////
63 //
64 // Easy macros to make the LCID <--> POSIX Mapping
65 //
66 /////////////////////////////////////////////////
67 */
68
/*
 Table generator for the standard one language/one country case.
 Expands to a static array named after the second argument with two rows:
   row 0: LANGUAGE_LCID() of the full LCID, paired with the language name;
   row 1: the full LCID, paired with the language-plus-country name.
 Both names are produced by stringifying the macro arguments.
 The expansion already ends in a semicolon, so invocations take none.
*/
#define ILCID_POSIX_ELEMENT_ARRAY(lcid, lang, locale) \
static const ILcidPosixElement lang[] = { \
    {LANGUAGE_LCID(lcid), #lang},   /* parent locale */ \
    {lcid, #locale}, \
};
79
/*
 Builds the ILcidPosixMap initializer for one language table: the number of
 rows in the array followed by the array itself. The argument must be the
 ILcidPosixElement array variable (not a pointer), otherwise the sizeof-based
 row count is wrong. By convention the array's first row is the bare language.
*/
#define ILCID_POSIX_MAP(table) \
    { sizeof(table) / sizeof(ILcidPosixElement), table }
87
88 /*
89 ////////////////////////////////////////////
90 //
91 // Create the table of LCID to POSIX Mapping
92 // None of it should be dynamically created.
93 //
94 // Keep static locale variables inside the function so that
95 // it can be created properly during static init.
96 //
97 ////////////////////////////////////////////
98 */
99
100 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
101
102 static const ILcidPosixElement ar[] = {
103 {0x01, "ar"},
104 {0x3801, "ar_AE"},
105 {0x3c01, "ar_BH"},
106 {0x1401, "ar_DZ"},
107 {0x0c01, "ar_EG"},
108 {0x0801, "ar_IQ"},
109 {0x2c01, "ar_JO"},
110 {0x3401, "ar_KW"},
111 {0x3001, "ar_LB"},
112 {0x1001, "ar_LY"},
113 {0x1801, "ar_MA"},
114 {0x2001, "ar_OM"},
115 {0x4001, "ar_QA"},
116 {0x0401, "ar_SA"},
117 {0x2801, "ar_SY"},
118 {0x1c01, "ar_TN"},
119 {0x2401, "ar_YE"}
120 };
121
122 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
123 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
124
125 static const ILcidPosixElement az[] = {
126 {0x2c, "az"},
127 {0x082c, "az_AZ"}, /* Cyrillic based */
128 {0x042c, "az_Latn_AZ"} /* Latin based */
129 };
130
131 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
132 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
133
134 static const ILcidPosixElement bn[] = {
135 {0x45, "bn"},
136 {0x0845, "bn_BD"},
137 {0x0445, "bn_IN"}
138 };
139
140
141 static const ILcidPosixElement bo[] = {
142 {0x51, "bo"},
143 {0x0851, "bo_BT"},
144 {0x0451, "bo_CN"}
145 };
146
147 ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca, ca_ES)
148 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
149
150 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
151 static const ILcidPosixElement cs_CZ[] = {
152 {0x05, "cs"},
153 {0x0405, "cs_CZ"},
154 };
155
156 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
157 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
158
159 static const ILcidPosixElement de[] = {
160 {0x07, "de"},
161 {0x0c07, "de_AT"},
162 {0x0807, "de_CH"},
163 {0x0407, "de_DE"},
164 {0x1407, "de_LI"},
165 {0x1007, "de_LU"},
166 {0x10407,"de__PHONEBOOK"} /*This is really de_DE_PHONEBOOK on Windows, maybe 10007*/
167 };
168
169 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
170 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
171
172 static const ILcidPosixElement en[] = {
173 {0x09, "en"},
174 {0x0c09, "en_AU"},
175 {0x2809, "en_BZ"},
176 {0x1009, "en_CA"},
177 {0x0809, "en_GB"},
178 {0x1809, "en_IE"},
179 {0x2009, "en_JM"},
180 {0x1409, "en_NZ"},
181 {0x3409, "en_PH"},
182 {0x2C09, "en_TT"},
183 {0x0409, "en_US"},
184 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
185 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
186 {0x1c09, "en_ZA"},
187 {0x3009, "en_ZW"},
188 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
189 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
190 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
191 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
192 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
193 };
194
195 static const ILcidPosixElement en_US_POSIX[] = {
196 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
197 };
198
199 static const ILcidPosixElement es[] = {
200 {0x0a, "es"},
201 {0x2c0a, "es_AR"},
202 {0x400a, "es_BO"},
203 {0x340a, "es_CL"},
204 {0x240a, "es_CO"},
205 {0x140a, "es_CR"},
206 {0x1c0a, "es_DO"},
207 {0x300a, "es_EC"},
208 {0x0c0a, "es_ES"}, /*Modern sort.*/
209 {0x100a, "es_GT"},
210 {0x480a, "es_HN"},
211 {0x080a, "es_MX"},
212 {0x4c0a, "es_NI"},
213 {0x180a, "es_PA"},
214 {0x280a, "es_PE"},
215 {0x500a, "es_PR"},
216 {0x3c0a, "es_PY"},
217 {0x440a, "es_SV"},
218 {0x380a, "es_UY"},
219 {0x200a, "es_VE"},
220 {0x040a, "es__TRADITIONAL"} /*This is really es_ES_TRADITIONAL on Windows */
221 };
222
223 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
224 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
225 ILCID_POSIX_ELEMENT_ARRAY(0x0429, fa, fa_IR)
226 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
227 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
228
229 static const ILcidPosixElement fr[] = {
230 {0x0c, "fr"},
231 {0x080c, "fr_BE"},
232 {0x0c0c, "fr_CA"},
233 {0x240c, "fr_CD"},
234 {0x100c, "fr_CH"},
235 {0x300c, "fr_CI"},
236 {0x2c0c, "fr_CM"},
237 {0x040c, "fr_FR"},
238 {0x3c0c, "fr_HT"},
239 {0x140c, "fr_LU"},
240 {0x380c, "fr_MA"},
241 {0x180c, "fr_MC"},
242 {0x340c, "fr_ML"},
243 {0x200c, "fr_RE"},
244 {0x280c, "fr_SN"}
245 };
246
247 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
248
249 /* This LCID is really two different locales.*/
250 static const ILcidPosixElement ga[] = {
251 {0x3c, "ga"},
252 {0x3c, "gd"},
253 {0x083c, "ga_IE"}, /* Gaelic (Ireland) */
254 {0x043c, "gd_GB"} /* Gaelic (Scotland) */
255 };
256
257 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
258 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
259 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
260 ILCID_POSIX_ELEMENT_ARRAY(0x0468, ha, ha_NG)
261 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
262 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
263 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
264
265 /* This LCID is really four different locales.*/
266 static const ILcidPosixElement hr[] = {
267 {0x1a, "hr"},
268 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
269 {0x141a, "bs"}, /* Bosnian */
270 {0x041a, "hr_HR"}, /* Croatian*/
271 {0x081a, "sr_Latn_YU"}, /* Serbo-Croatian*/
272 {0x081a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
273 {0x0c1a, "sr_Cyrl_YU"}, /* Serbian*/
274 {0x0c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
275 {0x0c1a, "sr"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
276 };
277
278 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
279 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
280 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
281 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
282 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
283
284 static const ILcidPosixElement it[] = {
285 {0x10, "it"},
286 {0x0810, "it_CH"},
287 {0x0410, "it_IT"}
288 };
289
290 ILCID_POSIX_ELEMENT_ARRAY(0x045d, iu, iu_CA) /* TODO: verify country */
291 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
292 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
293 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
294 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
295 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
296 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
297
298 static const ILcidPosixElement ko[] = {
299 {0x12, "ko"},
300 {0x0812, "ko_KP"},
301 {0x0412, "ko_KR"}
302 };
303
304 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
305 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
306
307 static const ILcidPosixElement ks[] = { /* We could add PK and CN too */
308 {0x60, "ks"},
309 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
310 {0x0460, "ks_Arab_IN"}
311 };
312
313 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
314 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
315 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
316 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
317 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
318 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
319 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
320 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
321 ILCID_POSIX_ELEMENT_ARRAY(0x0450, mn, mn_MN)
322 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
323 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
324
325 static const ILcidPosixElement ms[] = {
326 {0x3e, "ms"},
327 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
328 {0x043e, "ms_MY"} /* Malaysia*/
329 };
330
331 /* The MSJDK documentation says this is maltese, but it's not supported.*/
332 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
333
334 static const ILcidPosixElement ne[] = {
335 {0x61, "ne"},
336 {0x0861, "ne_IN"}, /* India*/
337 {0x0461, "ne_NP"} /* Nepal*/
338 };
339
340 static const ILcidPosixElement nl[] = {
341 {0x13, "nl"},
342 {0x0813, "nl_BE"},
343 {0x0413, "nl_NL"}
344 };
345
346 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
347 static const ILcidPosixElement no[] = {
348 {0x14, "nb"}, /* really nb */
349 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
350 {0x0414, "no"}, /* really nb_NO */
351 {0x0414, "no_NO"}, /* really nb_NO */
352 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
353 {0x0814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
354 {0x0814, "no_NO_NY"}/* really nn_NO */
355 };
356
357 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the country */
358 ILCID_POSIX_ELEMENT_ARRAY(0x0472, om, om_ET) /* TODO: Verify the country */
359
360 /* Declared as or_IN to get around compiler errors*/
361 static const ILcidPosixElement or_IN[] = {
362 {0x48, "or"},
363 {0x0448, "or_IN"},
364 };
365
366 static const ILcidPosixElement pa[] = {
367 {0x46, "pa"},
368 {0x0446, "pa_IN"},
369 {0x0846, "pa_PK"}
370 };
371
372 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
373 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
374
375 static const ILcidPosixElement pt[] = {
376 {0x16, "pt"},
377 {0x0416, "pt_BR"},
378 {0x0816, "pt_PT"}
379 };
380
381 static const ILcidPosixElement qu[] = {
382 {0x6B, "qu"},
383 {0x046B, "qu_BO"},
384 {0x086B, "qu_EC"},
385 {0x0C6B, "qu_PE"}
386 };
387
388 ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro, ro_RO)
389
390 static const ILcidPosixElement root[] = {
391 {0x00, "root"}
392 };
393
394 ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru, ru_RU)
395 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
396
397 static const ILcidPosixElement sd[] = {
398 {0x59, "sd"},
399 {0x0459, "sd_IN"},
400 {0x0859, "sd_PK"}
401 };
402
403 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
404 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
405 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
406 ILCID_POSIX_ELEMENT_ARRAY(0x0477, so, so_ET) /* TODO: Verify the country */
407 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
408
409 static const ILcidPosixElement sv[] = {
410 {0x1d, "sv"},
411 {0x081d, "sv_FI"},
412 {0x041d, "sv_SE"}
413 };
414
415 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
416 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
417 ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta, ta_IN)
418 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
419 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
420
421 static const ILcidPosixElement ti[] = {
422 {0x73, "ti"},
423 {0x0873, "ti_ER"},
424 {0x0473, "ti_ET"}
425 };
426
427 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
428 ILCID_POSIX_ELEMENT_ARRAY(0x0464, tl, tl_PH)
429 ILCID_POSIX_ELEMENT_ARRAY(0x0432, tn, tn_BW)
430 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
431 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
432 ILCID_POSIX_ELEMENT_ARRAY(0x0480, ug, ug_CN)
433 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
434
435 static const ILcidPosixElement ur[] = {
436 {0x20, "ur"},
437 {0x0820, "ur_IN"},
438 {0x0420, "ur_PK"}
439 };
440
441 static const ILcidPosixElement uz[] = {
442 {0x43, "uz"},
443 {0x0843, "uz_UZ"}, /* Cyrillic based */
444 {0x0443, "uz_Latn_UZ"} /* Latin based */
445 };
446
447 ILCID_POSIX_ELEMENT_ARRAY(0x0433, ve, ve_ZA) /* TODO: Verify the country */
448 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
449 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA) /* TODO: Verify the country */
450 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) /* TODO: Verify the country */
451
452 /* TODO: Make the locales with the script the primary locale once the names are implemented in the resources. */
453 static const ILcidPosixElement zh[] = {
454 {0x04, "zh"},
455 {0x0804, "zh_Hans_CN"},
456 {0x0804, "zh_CN"},
457 {0x0c04, "zh_Hant_HK"},
458 {0x0c04, "zh_HK"},
459 {0x1404, "zh_Hant_MO"},
460 {0x1404, "zh_MO"},
461 {0x1004, "zh_Hans_SG"},
462 {0x1004, "zh_SG"},
463 {0x0404, "zh_Hant_TW"},
464 {0x0404, "zh_TW"},
465 {0x30404,"zh_Hant_TW"},
466 {0x30404,"zh_TW"}, /* Bopomofo order */
467 {0x20404,"zh_TW@collation=STROKE"}
468 };
469
470 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA) /* TODO: Verify the country */
471
472 /* This must be static and grouped by LCID. */
473
474 /* non-existent ISO-639 codes */
475 /*
476 0x466 Edo
477 0x467 Fulfulde - Nigeria
478 0x43b Sami (Lappish)
479 0x42e Sorbian (iso639 = dsb, hsb, wen)
480 0x430 Sutu
481 0x45f Tamazight (Arabic script)
482 0x85f Tamazight (Latin script)
483 0x478 Yi
484 */
485 static const ILcidPosixMap gPosixIDmap[] = {
486 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
487 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
488 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
489 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
490 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
491 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
492 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
493 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
494 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
495 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
496 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
497 ILCID_POSIX_MAP(cs_CZ), /* cs Czech 0x05 */
498 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
499 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
500 ILCID_POSIX_MAP(de), /* de German 0x07 */
501 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
502 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
503 ILCID_POSIX_MAP(en), /* en English 0x09 */
504 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
505 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
506 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
507 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
508 ILCID_POSIX_MAP(fa), /* fa Farsi 0x29 */
509 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
510 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
511 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
512 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
513 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
514 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
515 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
516 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
517 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
518 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
519 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
520 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
521 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
522 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
523 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
524 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
525 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
526 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
527 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
528 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
529 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
530 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
531 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
532 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
533 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
534 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
535 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
536 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
537 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
538 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
539 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
540 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
541 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
542 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
543 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
544 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
545 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
546 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
547 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
548 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
549 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
550 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
551 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
552 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
553 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
554 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
555 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
556 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
557 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
558 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
559 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
560 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
561 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
562 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
563 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
564 ILCID_POSIX_MAP(qu), /* qu Quechua (correct spelling)0x6B */
565 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
566 ILCID_POSIX_MAP(root), /* root 0x00 */
567 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
568 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
569 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
570 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
571 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
572 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
573 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
574 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
575 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
576 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
577 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
578 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
579 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
580 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
581 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
582 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
583 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
584 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
585 ILCID_POSIX_MAP(tl), /* tl Tagalog (Filipino) 0x64 */
586 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
587 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
588 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
589 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
590 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
591 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
592 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
593 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
594 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
595 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
596 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
597 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
598 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
599 };
600
601 static const uint32_t gLocaleCount = sizeof(gPosixIDmap)/sizeof(ILcidPosixMap);
602
603 /**
604 * Do not call this function. It is called by hostID.
605 * The function is not private because this struct must stay as a C struct,
606 * and this is an internal class.
607 */
608 static int32_t
609 idCmp(const char* id1, const char* id2)
610 {
611 int32_t diffIdx = 0;
612 while (*id1 == *id2 && *id1 != 0) {
613 diffIdx++;
614 id1++;
615 id2++;
616 }
617 return diffIdx;
618 }
619
620 /**
621 * Searches for a Windows LCID
622 *
623 * @param posixid the Posix style locale id.
624 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
625 * no equivalent Windows LCID.
626 * @return the LCID
627 */
628 static uint32_t
629 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
630 {
631 int32_t bestIdx = 0;
632 int32_t bestIdxDiff = 0;
633 int32_t posixIDlen = (int32_t)uprv_strlen(posixID) + 1;
634 uint32_t idx;
635
636 for (idx = 0; idx < this_0->numRegions; idx++ ) {
637 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
638 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
639 if (posixIDlen == sameChars) {
640 /* Exact match */
641 return this_0->regionMaps[idx].hostID;
642 }
643 bestIdxDiff = sameChars;
644 bestIdx = idx;
645 }
646 }
647 if (this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0) {
648 *status = U_USING_FALLBACK_WARNING;
649 return this_0->regionMaps[bestIdx].hostID;
650 }
651
652 /*no match found */
653 *status = U_ILLEGAL_ARGUMENT_ERROR;
654 return this_0->regionMaps->hostID;
655 }
656
657 static const char*
658 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
659 {
660 uint32_t i;
661 for (i = 0; i <= this_0->numRegions; i++)
662 {
663 if (this_0->regionMaps[i].hostID == hostID)
664 {
665 return this_0->regionMaps[i].posixID;
666 }
667 }
668
669 /* If you get here, then no matching region was found,
670 so return the language id with the wild card region. */
671 return this_0->regionMaps[0].posixID;
672 }
673
674 /*
675 //////////////////////////////////////
676 //
677 // LCID --> POSIX
678 //
679 /////////////////////////////////////
680 */
681
682 U_CAPI const char *
683 uprv_convertToPosix(uint32_t hostid, UErrorCode* status)
684 {
685 uint16_t langID = LANGUAGE_LCID(hostid);
686 uint32_t index;
687
688 for (index = 0; index < gLocaleCount; index++)
689 {
690 if (langID == gPosixIDmap[index].regionMaps->hostID)
691 {
692 return getPosixID(&gPosixIDmap[index], hostid);
693 }
694 }
695
696 /* no match found */
697 *status = U_ILLEGAL_ARGUMENT_ERROR;
698 return NULL;
699 }
700
701 /*
702 //////////////////////////////////////
703 //
704 // POSIX --> LCID
705 // This should only be called from uloc_getLCID.
706 // The locale ID must be in canonical form.
707 // langID is separate so that this file doesn't depend on the uloc_* API.
708 //
709 /////////////////////////////////////
710 */
711
712 U_CAPI uint32_t
713 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
714 {
715
716 uint32_t low = 0;
717 uint32_t high = gLocaleCount;
718 uint32_t mid = high;
719 uint32_t oldmid = 0;
720 int32_t compVal;
721
722 uint32_t value = 0;
723 uint32_t fallbackValue = (uint32_t)-1;
724 UErrorCode myStatus;
725 uint32_t idx;
726
727 /* Check for incomplete id. */
728 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
729 return 0;
730 }
731
732 /*Binary search for the map entry for normal cases */
733
734 while (high > low) /*binary search*/{
735
736 mid = (high+low) >> 1; /*Finds median*/
737
738 if (mid == oldmid)
739 break;
740
741 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
742 if (compVal < 0){
743 high = mid;
744 }
745 else if (compVal > 0){
746 low = mid;
747 }
748 else /*we found it*/{
749 return getHostID(&gPosixIDmap[mid], posixID, status);
750 }
751 oldmid = mid;
752 }
753
754 /*
755 * Sometimes we can't do a binary search on posixID because some LCIDs
756 * go to different locales. We hit one of those special cases.
757 */
758 for (idx = 0; idx < gLocaleCount; idx++ ) {
759 myStatus = U_ZERO_ERROR;
760 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
761 if (myStatus == U_ZERO_ERROR) {
762 return value;
763 }
764 else if (myStatus == U_USING_FALLBACK_WARNING) {
765 fallbackValue = value;
766 }
767 }
768
769 if (fallbackValue != (uint32_t)-1) {
770 *status = U_USING_FALLBACK_WARNING;
771 return fallbackValue;
772 }
773
774 /* no match found */
775 *status = U_ILLEGAL_ARGUMENT_ERROR;
776 return 0; /* return international (root) */
777 }
778