]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/locmap.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / locmap.c
1 /*
2 **********************************************************************
3 * Copyright (C) 1996-2003, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7 /*
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 * intended for internal use only.
14 *
15 * Methods of interest:
16 * unsigned long convertToLCID(const int8_t*);
17 * const int8_t* convertToPosix(unsigned long);
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 * Date Name Description
22 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
23 * setId() method and safety check against
24 * MAX_ID_LENGTH.
25 * 04/23/99 stephen Added C wrapper for convertToPosix.
26 * 09/18/00 george Removed the memory leaks.
27 * 08/23/01 george Convert to C
28 */
29
30 #include "locmap.h"
31 #include "cstring.h"
32
/*
 * Locale ID -> default charset name, ended by a { NULL, NULL } sentinel.
 * uprv_defaultCodePageForLocale() walks the rows from the top and returns
 * the first one that matches, so row order is significant.
 *
 * See: http://czyborra.com/charsets/iso8859.html
 */
static const struct
{
    const char *loc;     /* locale ID: language, optionally with country */
    const char *charmap; /* charset name reported for that locale */
}
_localeToDefaultCharmapTable[] =
{
    /* xx_XX locales first, so they will match: */
    { "zh_CN", "gb2312" },     /* Chinese (Simplified) */
    { "zh_TW", "Big5" },       /* Chinese (Traditional) */

    { "af", "iso-8859-1" },    /* Afrikaans */
    { "ar", "iso-8859-6" },    /* Arabic */
    { "be", "iso-8859-5" },    /* Byelorussian */
    { "bg", "iso-8859-5" },    /* Bulgarian */
    { "ca", "iso-8859-1" },    /* Catalan */
    { "cs", "iso-8859-2" },    /* Czech */
    { "da", "iso-8859-1" },    /* Danish */
    { "de", "iso-8859-1" },    /* German */
    { "el", "iso-8859-7" },    /* Greek */
    { "en", "iso-8859-1" },    /* English */
    { "eo", "iso-8859-3" },    /* Esperanto */
    { "es", "iso-8859-1" },    /* Spanish */
    { "et", "iso-8859-4" },    /* Estonian */
    { "eu", "iso-8859-1" },    /* Basque */
    { "fi", "iso-8859-1" },    /* Finnish */
    { "fo", "iso-8859-1" },    /* Faroese */
    { "fr", "iso-8859-1" },    /* French */
    { "ga", "iso-8859-1" },    /* Irish (Gaelic) */
    { "gd", "iso-8859-1" },    /* Scottish */
    { "he", "iso-8859-8" },    /* Hebrew */
    { "hr", "iso-8859-2" },    /* Croatian */
    { "hu", "iso-8859-2" },    /* Hungarian */
    { "in", "iso-8859-1" },    /* Indonesian */
    { "is", "iso-8859-1" },    /* Icelandic */
    { "it", "iso-8859-1" },    /* Italian */
    { "iw", "iso-8859-8" },    /* Hebrew, old ISO name */
    { "ja", "Shift_JIS" },     /* Japanese [was: ja_JP ] */
    { "ji", "iso-8859-8" },    /* Yiddish */
    { "kl", "iso-8859-4" },    /* Greenlandic */
    { "ko", "euc-kr" },        /* Korean [was: ko_KR ] */
    { "lt", "iso-8859-4" },    /* Lithuanian */
    { "lv", "iso-8859-4" },    /* Latvian (Lettish) */
    { "mk", "iso-8859-5" },    /* Macedonian */
    { "mt", "iso-8859-3" },    /* Maltese */
    { "nb", "iso-8859-1" },    /* Norwegian Bokmal */
    { "nl", "iso-8859-1" },    /* Dutch */
    { "no", "iso-8859-1" },    /* Norwegian, old ISO name */
    { "nn", "iso-8859-1" },    /* Norwegian Nynorsk */
    { "pl", "iso-8859-2" },    /* Polish */
    { "pt", "iso-8859-1" },    /* Portuguese */
    { "rm", "iso-8859-1" },    /* Rhaeto-Romance */
    { "ro", "iso-8859-2" },    /* Romanian */
    { "ru", "iso-8859-5" },    /* Russian */
    { "sk", "iso-8859-2" },    /* Slovak */
    { "sl", "iso-8859-2" },    /* Slovenian */
    { "sq", "iso-8859-1" },    /* Albanian */
    { "sr", "iso-8859-5" },    /* Serbian */
    { "sv", "iso-8859-1" },    /* Swedish */
    { "sw", "iso-8859-1" },    /* Swahili */
    { "th", "tis-620" },       /* Thai [windows-874] */
    { "tr", "iso-8859-9" },    /* Turkish */
    { "uk", "iso-8859-5" },    /* pre-1990 Ukrainian... see: <http://czyborra.com/charsets/cyrillic.html#KOI8-U> */
    { "zh", "Big-5" },         /* Chinese (Traditional) */

    { NULL, NULL }             /* end-of-table sentinel */
};
103
104 U_CAPI const char* U_EXPORT2
105 uprv_defaultCodePageForLocale(const char *locale)
106 {
107 int32_t i;
108 int32_t locale_len;
109
110 if (locale == NULL)
111 {
112 return NULL;
113 }
114 locale_len = (int32_t)uprv_strlen(locale);
115
116 if(locale_len < 2)
117 {
118 return NULL; /* non existent. Not a complete check, but it will
119 * make sure that 'c' doesn't match catalan, etc.
120 */
121 }
122
123 for(i=0; _localeToDefaultCharmapTable[i].loc; i++)
124 {
125 if(uprv_strncmp(locale, _localeToDefaultCharmapTable[i].loc,
126 uprv_min(locale_len,
127 (int32_t)uprv_strlen(_localeToDefaultCharmapTable[i].loc)))
128 == 0)
129 {
130 return _localeToDefaultCharmapTable[i].charmap;
131 }
132 }
133
134 return NULL;
135 }
136
137 #ifdef WIN32
138
139 /*
140 * Note:
141 * This code is used only internally by putil.c/uprv_getDefaultLocaleID().
142 * The mapping from Win32 locale ID numbers to POSIX locale strings should
143 * be the faster one. It is more important to get the LCID to ICU locale
144 * mapping correct than to get a correct ICU locale to LCID mapping.
145 *
146 * In order to test this code, please use the lcid test program.
147 *
148 * The LCID values come from winnt.h
149 */
150
151 #include "unicode/uloc.h"
152
153
154 /*
155 ////////////////////////////////////////////////
156 //
157 // Internal Classes for LCID <--> POSIX Mapping
158 //
159 /////////////////////////////////////////////////
160 */
161
/* A single pairing of a Windows LCID with its POSIX-style locale ID. */
typedef struct ILcidPosixElement
{
    const uint32_t     hostID;  /* the Windows LCID */
    const char * const posixID; /* the POSIX locale ID string */
} ILcidPosixElement;
167
/* All LCID <-> POSIX pairings that belong to one language. */
typedef struct ILcidPosixMap
{
    const uint32_t                         numRegions; /* element count of regionMaps */
    const struct ILcidPosixElement * const regionMaps; /* first element of the array */
} ILcidPosixMap;
173
174 static const char* posixID(const ILcidPosixMap *this_0, uint32_t fromHostID);
175
176 /**
177 * Searches for a Windows LCID
178 *
179 * @param posixid the Posix style locale id.
180 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
181 * no equivalent Windows LCID.
182 * @return the LCID
183 */
184 static uint32_t hostID(const ILcidPosixMap *this_0, const char* fromPosixID, UErrorCode* status);
185
186 /**
187 * Do not call this function. It is called by hostID.
188 * The function is not private because this struct must stay as a C struct,
189 * and this is an internal class.
190 */
191 static int32_t idCmp(const char* id1, const char* id2);
192
193
194 /*
195 /////////////////////////////////////////////////
196 //
197 // Easy macros to make the LCID <--> POSIX Mapping
198 //
199 /////////////////////////////////////////////////
200 */
201
/*
  The standard one language/one country mapping for LCID.

  Expands to a complete static array named after languageID.  Element 0 is
  the bare language, whose host ID is LANGUAGE_LCID(hostID); element 1 is
  the language together with the country.  Both strings are produced by
  stringizing the macro arguments.  The expansion ends with its own
  semicolon, so invocations are written without one.
*/
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement languageID[] = { \
    { LANGUAGE_LCID(hostID), #languageID }, /* parent locale */ \
    { hostID, #posixID } \
};
212
/*
  Builds the ILcidPosixMap initializer for one element array: the number of
  elements in the array, followed by the array itself.  The argument must be
  the array variable (not a pointer), and its first element is expected to
  be the bare language.
*/
#define ILCID_POSIX_MAP(_posixID) \
    { sizeof(_posixID) / sizeof((_posixID)[0]), _posixID }
220
221 /*
222 ////////////////////////////////////////////
223 //
224 // Create the table of LCID to POSIX Mapping
225 // None of it should be dynamically created.
226 //
227 // Keep static locale variables inside the function so that
228 // it can be created properly during static init.
229 //
230 ////////////////////////////////////////////
231 */
232
233 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
234
235 static const ILcidPosixElement ar[] = {
236 {0x01, "ar"},
237 {0x3801, "ar_AE"},
238 {0x3c01, "ar_BH"},
239 {0x1401, "ar_DZ"},
240 {0x0c01, "ar_EG"},
241 {0x0801, "ar_IQ"},
242 {0x2c01, "ar_JO"},
243 {0x3401, "ar_KW"},
244 {0x3001, "ar_LB"},
245 {0x1001, "ar_LY"},
246 {0x1801, "ar_MA"},
247 {0x2001, "ar_OM"},
248 {0x4001, "ar_QA"},
249 {0x0401, "ar_SA"},
250 {0x2801, "ar_SY"},
251 {0x1c01, "ar_TN"},
252 {0x2401, "ar_YE"}
253 };
254
255 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN) /*Todo: Data does not exist*/
256
257 static const ILcidPosixElement az[] = {
258 {0x2c, "az"},
259 {0x082c, "az_AZ"}, /* Cyrillic based */
260 {0x042c, "az_AZ_LATN"} /* Latin based */
261 };
262
263 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
264 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
265 ILCID_POSIX_ELEMENT_ARRAY(0x0445, bn, bn_IN)
266 ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca, ca_ES)
267 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
268 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
269
270 static const ILcidPosixElement de[] = {
271 {0x07, "de"},
272 {0x0c07, "de_AT"},
273 {0x0807, "de_CH"},
274 {0x0407, "de_DE"},
275 {0x1407, "de_LI"},
276 {0x1007, "de_LU"},
277 {0x10407,"de__PHONEBOOK"} /*This is really de_DE_PHONEBOOK on Windows, maybe 10007*/
278 };
279
280 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
281 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
282
283 static const ILcidPosixElement en[] = {
284 {0x09, "en"},
285 {0x0c09, "en_AU"},
286 {0x2809, "en_BZ"},
287 {0x1009, "en_CA"},
288 {0x0809, "en_GB"},
289 {0x1809, "en_IE"},
290 {0x2009, "en_JM"},
291 {0x1409, "en_NZ"},
292 {0x3409, "en_PH"},
293 {0x2C09, "en_TT"},
294 {0x0409, "en_US"},
295 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
296 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
297 {0x1c09, "en_ZA"},
298 {0x3009, "en_ZW"}
299 };
300
301 static const ILcidPosixElement en_US_POSIX[] = {
302 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
303 };
304
305 static const ILcidPosixElement es[] = {
306 {0x0a, "es"},
307 {0x2c0a, "es_AR"},
308 {0x400a, "es_BO"},
309 {0x340a, "es_CL"},
310 {0x240a, "es_CO"},
311 {0x140a, "es_CR"},
312 {0x1c0a, "es_DO"},
313 {0x300a, "es_EC"},
314 {0x0c0a, "es_ES"}, /*Modern sort.*/
315 {0x100a, "es_GT"},
316 {0x480a, "es_HN"},
317 {0x080a, "es_MX"},
318 {0x4c0a, "es_NI"},
319 {0x180a, "es_PA"},
320 {0x280a, "es_PE"},
321 {0x500a, "es_PR"},
322 {0x3c0a, "es_PY"},
323 {0x440a, "es_SV"},
324 {0x380a, "es_UY"},
325 {0x200a, "es_VE"},
326 {0x040a, "es__TRADITIONAL"} /*This is really es_ES_TRADITIONAL on Windows */
327 };
328
329 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
330 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
331 ILCID_POSIX_ELEMENT_ARRAY(0x0429, fa, fa_IR)
332 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
333 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
334
335 static const ILcidPosixElement fr[] = {
336 {0x0c, "fr"},
337 {0x080c, "fr_BE"},
338 {0x0c0c, "fr_CA"},
339 {0x100c, "fr_CH"},
340 {0x040c, "fr_FR"},
341 {0x140c, "fr_LU"},
342 {0x180c, "fr_MC"}
343 };
344
345 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
346 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
347 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
348 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
349
350 /* This LCID is really three different locales.*/
351 static const ILcidPosixElement hr[] = {
352 {0x1a, "hr"},
353 {0x041a, "hr_HR"}, /* Croatian*/
354 {0x081a, "sh_YU"}, /* Serbo-Croatian*/
355 {0x081a, "sh"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
356 {0x0c1a, "sr_YU"}, /* Serbian*/
357 {0x0c1a, "sr"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
358 };
359
360 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
361 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
362 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
363 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
364
365 static const ILcidPosixElement it[] = {
366 {0x10, "it"},
367 {0x0810, "it_CH"},
368 {0x0410, "it_IT"}
369 };
370
371 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
372 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
373 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
374 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
375 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
376
377 static const ILcidPosixElement ko[] = {
378 {0x12, "ko"},
379 {0x0812, "ko_KP"},
380 {0x0412, "ko_KR"}
381 };
382
383 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
384 ILCID_POSIX_ELEMENT_ARRAY(0x0460, ks, ks_IN) /*Todo: Data does not exist*/
385 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
386 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
387 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
388 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
389 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN) /*Todo: Data does not exist*/
390 ILCID_POSIX_ELEMENT_ARRAY(0x0450, mn, mn_MN)
391 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni, mni_IN) /*Todo: Data does not exist*/
392 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
393
394 static const ILcidPosixElement ms[] = {
395 {0x3e, "ms"},
396 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
397 {0x043e, "ms_MY"} /* Malaysia*/
398 };
399
400 /* The MSJDK documentation says this is maltese, but it's not supported.*/
401 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
402
403 static const ILcidPosixElement ne[] = { /*Todo: Data does not exist*/
404 {0x61, "ne"},
405 {0x0861, "ne_IN"}, /* India*/
406 {0x0461, "ne_NP"} /* Nepal*/
407 };
408
409 static const ILcidPosixElement nl[] = {
410 {0x13, "nl"},
411 {0x0813, "nl_BE"},
412 {0x0413, "nl_NL"}
413 };
414
415 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
416 static const ILcidPosixElement no[] = {
417 {0x14, "nb"}, /* really nb */
418 {0x0414, "nb_NO"}, /* really nb_NO */
419 {0x0814, "nn_NO"}, /* really nn_NO */
420 {0x0814, "nn"} /* It's 0x14 or 0x814, pick one to make the test program happy. */
421 };
422
423 /* Declared as or_IN to get around compiler errors*/
424 static const ILcidPosixElement or_IN[] = {
425 {0x48, "or"},
426 {0x0448, "or_IN"},
427 };
428
429 ILCID_POSIX_ELEMENT_ARRAY(0x0446, pa, pa_IN)
430 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
431
432 static const ILcidPosixElement pt[] = {
433 {0x16, "pt"},
434 {0x0416, "pt_BR"},
435 {0x0816, "pt_PT"}
436 };
437
438 ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro, ro_RO)
439
440 static const ILcidPosixElement root[] = {
441 {0x00, "root"}
442 };
443
444 ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru, ru_RU)
445 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
446 ILCID_POSIX_ELEMENT_ARRAY(0x0459, sd, sd_IN) /*Todo: Data does not exist*/
447 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
448 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
449 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
450
451 static const ILcidPosixElement sv[] = {
452 {0x1d, "sv"},
453 {0x081d, "sv_FI"},
454 {0x041d, "sv_SE"}
455 };
456
457 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
458 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
459 ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta, ta_IN)
460 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
461 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
462 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
463 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
464 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
465
466 static const ILcidPosixElement ur[] = {
467 {0x20, "ur"},
468 {0x0820, "ur_IN"},
469 {0x0420, "ur_PK"}
470 };
471
472 static const ILcidPosixElement uz[] = {
473 {0x43, "uz"},
474 {0x0843, "uz_UZ"}, /* Cyrillic based */
475 {0x0443, "uz_UZ_LATN"} /* Latin based */
476 };
477
478 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
479
480 static const ILcidPosixElement zh[] = {
481 {0x04, "zh"},
482 {0x0804, "zh_CN"},
483 {0x0c04, "zh_HK"},
484 {0x1404, "zh_MO"},
485 {0x1004, "zh_SG"},
486 {0x0404, "zh_TW"},
487 {0x30404,"zh_TW"},
488 {0x20404,"zh_TW_STROKE"}
489 };
490
491 /* This must be static and grouped by LCID. */
492 static const ILcidPosixMap gPosixIDmap[] = {
493 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
494 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
495 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
496 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
497 ILCID_POSIX_MAP(be), /* be Byelorussian 0x23 */
498 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
499 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
500 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
501 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
502 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
503 ILCID_POSIX_MAP(de), /* de German 0x07 */
504 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
505 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
506 ILCID_POSIX_MAP(en), /* en English 0x09 */
507 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
508 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
509 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
510 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
511 ILCID_POSIX_MAP(fa), /* fa Farsi 0x29 */
512 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
513 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
514 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
515 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
516 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
517 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
518 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
519 ILCID_POSIX_MAP(hr), /* hr Croatian 0x1a */
520 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
521 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
522 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
523 /* ILCID_POSIX_MAP(in), // in Indonesian 0x21 */
524 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
525 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
526 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
527 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
528 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
529 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
530 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
531 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
532 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
533 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
534 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
535 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
536 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
537 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
538 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
539 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
540 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
541 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
542 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
543 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
544 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
545 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
546 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
547 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
548 ILCID_POSIX_MAP(no), /* nb/nn Norwegian (formerly no) 0x14 */
549 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
550 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
551 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
552 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
553 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
554 ILCID_POSIX_MAP(root), /* root 0x00 */
555 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
556 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
557 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
558 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
559 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
560 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
561 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
562 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
563 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
564 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
565 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
566 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
567 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
568 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
569 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
570 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
571 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
572 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
573 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
574 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
575 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
576 };
577
578 static const uint32_t gLocaleCount = sizeof(gPosixIDmap)/sizeof(ILcidPosixMap);
579
/**
 * Measures the common prefix of two IDs.
 *
 * @param id1 first NUL-terminated ID.
 * @param id2 second NUL-terminated ID.
 * @return the number of leading characters that are equal in both strings;
 *         the terminating NUL is never counted.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    for (matched = 0;
         id1[matched] != 0 && id1[matched] == id2[matched];
         matched++)
    {
        /* nothing to do: the loop header does the counting */
    }
    return matched;
}
591
592 /**
593 * Searches for a Windows LCID
594 *
595 * @param posixid the Posix style locale id.
596 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
597 * no equivalent Windows LCID.
598 * @return the LCID
599 */
600 static uint32_t
601 hostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
602 {
603 int32_t bestIdx = 0;
604 int32_t bestIdxDiff = 0;
605 int32_t posixIDlen = (int32_t)uprv_strlen(posixID) + 1;
606 uint32_t idx;
607
608 for (idx = 0; idx < this_0->numRegions; idx++ ) {
609 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
610 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
611 if (posixIDlen == sameChars) {
612 /* Exact match */
613 return this_0->regionMaps[idx].hostID;
614 }
615 bestIdxDiff = sameChars;
616 bestIdx = idx;
617 }
618 }
619 if (this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0) {
620 *status = U_USING_FALLBACK_WARNING;
621 return this_0->regionMaps[bestIdx].hostID;
622 }
623
624 /*no match found */
625 *status = U_ILLEGAL_ARGUMENT_ERROR;
626 return this_0->regionMaps->hostID;
627 }
628
629 static const char*
630 posixID(const ILcidPosixMap *this_0, uint32_t hostID)
631 {
632 uint32_t i;
633 for (i = 0; i <= this_0->numRegions; i++)
634 {
635 if (this_0->regionMaps[i].hostID == hostID)
636 {
637 return this_0->regionMaps[i].posixID;
638 }
639 }
640
641 /* If you get here, then no matching region was found,
642 so return the language id with the wild card region. */
643 return this_0->regionMaps[0].posixID;
644 }
645
646 /*
647 //////////////////////////////////////
648 //
649 // LCID --> POSIX
650 //
651 /////////////////////////////////////
652 */
653
654 U_CAPI const char *
655 uprv_convertToPosix(uint32_t hostid, UErrorCode* status)
656 {
657 uint16_t langID = LANGUAGE_LCID(hostid);
658 uint32_t index;
659
660 for (index = 0; index < gLocaleCount; index++)
661 {
662 if (langID == gPosixIDmap[index].regionMaps->hostID)
663 {
664 return posixID(&gPosixIDmap[index], hostid);
665 }
666 }
667
668 /* no match found */
669 *status = U_ILLEGAL_ARGUMENT_ERROR;
670 return "??_??";
671 }
672
673 /*
674 //////////////////////////////////////
675 //
676 // POSIX --> LCID
677 //
678 /////////////////////////////////////
679 */
680
681 U_CAPI uint32_t
682 uprv_convertToLCID(const char* posixID, UErrorCode* status)
683 {
684
685 uint32_t low = 0;
686 uint32_t high = gLocaleCount - 1;
687 uint32_t mid = high;
688 int32_t compVal;
689 char langID[ULOC_FULLNAME_CAPACITY];
690
691 uint32_t value = 0;
692 uint32_t fallbackValue = (uint32_t)-1;
693 UErrorCode myStatus;
694 uint32_t idx;
695
696 /* Check for incomplete id. */
697 if (!posixID || uprv_strlen(posixID) < 2) {
698 return 0;
699 }
700
701 uloc_getLanguage(posixID, langID, sizeof(langID), status);
702 if (U_FAILURE(*status)) {
703 return 0;
704 }
705
706 /*Binary search for the map entry for normal cases */
707 /* When mid == 0, it's not found */
708 while (low <= high && mid != 0) {
709
710 mid = (low + high + 1) / 2; /* +1 is to round properly */
711
712 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
713
714 if (compVal < 0)
715 high = mid - 1;
716 else if (compVal > 0)
717 low = mid + 1;
718 else /* found match! */
719 return hostID(&gPosixIDmap[mid], posixID, status);
720 }
721
722 /*
723 * Sometimes we can't do a binary search on posixID because some LCIDs
724 * go to different locales. We hit one of those special cases.
725 */
726 for (idx = 0; idx < gLocaleCount; idx++ ) {
727 myStatus = U_ZERO_ERROR;
728 value = hostID(&gPosixIDmap[idx], posixID, &myStatus);
729 if (myStatus == U_ZERO_ERROR) {
730 return value;
731 }
732 else if (myStatus == U_USING_FALLBACK_WARNING) {
733 fallbackValue = value;
734 }
735 }
736
737 if (fallbackValue != (uint32_t)-1) {
738 *status = U_USING_FALLBACK_WARNING;
739 return fallbackValue;
740 }
741
742 /* no match found */
743 *status = U_ILLEGAL_ARGUMENT_ERROR;
744 return 0; /* return international (root) */
745 }
746
747 #endif
748