2 **********************************************************************
3 * Copyright (C) 1996-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Provides functionality for mapping between
8 * LCID and Posix IDs or ICU locale to codepage
10 * Note: All classes and code in this file are
11 * intended for internal use only.
13 * Methods of interest:
14 * unsigned long convertToLCID(const char*);
15 * const char* convertToPosix(unsigned long);
17 * Kathleen Wilson, 4/30/96
19 * Date Name Description
20 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
21 * setId() method and safety check against
23 * 04/23/99 stephen Added C wrapper for convertToPosix.
24 * 09/18/00 george Removed the memory leaks.
25 * 08/23/01 george Convert to C
29 #include "unicode/uloc.h"
33 #if defined(U_WINDOWS) && defined(_MSC_VER) && (_MSC_VER >= 1500)
34 #define USE_WINDOWS_LOCALE_API
37 #ifdef USE_WINDOWS_LOCALE_API
44 * The mapping from Win32 locale ID numbers to POSIX locale strings should
47 * Many LCID values come from winnt.h
48 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
52 ////////////////////////////////////////////////
54 // Internal Classes for LCID <--> POSIX Mapping
56 /////////////////////////////////////////////////
/**
 * One LCID <--> POSIX pairing: a host (Windows) locale ID together with the
 * POSIX locale string it is paired with. Both members are const, so entries
 * can only be created through initializers.
 */
typedef struct ILcidPosixElement
{
    const uint32_t      hostID;   /* LCID in host format */
    const char * const  posixID;  /* POSIX locale ID string */
} ILcidPosixElement;
/**
 * A table of ILcidPosixElement entries plus the number of entries in it.
 */
typedef struct ILcidPosixMap
{
    const uint32_t                          numRegions;  /* entry count of regionMaps */
    const struct ILcidPosixElement * const  regionMaps;  /* first entry of the table */
} ILcidPosixMap;
73 /////////////////////////////////////////////////
75 // Easy macros to make the LCID <--> POSIX Mapping
77 /////////////////////////////////////////////////
81 * The standard one language/one country mapping for LCID.
82 * The first element must be the language, and the following
83 * elements are the language with the country.
84 * @param hostID LCID in host format such as 0x044d
85 * @param languageID posix ID of just the language such as 'de'
86 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
88 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
89 static const ILcidPosixElement locmap_ ## languageID [] = { \
90 {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
/**
 * Opens the definition of the static table named locmap_<id>; the caller
 * supplies the brace-enclosed list of ILcidPosixElement entries and the
 * terminating semicolon right after the macro invocation.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
    static const ILcidPosixElement locmap_##id[] =
/**
 * Create the map for the posixID. Expands to an ILcidPosixMap initializer
 * { entry count, table } for the table named locmap_<_posixID>, so the
 * argument must be the suffix used when that table was defined.
 *
 * The entry count is derived from the table's own element size instead of a
 * hard-coded element type, so it stays correct if the table's element type
 * ever changes, and it is converted explicitly to the uint32_t count field.
 *
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    { (uint32_t)(sizeof(locmap_ ## _posixID) / sizeof(locmap_ ## _posixID[0])), \
      locmap_ ## _posixID }
112 ////////////////////////////////////////////
114 // Create the table of LCID to POSIX Mapping
115 // None of it should be dynamically created.
117 // Keep static locale variables inside the function so that
118 // it can be created properly during static init.
120 // Note: This table should be updated periodically. Check the National Language Support API Reference Website.
121 // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
122 // maintained for support of older Windows version.
123 // Update: Windows 7 (091130)
124 ////////////////////////////////////////////
127 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af
, af_ZA
)
129 ILCID_POSIX_SUBTABLE(ar
) {
149 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as
, as_IN
)
150 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am
, am_ET
)
151 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn
,arn_CL
)
153 ILCID_POSIX_SUBTABLE(az
) {
155 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
156 {0x742c, "az_Cyrl"}, /* Cyrillic based */
157 {0x042c, "az_Latn_AZ"}, /* Latin based */
158 {0x782c, "az_Latn"}, /* Latin based */
159 {0x042c, "az_AZ"} /* Latin based */
162 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba
, ba_RU
)
163 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be
, be_BY
)
165 ILCID_POSIX_SUBTABLE(ber
) {
167 {0x045f, "ber_Arab_DZ"},
168 {0x045f, "ber_Arab"},
169 {0x085f, "ber_Latn_DZ"},
173 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg
, bg_BG
)
175 ILCID_POSIX_SUBTABLE(bn
) {
181 ILCID_POSIX_SUBTABLE(bo
) {
187 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br
, br_FR
)
188 ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca
, ca_ES
)
189 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co
, co_FR
)
190 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr
,chr_US
)
192 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
193 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs
, cs_CZ
)
195 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy
, cy_GB
)
196 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da
, da_DK
)
198 ILCID_POSIX_SUBTABLE(de
) {
205 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
206 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
209 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv
, dv_MV
)
210 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el
, el_GR
)
212 ILCID_POSIX_SUBTABLE(en
) {
227 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
228 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
232 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
233 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
234 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
235 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
236 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
239 ILCID_POSIX_SUBTABLE(en_US_POSIX
) {
240 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
243 ILCID_POSIX_SUBTABLE(es
) {
252 {0x0c0a, "es_ES"}, /*Modern sort.*/
265 {0x040a, "es_ES@collation=traditional"},
266 {0x040a, "es@collation=traditional"}
269 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et
, et_EE
)
270 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu
, eu_ES
)
272 /* ISO-639 doesn't distinguish between Persian and Dari.*/
273 ILCID_POSIX_SUBTABLE(fa
) {
275 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
276 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
279 /* duplicate for roundtripping */
280 ILCID_POSIX_SUBTABLE(fa_AF
) {
281 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
282 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
285 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi
, fi_FI
)
286 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil
,fil_PH
)
287 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo
, fo_FO
)
289 ILCID_POSIX_SUBTABLE(fr
) {
307 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy
, fy_NL
)
309 /* This LCID is really two different locales.*/
310 ILCID_POSIX_ELEMENT_ARRAY(0x083c, ga
, ga_IE
) /* Gaelic (Ireland) */
311 ILCID_POSIX_ELEMENT_ARRAY(0x0491, gd
, gd_GB
) /* Gaelic (Scotland) */
313 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl
, gl_ES
)
314 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu
, gu_IN
)
315 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn
, gn_PY
)
316 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw
,gsw_FR
)
318 ILCID_POSIX_SUBTABLE(ha
) {
321 {0x0468, "ha_Latn_NG"},
324 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw
,haw_US
)
325 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he
, he_IL
)
326 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi
, hi_IN
)
328 /* This LCID is really four different locales.*/
329 ILCID_POSIX_SUBTABLE(hr
) {
331 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
332 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
333 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
334 {0x781a, "bs"}, /* Bosnian */
335 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
336 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
337 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
338 {0x041a, "hr_HR"}, /* Croatian*/
339 {0x2c1a, "sr_Latn_ME"},
340 {0x241a, "sr_Latn_RS"},
341 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
342 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
343 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
344 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
345 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
346 {0x301a, "sr_Cyrl_ME"},
347 {0x281a, "sr_Cyrl_RS"},
348 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
349 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
352 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu
, hu_HU
)
353 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy
, hy_AM
)
354 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id
, id_ID
)
355 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig
, ig_NG
)
356 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii
, ii_CN
)
357 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is
, is_IS
)
359 ILCID_POSIX_SUBTABLE(it
) {
365 ILCID_POSIX_SUBTABLE(iu
) {
367 {0x045d, "iu_Cans_CA"},
369 {0x085d, "iu_Latn_CA"},
373 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw
, iw_IL
) /*Left in for compatibility*/
374 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja
, ja_JP
)
375 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka
, ka_GE
)
376 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk
, kk_KZ
)
377 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl
, kl_GL
)
378 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km
, km_KH
)
379 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn
, kn_IN
)
381 ILCID_POSIX_SUBTABLE(ko
) {
387 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok
, kok_IN
)
388 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr
, kr_NG
)
390 ILCID_POSIX_SUBTABLE(ks
) { /* We could add PK and CN too */
392 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
393 {0x0460, "ks_Arab_IN"}
396 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky
, ky_KG
) /* Kyrgyz is spoken in Kyrgyzstan */
397 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la
, la_IT
) /* TODO: Verify the country */
398 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb
, lb_LU
)
399 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo
, lo_LA
)
400 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt
, lt_LT
)
401 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv
, lv_LV
)
402 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi
, mi_NZ
)
403 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk
, mk_MK
)
404 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml
, ml_IN
)
406 ILCID_POSIX_SUBTABLE(mn
) {
410 {0x0850, "mn_Mong_CN"},
415 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni
,mni_IN
)
416 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh
,moh_CA
)
417 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr
, mr_IN
)
419 ILCID_POSIX_SUBTABLE(ms
) {
421 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
422 {0x043e, "ms_MY"} /* Malaysia*/
425 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt
, mt_MT
)
426 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my
, my_MM
)
428 ILCID_POSIX_SUBTABLE(ne
) {
430 {0x0861, "ne_IN"}, /* India*/
431 {0x0461, "ne_NP"} /* Nepal*/
434 ILCID_POSIX_SUBTABLE(nl
) {
440 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
441 ILCID_POSIX_SUBTABLE(no
) {
442 {0x14, "no"}, /* really nb_NO */
443 {0x7c14, "nb"}, /* really nb */
444 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
445 {0x0414, "no_NO"}, /* really nb_NO */
446 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
447 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
448 {0x0814, "no_NO_NY"}/* really nn_NO */
451 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso
,nso_ZA
) /* TODO: Verify the ISO-639 code */
452 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc
, oc_FR
)
453 ILCID_POSIX_ELEMENT_ARRAY(0x0472, om
, om_ET
) /* TODO: Verify the country */
455 /* Declared as or_IN to get around compiler errors*/
456 ILCID_POSIX_SUBTABLE(or_IN
) {
462 ILCID_POSIX_SUBTABLE(pa
) {
468 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl
, pl_PL
)
469 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps
, ps_AF
)
471 ILCID_POSIX_SUBTABLE(pt
) {
477 ILCID_POSIX_SUBTABLE(qu
) {
484 ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut
, qut_GT
) /* qut is an ISO-639-3 code */
485 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm
, rm_CH
)
486 ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro
, ro_RO
)
488 ILCID_POSIX_SUBTABLE(root
) {
492 ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru
, ru_RU
)
493 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw
, rw_RW
)
494 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa
, sa_IN
)
495 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah
,sah_RU
)
497 ILCID_POSIX_SUBTABLE(sd
) {
503 ILCID_POSIX_SUBTABLE(se
) {
520 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si
, si_LK
)
521 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk
, sk_SK
)
522 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl
, sl_SI
)
523 ILCID_POSIX_ELEMENT_ARRAY(0x0477, so
, so_ET
) /* TODO: Verify the country */
524 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq
, sq_AL
)
526 ILCID_POSIX_SUBTABLE(sv
) {
532 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw
, sw_KE
)
533 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr
, syr_SY
)
534 ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta
, ta_IN
)
535 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te
, te_IN
)
537 /* Cyrillic based by default */
538 ILCID_POSIX_SUBTABLE(tg
) {
541 {0x0428, "tg_Cyrl_TJ"}
544 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th
, th_TH
)
546 ILCID_POSIX_SUBTABLE(ti
) {
552 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk
, tk_TM
)
553 ILCID_POSIX_ELEMENT_ARRAY(0x0432, tn
, tn_BW
)
554 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr
, tr_TR
)
555 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt
, tt_RU
)
557 ILCID_POSIX_SUBTABLE(tzm
) {
559 {0x7c5f, "tzm_Latn"},
560 {0x085f, "tzm_Latn_DZ"}
563 ILCID_POSIX_ELEMENT_ARRAY(0x0480, ug
, ug_CN
)
564 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk
, uk_UA
)
566 ILCID_POSIX_SUBTABLE(ur
) {
572 ILCID_POSIX_SUBTABLE(uz
) {
574 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
575 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
576 {0x0843, "uz_UZ"}, /* Cyrillic based */
577 {0x0443, "uz_Latn_UZ"}, /* Latin based */
578 {0x7c43, "uz_Latn"} /* Latin based */
581 ILCID_POSIX_ELEMENT_ARRAY(0x0433, ve
, ve_ZA
) /* TODO: Verify the country */
582 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi
, vi_VN
)
584 ILCID_POSIX_SUBTABLE(wen
) {
593 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo
, wo_SN
)
594 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh
, xh_ZA
)
595 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo
, yo_NG
)
597 ILCID_POSIX_SUBTABLE(zh
) {
601 {0x0804, "zh_Hans_CN"},
602 {0x0c04, "zh_Hant_HK"},
604 {0x1404, "zh_Hant_MO"},
606 {0x1004, "zh_Hans_SG"},
608 {0x0404, "zh_Hant_TW"},
611 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
612 {0x30404,"zh_TW"}, /* Bopomofo order */
613 {0x20004,"zh@collation=stroke"},
614 {0x20404,"zh_Hant@collation=stroke"},
615 {0x20404,"zh_Hant_TW@collation=stroke"},
616 {0x20404,"zh_TW@collation=stroke"},
617 {0x20804,"zh_Hans@collation=stroke"},
618 {0x20804,"zh_Hans_CN@collation=stroke"},
619 {0x20804,"zh_CN@collation=stroke"}
622 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu
, zu_ZA
)
624 /* This must be static and grouped by LCID. */
626 /* non-existent ISO-639-2 codes */
629 0x467 Fulfulde - Nigeria
630 0x486 K'iche - Guatemala
633 static const ILcidPosixMap gPosixIDmap
[] = {
634 ILCID_POSIX_MAP(af
), /* af Afrikaans 0x36 */
635 ILCID_POSIX_MAP(am
), /* am Amharic 0x5e */
636 ILCID_POSIX_MAP(ar
), /* ar Arabic 0x01 */
637 ILCID_POSIX_MAP(arn
), /* arn Araucanian/Mapudungun 0x7a */
638 ILCID_POSIX_MAP(as
), /* as Assamese 0x4d */
639 ILCID_POSIX_MAP(az
), /* az Azerbaijani 0x2c */
640 ILCID_POSIX_MAP(ba
), /* ba Bashkir 0x6d */
641 ILCID_POSIX_MAP(be
), /* be Belarusian 0x23 */
642 /* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
643 ILCID_POSIX_MAP(bg
), /* bg Bulgarian 0x02 */
644 ILCID_POSIX_MAP(bn
), /* bn Bengali; Bangla 0x45 */
645 ILCID_POSIX_MAP(bo
), /* bo Tibetan 0x51 */
646 ILCID_POSIX_MAP(br
), /* br Breton 0x7e */
647 ILCID_POSIX_MAP(ca
), /* ca Catalan 0x03 */
648 ILCID_POSIX_MAP(chr
), /* chr Cherokee 0x5c */
649 ILCID_POSIX_MAP(co
), /* co Corsican 0x83 */
650 ILCID_POSIX_MAP(cs
), /* cs Czech 0x05 */
651 ILCID_POSIX_MAP(cy
), /* cy Welsh 0x52 */
652 ILCID_POSIX_MAP(da
), /* da Danish 0x06 */
653 ILCID_POSIX_MAP(de
), /* de German 0x07 */
654 ILCID_POSIX_MAP(dv
), /* dv Divehi 0x65 */
655 ILCID_POSIX_MAP(el
), /* el Greek 0x08 */
656 ILCID_POSIX_MAP(en
), /* en English 0x09 */
657 ILCID_POSIX_MAP(en_US_POSIX
), /* invariant 0x7f */
658 ILCID_POSIX_MAP(es
), /* es Spanish 0x0a */
659 ILCID_POSIX_MAP(et
), /* et Estonian 0x25 */
660 ILCID_POSIX_MAP(eu
), /* eu Basque 0x2d */
661 ILCID_POSIX_MAP(fa
), /* fa Persian/Farsi 0x29 */
662 ILCID_POSIX_MAP(fa_AF
), /* fa Persian/Dari 0x8c */
663 ILCID_POSIX_MAP(fi
), /* fi Finnish 0x0b */
664 ILCID_POSIX_MAP(fil
), /* fil Filipino 0x64 */
665 ILCID_POSIX_MAP(fo
), /* fo Faroese 0x38 */
666 ILCID_POSIX_MAP(fr
), /* fr French 0x0c */
667 ILCID_POSIX_MAP(fy
), /* fy Frisian 0x62 */
668 ILCID_POSIX_MAP(ga
), /* * Gaelic (Ireland,Scotland) 0x3c */
669 ILCID_POSIX_MAP(gd
), /* gd Gaelic (United Kingdom) 0x91 */
670 ILCID_POSIX_MAP(gl
), /* gl Galician 0x56 */
671 ILCID_POSIX_MAP(gn
), /* gn Guarani 0x74 */
672 ILCID_POSIX_MAP(gsw
), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
673 ILCID_POSIX_MAP(gu
), /* gu Gujarati 0x47 */
674 ILCID_POSIX_MAP(ha
), /* ha Hausa 0x68 */
675 ILCID_POSIX_MAP(haw
), /* haw Hawaiian 0x75 */
676 ILCID_POSIX_MAP(he
), /* he Hebrew (formerly iw) 0x0d */
677 ILCID_POSIX_MAP(hi
), /* hi Hindi 0x39 */
678 ILCID_POSIX_MAP(hr
), /* * Croatian and others 0x1a */
679 ILCID_POSIX_MAP(hu
), /* hu Hungarian 0x0e */
680 ILCID_POSIX_MAP(hy
), /* hy Armenian 0x2b */
681 ILCID_POSIX_MAP(id
), /* id Indonesian (formerly in) 0x21 */
682 ILCID_POSIX_MAP(ig
), /* ig Igbo 0x70 */
683 ILCID_POSIX_MAP(ii
), /* ii Sichuan Yi 0x78 */
684 ILCID_POSIX_MAP(is
), /* is Icelandic 0x0f */
685 ILCID_POSIX_MAP(it
), /* it Italian 0x10 */
686 ILCID_POSIX_MAP(iu
), /* iu Inuktitut 0x5d */
687 ILCID_POSIX_MAP(iw
), /* iw Hebrew 0x0d */
688 ILCID_POSIX_MAP(ja
), /* ja Japanese 0x11 */
689 ILCID_POSIX_MAP(ka
), /* ka Georgian 0x37 */
690 ILCID_POSIX_MAP(kk
), /* kk Kazakh 0x3f */
691 ILCID_POSIX_MAP(kl
), /* kl Kalaallisut 0x6f */
692 ILCID_POSIX_MAP(km
), /* km Khmer 0x53 */
693 ILCID_POSIX_MAP(kn
), /* kn Kannada 0x4b */
694 ILCID_POSIX_MAP(ko
), /* ko Korean 0x12 */
695 ILCID_POSIX_MAP(kok
), /* kok Konkani 0x57 */
696 ILCID_POSIX_MAP(kr
), /* kr Kanuri 0x71 */
697 ILCID_POSIX_MAP(ks
), /* ks Kashmiri 0x60 */
698 ILCID_POSIX_MAP(ky
), /* ky Kyrgyz 0x40 */
699 ILCID_POSIX_MAP(lb
), /* lb Luxembourgish 0x6e */
700 ILCID_POSIX_MAP(la
), /* la Latin 0x76 */
701 ILCID_POSIX_MAP(lo
), /* lo Lao 0x54 */
702 ILCID_POSIX_MAP(lt
), /* lt Lithuanian 0x27 */
703 ILCID_POSIX_MAP(lv
), /* lv Latvian, Lettish 0x26 */
704 ILCID_POSIX_MAP(mi
), /* mi Maori 0x81 */
705 ILCID_POSIX_MAP(mk
), /* mk Macedonian 0x2f */
706 ILCID_POSIX_MAP(ml
), /* ml Malayalam 0x4c */
707 ILCID_POSIX_MAP(mn
), /* mn Mongolian 0x50 */
708 ILCID_POSIX_MAP(mni
), /* mni Manipuri 0x58 */
709 ILCID_POSIX_MAP(moh
), /* moh Mohawk 0x7c */
710 ILCID_POSIX_MAP(mr
), /* mr Marathi 0x4e */
711 ILCID_POSIX_MAP(ms
), /* ms Malay 0x3e */
712 ILCID_POSIX_MAP(mt
), /* mt Maltese 0x3a */
713 ILCID_POSIX_MAP(my
), /* my Burmese 0x55 */
714 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
715 ILCID_POSIX_MAP(ne
), /* ne Nepali 0x61 */
716 ILCID_POSIX_MAP(nl
), /* nl Dutch 0x13 */
717 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
718 ILCID_POSIX_MAP(no
), /* * Norwegian 0x14 */
719 ILCID_POSIX_MAP(nso
), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
720 ILCID_POSIX_MAP(oc
), /* oc Occitan 0x82 */
721 ILCID_POSIX_MAP(om
), /* om Oromo 0x72 */
722 ILCID_POSIX_MAP(or_IN
), /* or Oriya 0x48 */
723 ILCID_POSIX_MAP(pa
), /* pa Punjabi 0x46 */
724 ILCID_POSIX_MAP(pl
), /* pl Polish 0x15 */
725 ILCID_POSIX_MAP(ps
), /* ps Pashto 0x63 */
726 ILCID_POSIX_MAP(pt
), /* pt Portuguese 0x16 */
727 ILCID_POSIX_MAP(qu
), /* qu Quechua 0x6B */
728 ILCID_POSIX_MAP(qut
), /* qut K'iche 0x86 */
729 ILCID_POSIX_MAP(rm
), /* rm Raeto-Romance/Romansh 0x17 */
730 ILCID_POSIX_MAP(ro
), /* ro Romanian 0x18 */
731 ILCID_POSIX_MAP(root
), /* root 0x00 */
732 ILCID_POSIX_MAP(ru
), /* ru Russian 0x19 */
733 ILCID_POSIX_MAP(rw
), /* rw Kinyarwanda 0x87 */
734 ILCID_POSIX_MAP(sa
), /* sa Sanskrit 0x4f */
735 ILCID_POSIX_MAP(sah
), /* sah Yakut 0x85 */
736 ILCID_POSIX_MAP(sd
), /* sd Sindhi 0x59 */
737 ILCID_POSIX_MAP(se
), /* se Sami 0x3b */
738 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
739 ILCID_POSIX_MAP(si
), /* si Sinhalese 0x5b */
740 ILCID_POSIX_MAP(sk
), /* sk Slovak 0x1b */
741 ILCID_POSIX_MAP(sl
), /* sl Slovenian 0x24 */
742 ILCID_POSIX_MAP(so
), /* so Somali 0x77 */
743 ILCID_POSIX_MAP(sq
), /* sq Albanian 0x1c */
744 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
745 ILCID_POSIX_MAP(sv
), /* sv Swedish 0x1d */
746 ILCID_POSIX_MAP(sw
), /* sw Swahili 0x41 */
747 ILCID_POSIX_MAP(syr
), /* syr Syriac 0x5A */
748 ILCID_POSIX_MAP(ta
), /* ta Tamil 0x49 */
749 ILCID_POSIX_MAP(te
), /* te Telugu 0x4a */
750 ILCID_POSIX_MAP(tg
), /* tg Tajik 0x28 */
751 ILCID_POSIX_MAP(th
), /* th Thai 0x1e */
752 ILCID_POSIX_MAP(ti
), /* ti Tigrigna 0x73 */
753 ILCID_POSIX_MAP(tk
), /* tk Turkmen 0x42 */
754 ILCID_POSIX_MAP(tn
), /* tn Tswana 0x32 */
755 ILCID_POSIX_MAP(tr
), /* tr Turkish 0x1f */
756 ILCID_POSIX_MAP(tt
), /* tt Tatar 0x44 */
757 ILCID_POSIX_MAP(tzm
), /* tzm 0x5f */
758 ILCID_POSIX_MAP(ug
), /* ug Uighur 0x80 */
759 ILCID_POSIX_MAP(uk
), /* uk Ukrainian 0x22 */
760 ILCID_POSIX_MAP(ur
), /* ur Urdu 0x20 */
761 ILCID_POSIX_MAP(uz
), /* uz Uzbek 0x43 */
762 ILCID_POSIX_MAP(ve
), /* ve Venda 0x33 */
763 ILCID_POSIX_MAP(vi
), /* vi Vietnamese 0x2a */
764 ILCID_POSIX_MAP(wen
), /* wen Sorbian 0x2e */
765 ILCID_POSIX_MAP(wo
), /* wo Wolof 0x88 */
766 ILCID_POSIX_MAP(xh
), /* xh Xhosa 0x34 */
767 ILCID_POSIX_MAP(yo
), /* yo Yoruba 0x6a */
768 ILCID_POSIX_MAP(zh
), /* zh Chinese 0x04 */
769 ILCID_POSIX_MAP(zu
), /* zu Zulu 0x35 */
772 static const uint32_t gLocaleCount
= sizeof(gPosixIDmap
)/sizeof(ILcidPosixMap
);
775 * Do not call this function directly. It is called by getHostID.
776 * The function is not private because this struct must stay as a C struct,
777 * and this is an internal class.
780 idCmp(const char* id1
, const char* id2
)
783 while (*id1
== *id2
&& *id1
!= 0) {
792 * Searches for a Windows LCID
794 * @param posixid the Posix style locale id.
795 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
796 * no equivalent Windows LCID.
800 getHostID(const ILcidPosixMap
*this_0
, const char* posixID
, UErrorCode
* status
)
803 int32_t bestIdxDiff
= 0;
804 int32_t posixIDlen
= (int32_t)uprv_strlen(posixID
);
807 for (idx
= 0; idx
< this_0
->numRegions
; idx
++ ) {
808 int32_t sameChars
= idCmp(posixID
, this_0
->regionMaps
[idx
].posixID
);
809 if (sameChars
> bestIdxDiff
&& this_0
->regionMaps
[idx
].posixID
[sameChars
] == 0) {
810 if (posixIDlen
== sameChars
) {
812 return this_0
->regionMaps
[idx
].hostID
;
814 bestIdxDiff
= sameChars
;
818 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
819 /* We also have to make sure that sid and si and similar string subsets don't match. */
820 if ((posixID
[bestIdxDiff
] == '_' || posixID
[bestIdxDiff
] == '@')
821 && this_0
->regionMaps
[bestIdx
].posixID
[bestIdxDiff
] == 0)
823 *status
= U_USING_FALLBACK_WARNING
;
824 return this_0
->regionMaps
[bestIdx
].hostID
;
828 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
829 return this_0
->regionMaps
->hostID
;
833 getPosixID(const ILcidPosixMap
*this_0
, uint32_t hostID
)
836 for (i
= 0; i
<= this_0
->numRegions
; i
++)
838 if (this_0
->regionMaps
[i
].hostID
== hostID
)
840 return this_0
->regionMaps
[i
].posixID
;
844 /* If you get here, then no matching region was found,
845 so return the language id with the wild card region. */
846 return this_0
->regionMaps
[0].posixID
;
850 //////////////////////////////////////
854 /////////////////////////////////////
856 #ifdef USE_WINDOWS_LOCALE_API
/**
 * Change the tag separator from '-' to '_' in the first len characters of
 * buffer.
 *
 * The expansion is a complete braced statement, so it needs no trailing
 * semicolon and cannot capture an 'else' that follows the invocation.
 *
 * @param buffer the character buffer to modify in place.
 * @param len    the number of leading characters to examine.
 * @param i      an integer lvalue used as the loop counter; it equals len
 *               afterwards whenever len is positive.
 */
#define FIX_LOCALE_ID_TAG_SEPARATOR(buffer, len, i) \
    for ((i) = 0; (i) < (len); (i)++) { \
        if ((buffer)[(i)] == '-') { \
            (buffer)[(i)] = '_'; \
        } \
    }
865 * Various language tags need to be changed:
869 #define FIX_LANGUAGE_ID_TAG(buffer, len) \
871 if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
873 uprv_strcat(buffer, buffer+3); \
874 } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
875 buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
876 uprv_strcat(buffer, buffer+3); \
880 static char gPosixFromLCID
[ULOC_FULLNAME_CAPACITY
];
883 uprv_convertToPosix(uint32_t hostid
, UErrorCode
* status
)
886 uint32_t localeIndex
;
887 #ifdef USE_WINDOWS_LOCALE_API
890 uprv_memset(gPosixFromLCID
, 0, sizeof(gPosixFromLCID
));
892 ret
= GetLocaleInfoA(hostid
, LOCALE_SNAME
, (LPSTR
)gPosixFromLCID
, sizeof(gPosixFromLCID
));
894 FIX_LOCALE_ID_TAG_SEPARATOR(gPosixFromLCID
, ret
, localeIndex
)
895 FIX_LANGUAGE_ID_TAG(gPosixFromLCID
, ret
)
897 return gPosixFromLCID
;
900 langID
= LANGUAGE_LCID(hostid
);
902 for (localeIndex
= 0; localeIndex
< gLocaleCount
; localeIndex
++)
904 if (langID
== gPosixIDmap
[localeIndex
].regionMaps
->hostID
)
906 return getPosixID(&gPosixIDmap
[localeIndex
], hostid
);
911 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
916 //////////////////////////////////////
919 // This should only be called from uloc_getLCID.
920 // The locale ID must be in canonical form.
921 // langID is separate so that this file doesn't depend on the uloc_* API.
923 /////////////////////////////////////
927 uprv_convertToLCID(const char *langID
, const char* posixID
, UErrorCode
* status
)
931 uint32_t high
= gLocaleCount
;
937 uint32_t fallbackValue
= (uint32_t)-1;
941 /* Check for incomplete id. */
942 if (!langID
|| !posixID
|| uprv_strlen(langID
) < 2 || uprv_strlen(posixID
) < 2) {
946 /*Binary search for the map entry for normal cases */
948 while (high
> low
) /*binary search*/{
950 mid
= (high
+low
) >> 1; /*Finds median*/
955 compVal
= uprv_strcmp(langID
, gPosixIDmap
[mid
].regionMaps
->posixID
);
959 else if (compVal
> 0){
962 else /*we found it*/{
963 return getHostID(&gPosixIDmap
[mid
], posixID
, status
);
969 * Sometimes we can't do a binary search on posixID because some LCIDs
970 * go to different locales. We hit one of those special cases.
972 for (idx
= 0; idx
< gLocaleCount
; idx
++ ) {
973 myStatus
= U_ZERO_ERROR
;
974 value
= getHostID(&gPosixIDmap
[idx
], posixID
, &myStatus
);
975 if (myStatus
== U_ZERO_ERROR
) {
978 else if (myStatus
== U_USING_FALLBACK_WARNING
) {
979 fallbackValue
= value
;
983 if (fallbackValue
!= (uint32_t)-1) {
984 *status
= U_USING_FALLBACK_WARNING
;
985 return fallbackValue
;
989 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
990 return 0; /* return international (root) */