2 **********************************************************************
3 * Copyright (C) 1996-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Provides functionality for mapping between
8 * LCID and Posix IDs or ICU locale to codepage
10 * Note: All classes and code in this file are
11 * intended for internal use only.
13 * Methods of interest:
14 * unsigned long convertToLCID(const char*);
15 * const char* convertToPosix(unsigned long);
17 * Kathleen Wilson, 4/30/96
19 * Date Name Description
20 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
21 * setId() method and safety check against
23 * 04/23/99 stephen Added C wrapper for convertToPosix.
24 * 09/18/00 george Removed the memory leaks.
25 * 08/23/01 george Convert to C
32 #if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
34 * TODO: It seems like we should widen this to
35 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
36 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
37 * but those use gcc and won't have defined(_MSC_VER).
38 * We might need to #include some Windows header and test for some version macro from there.
39 * Or call some Windows function and see what it returns.
41 #define USE_WINDOWS_LOCALE_API
44 #ifdef USE_WINDOWS_LOCALE_API
51 * The mapping from Win32 locale ID numbers to POSIX locale strings should
54 * Many LCID values come from winnt.h
55 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
59 ////////////////////////////////////////////////
61 // Internal Classes for LCID <--> POSIX Mapping
63 /////////////////////////////////////////////////
66 typedef struct ILcidPosixElement
68 const uint32_t hostID
;
69 const char * const posixID
;
72 typedef struct ILcidPosixMap
74 const uint32_t numRegions
;
75 const struct ILcidPosixElement
* const regionMaps
;
80 /////////////////////////////////////////////////
82 // Easy macros to make the LCID <--> POSIX Mapping
84 /////////////////////////////////////////////////
88 * The standard one language/one country mapping for LCID.
89 * The first element must be the language, and the following
90 * elements are the language with the country.
91 * @param hostID LCID in host format such as 0x044d
92 * @param languageID posix ID of just the language such as 'de'
93 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
95 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
96 static const ILcidPosixElement locmap_ ## languageID [] = { \
97 {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
102 * Define a subtable by ID
103 * @param id the POSIX ID, either a language or language_TERRITORY
105 #define ILCID_POSIX_SUBTABLE(id) \
106 static const ILcidPosixElement locmap_ ## id [] =
110 * Create the map for the posixID. This macro supposes that the language string
111 * name is the same as the global variable name, and that the first element
112 * in the ILcidPosixElement is just the language.
113 * @param _posixID the full POSIX ID for this entry.
115 #define ILCID_POSIX_MAP(_posixID) \
116 {sizeof(locmap_ ## _posixID)/sizeof(ILcidPosixElement), locmap_ ## _posixID}
119 ////////////////////////////////////////////
121 // Create the table of LCID to POSIX Mapping
122 // None of it should be dynamically created.
124 // Keep static locale variables inside the function so that
125 // it can be created properly during static init.
127 // Note: This table should be updated periodically. Check the National Language Support API Reference Website.
128 // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
129 // maintained for support of older Windows version.
130 // Update: Windows 7 (091130)
132 // Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
133 // @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
134 // called from uloc_getLCID(), keywords other than collation are already removed. If we really need
135 // to support other keywords in this mapping data, we must update the implementation.
136 ////////////////////////////////////////////
139 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af
, af_ZA
)
141 ILCID_POSIX_SUBTABLE(ar
) {
162 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as
, as_IN
)
163 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am
, am_ET
)
164 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn
,arn_CL
)
166 ILCID_POSIX_SUBTABLE(az
) {
168 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
169 {0x742c, "az_Cyrl"}, /* Cyrillic based */
170 {0x042c, "az_Latn_AZ"}, /* Latin based */
171 {0x782c, "az_Latn"}, /* Latin based */
172 {0x042c, "az_AZ"} /* Latin based */
175 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba
, ba_RU
)
176 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be
, be_BY
)
178 /*ILCID_POSIX_SUBTABLE(ber) {
180 {0x045f, "ber_Arab_DZ"},
181 {0x045f, "ber_Arab"},
182 {0x085f, "ber_Latn_DZ"},
186 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg
, bg_BG
)
188 ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin
, bin_NG
)
190 ILCID_POSIX_SUBTABLE(bn
) {
196 ILCID_POSIX_SUBTABLE(bo
) {
202 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br
, br_FR
)
204 ILCID_POSIX_SUBTABLE(ca
) {
207 {0x0803, "ca_ES_VALENCIA"}
210 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co
, co_FR
)
211 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr
,chr_US
)
213 ILCID_POSIX_SUBTABLE(ckb
) {
216 {0x7c92, "ckb_Arab"},
218 {0x0492, "ckb_Arab_IQ"},
219 {0x0492, "ku_Arab_IQ"}
222 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
223 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs
, cs_CZ
)
225 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy
, cy_GB
)
226 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da
, da_DK
)
228 ILCID_POSIX_SUBTABLE(de
) {
235 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
236 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
239 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv
, dv_MV
)
240 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el
, el_GR
)
242 ILCID_POSIX_SUBTABLE(en
) {
259 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
260 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
264 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
265 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
266 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
267 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
268 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
271 ILCID_POSIX_SUBTABLE(en_US_POSIX
) {
272 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
275 ILCID_POSIX_SUBTABLE(es
) {
284 {0x0c0a, "es_ES"}, /*Modern sort.*/
298 {0x040a, "es_ES@collation=traditional"},
299 {0x040a, "es@collation=traditional"}
302 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et
, et_EE
)
303 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu
, eu_ES
)
305 /* ISO-639 doesn't distinguish between Persian and Dari.*/
306 ILCID_POSIX_SUBTABLE(fa
) {
308 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
309 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
312 /* duplicate for roundtripping */
313 ILCID_POSIX_SUBTABLE(fa_AF
) {
314 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
315 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
318 ILCID_POSIX_SUBTABLE(ff
) {
321 {0x0867, "ff_Latn_SN"}
324 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi
, fi_FI
)
325 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil
,fil_PH
)
326 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo
, fo_FO
)
328 ILCID_POSIX_SUBTABLE(fr
) {
349 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv
, fuv_NG
)
351 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy
, fy_NL
)
353 ILCID_POSIX_SUBTABLE(ga
) { /* Gaelic (Ireland) */
359 ILCID_POSIX_SUBTABLE(gd
) { /* Gaelic (Scotland) */
364 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl
, gl_ES
)
365 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu
, gu_IN
)
366 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn
, gn_PY
)
367 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw
,gsw_FR
)
369 ILCID_POSIX_SUBTABLE(ha
) {
372 {0x0468, "ha_Latn_NG"},
375 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw
,haw_US
)
376 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he
, he_IL
)
377 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi
, hi_IN
)
379 /* This LCID is really four different locales.*/
380 ILCID_POSIX_SUBTABLE(hr
) {
382 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
383 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
384 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
385 {0x781a, "bs"}, /* Bosnian */
386 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
387 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
388 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
389 {0x041a, "hr_HR"}, /* Croatian*/
390 {0x2c1a, "sr_Latn_ME"},
391 {0x241a, "sr_Latn_RS"},
392 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
393 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
394 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
395 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
396 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
397 {0x301a, "sr_Cyrl_ME"},
398 {0x281a, "sr_Cyrl_RS"},
399 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
400 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
403 ILCID_POSIX_SUBTABLE(hsb
) {
410 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu
, hu_HU
)
411 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy
, hy_AM
)
412 ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb
, ibb_NG
)
413 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id
, id_ID
)
414 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig
, ig_NG
)
415 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii
, ii_CN
)
416 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is
, is_IS
)
418 ILCID_POSIX_SUBTABLE(it
) {
424 ILCID_POSIX_SUBTABLE(iu
) {
426 {0x045d, "iu_Cans_CA"},
428 {0x085d, "iu_Latn_CA"},
432 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw
, iw_IL
) /*Left in for compatibility*/
433 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja
, ja_JP
)
434 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka
, ka_GE
)
435 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk
, kk_KZ
)
436 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl
, kl_GL
)
437 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km
, km_KH
)
438 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn
, kn_IN
)
440 ILCID_POSIX_SUBTABLE(ko
) {
446 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok
, kok_IN
)
447 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr
, kr_NG
)
449 ILCID_POSIX_SUBTABLE(ks
) { /* We could add PK and CN too */
451 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
452 {0x0460, "ks_Arab_IN"},
453 {0x0860, "ks_Deva_IN"}
456 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky
, ky_KG
) /* Kyrgyz is spoken in Kyrgyzstan */
457 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la
, la_IT
) /* TODO: Verify the country */
458 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb
, lb_LU
)
459 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo
, lo_LA
)
460 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt
, lt_LT
)
461 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv
, lv_LV
)
462 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi
, mi_NZ
)
463 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk
, mk_MK
)
464 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml
, ml_IN
)
466 ILCID_POSIX_SUBTABLE(mn
) {
470 {0x0850, "mn_Mong_CN"},
473 {0x0c50, "mn_Mong_MN"}
476 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni
,mni_IN
)
477 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh
,moh_CA
)
478 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr
, mr_IN
)
480 ILCID_POSIX_SUBTABLE(ms
) {
482 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
483 {0x043e, "ms_MY"} /* Malaysia*/
486 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt
, mt_MT
)
487 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my
, my_MM
)
489 ILCID_POSIX_SUBTABLE(ne
) {
491 {0x0861, "ne_IN"}, /* India*/
492 {0x0461, "ne_NP"} /* Nepal*/
495 ILCID_POSIX_SUBTABLE(nl
) {
501 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
502 ILCID_POSIX_SUBTABLE(no
) {
503 {0x14, "no"}, /* really nb_NO */
504 {0x7c14, "nb"}, /* really nb */
505 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
506 {0x0414, "no_NO"}, /* really nb_NO */
507 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
508 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
509 {0x0814, "no_NO_NY"}/* really nn_NO */
512 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso
,nso_ZA
) /* TODO: Verify the ISO-639 code */
513 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc
, oc_FR
)
515 ILCID_POSIX_SUBTABLE(om
) { /* TODO: Verify the country */
521 /* Declared as or_IN to get around compiler errors*/
522 ILCID_POSIX_SUBTABLE(or_IN
) {
528 ILCID_POSIX_SUBTABLE(pa
) {
532 {0x0846, "pa_Arab_PK"}
535 ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap
, pap_AN
)
536 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl
, pl_PL
)
537 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps
, ps_AF
)
539 ILCID_POSIX_SUBTABLE(pt
) {
545 ILCID_POSIX_SUBTABLE(qu
) {
555 ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut
, qut_GT
) /* qut is an ISO-639-3 code */
556 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm
, rm_CH
)
558 ILCID_POSIX_SUBTABLE(ro
) {
564 ILCID_POSIX_SUBTABLE(root
) {
568 ILCID_POSIX_SUBTABLE(ru
) {
574 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw
, rw_RW
)
575 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa
, sa_IN
)
576 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah
,sah_RU
)
578 ILCID_POSIX_SUBTABLE(sd
) {
584 ILCID_POSIX_SUBTABLE(se
) {
601 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si
, si_LK
)
602 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk
, sk_SK
)
603 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl
, sl_SI
)
605 ILCID_POSIX_SUBTABLE(so
) { /* TODO: Verify the country */
611 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq
, sq_AL
)
612 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st
, st_ZA
)
614 ILCID_POSIX_SUBTABLE(sv
) {
620 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw
, sw_KE
)
621 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr
, syr_SY
)
623 ILCID_POSIX_SUBTABLE(ta
) {
629 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te
, te_IN
)
631 /* Cyrillic based by default */
632 ILCID_POSIX_SUBTABLE(tg
) {
635 {0x0428, "tg_Cyrl_TJ"}
638 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th
, th_TH
)
640 ILCID_POSIX_SUBTABLE(ti
) {
646 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk
, tk_TM
)
648 ILCID_POSIX_SUBTABLE(tn
) {
654 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr
, tr_TR
)
655 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts
, ts_ZA
)
656 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt
, tt_RU
)
658 ILCID_POSIX_SUBTABLE(tzm
) {
660 {0x7c5f, "tzm_Latn"},
661 {0x085f, "tzm_Latn_DZ"},
662 {0x105f, "tzm_Tfng_MA"},
666 ILCID_POSIX_SUBTABLE(ug
) {
669 {0x0480, "ug_Arab_CN"}
672 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk
, uk_UA
)
674 ILCID_POSIX_SUBTABLE(ur
) {
680 ILCID_POSIX_SUBTABLE(uz
) {
682 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
683 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
684 {0x0843, "uz_UZ"}, /* Cyrillic based */
685 {0x0443, "uz_Latn_UZ"}, /* Latin based */
686 {0x7c43, "uz_Latn"} /* Latin based */
689 ILCID_POSIX_SUBTABLE(ve
) { /* TODO: Verify the country */
695 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi
, vi_VN
)
696 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo
, wo_SN
)
697 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh
, xh_ZA
)
698 ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi
, yi
)
699 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo
, yo_NG
)
701 ILCID_POSIX_SUBTABLE(zh
) {
705 {0x0804, "zh_Hans_CN"},
706 {0x0c04, "zh_Hant_HK"},
708 {0x1404, "zh_Hant_MO"},
710 {0x1004, "zh_Hans_SG"},
712 {0x0404, "zh_Hant_TW"},
715 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
716 {0x30404,"zh_TW"}, /* Bopomofo order */
717 {0x20004,"zh@collation=stroke"},
718 {0x20404,"zh_Hant@collation=stroke"},
719 {0x20404,"zh_Hant_TW@collation=stroke"},
720 {0x20404,"zh_TW@collation=stroke"},
721 {0x20804,"zh_Hans@collation=stroke"},
722 {0x20804,"zh_Hans_CN@collation=stroke"},
723 {0x20804,"zh_CN@collation=stroke"}
726 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu
, zu_ZA
)
728 /* This must be static and grouped by LCID. */
729 static const ILcidPosixMap gPosixIDmap
[] = {
730 ILCID_POSIX_MAP(af
), /* af Afrikaans 0x36 */
731 ILCID_POSIX_MAP(am
), /* am Amharic 0x5e */
732 ILCID_POSIX_MAP(ar
), /* ar Arabic 0x01 */
733 ILCID_POSIX_MAP(arn
), /* arn Araucanian/Mapudungun 0x7a */
734 ILCID_POSIX_MAP(as
), /* as Assamese 0x4d */
735 ILCID_POSIX_MAP(az
), /* az Azerbaijani 0x2c */
736 ILCID_POSIX_MAP(ba
), /* ba Bashkir 0x6d */
737 ILCID_POSIX_MAP(be
), /* be Belarusian 0x23 */
738 /* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
739 ILCID_POSIX_MAP(bg
), /* bg Bulgarian 0x02 */
740 ILCID_POSIX_MAP(bin
), /* bin Edo 0x66 */
741 ILCID_POSIX_MAP(bn
), /* bn Bengali; Bangla 0x45 */
742 ILCID_POSIX_MAP(bo
), /* bo Tibetan 0x51 */
743 ILCID_POSIX_MAP(br
), /* br Breton 0x7e */
744 ILCID_POSIX_MAP(ca
), /* ca Catalan 0x03 */
745 ILCID_POSIX_MAP(chr
), /* chr Cherokee 0x5c */
746 ILCID_POSIX_MAP(ckb
), /* ckb Sorani (Central Kurdish) 0x92 */
747 ILCID_POSIX_MAP(co
), /* co Corsican 0x83 */
748 ILCID_POSIX_MAP(cs
), /* cs Czech 0x05 */
749 ILCID_POSIX_MAP(cy
), /* cy Welsh 0x52 */
750 ILCID_POSIX_MAP(da
), /* da Danish 0x06 */
751 ILCID_POSIX_MAP(de
), /* de German 0x07 */
752 ILCID_POSIX_MAP(dv
), /* dv Divehi 0x65 */
753 ILCID_POSIX_MAP(el
), /* el Greek 0x08 */
754 ILCID_POSIX_MAP(en
), /* en English 0x09 */
755 ILCID_POSIX_MAP(en_US_POSIX
), /* invariant 0x7f */
756 ILCID_POSIX_MAP(es
), /* es Spanish 0x0a */
757 ILCID_POSIX_MAP(et
), /* et Estonian 0x25 */
758 ILCID_POSIX_MAP(eu
), /* eu Basque 0x2d */
759 ILCID_POSIX_MAP(fa
), /* fa Persian/Farsi 0x29 */
760 ILCID_POSIX_MAP(fa_AF
), /* fa Persian/Dari 0x8c */
761 ILCID_POSIX_MAP(ff
), /* ff Fula 0x67 */
762 ILCID_POSIX_MAP(fi
), /* fi Finnish 0x0b */
763 ILCID_POSIX_MAP(fil
), /* fil Filipino 0x64 */
764 ILCID_POSIX_MAP(fo
), /* fo Faroese 0x38 */
765 ILCID_POSIX_MAP(fr
), /* fr French 0x0c */
766 ILCID_POSIX_MAP(fuv
), /* fuv Fulfulde - Nigeria 0x67 */
767 ILCID_POSIX_MAP(fy
), /* fy Frisian 0x62 */
768 ILCID_POSIX_MAP(ga
), /* * Gaelic (Ireland,Scotland) 0x3c */
769 ILCID_POSIX_MAP(gd
), /* gd Gaelic (United Kingdom) 0x91 */
770 ILCID_POSIX_MAP(gl
), /* gl Galician 0x56 */
771 ILCID_POSIX_MAP(gn
), /* gn Guarani 0x74 */
772 ILCID_POSIX_MAP(gsw
), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
773 ILCID_POSIX_MAP(gu
), /* gu Gujarati 0x47 */
774 ILCID_POSIX_MAP(ha
), /* ha Hausa 0x68 */
775 ILCID_POSIX_MAP(haw
), /* haw Hawaiian 0x75 */
776 ILCID_POSIX_MAP(he
), /* he Hebrew (formerly iw) 0x0d */
777 ILCID_POSIX_MAP(hi
), /* hi Hindi 0x39 */
778 ILCID_POSIX_MAP(hr
), /* * Croatian and others 0x1a */
779 ILCID_POSIX_MAP(hsb
), /* hsb Upper Sorbian 0x2e */
780 ILCID_POSIX_MAP(hu
), /* hu Hungarian 0x0e */
781 ILCID_POSIX_MAP(hy
), /* hy Armenian 0x2b */
782 ILCID_POSIX_MAP(ibb
), /* ibb Ibibio - Nigeria 0x69 */
783 ILCID_POSIX_MAP(id
), /* id Indonesian (formerly in) 0x21 */
784 ILCID_POSIX_MAP(ig
), /* ig Igbo 0x70 */
785 ILCID_POSIX_MAP(ii
), /* ii Sichuan Yi 0x78 */
786 ILCID_POSIX_MAP(is
), /* is Icelandic 0x0f */
787 ILCID_POSIX_MAP(it
), /* it Italian 0x10 */
788 ILCID_POSIX_MAP(iu
), /* iu Inuktitut 0x5d */
789 ILCID_POSIX_MAP(iw
), /* iw Hebrew 0x0d */
790 ILCID_POSIX_MAP(ja
), /* ja Japanese 0x11 */
791 ILCID_POSIX_MAP(ka
), /* ka Georgian 0x37 */
792 ILCID_POSIX_MAP(kk
), /* kk Kazakh 0x3f */
793 ILCID_POSIX_MAP(kl
), /* kl Kalaallisut 0x6f */
794 ILCID_POSIX_MAP(km
), /* km Khmer 0x53 */
795 ILCID_POSIX_MAP(kn
), /* kn Kannada 0x4b */
796 ILCID_POSIX_MAP(ko
), /* ko Korean 0x12 */
797 ILCID_POSIX_MAP(kok
), /* kok Konkani 0x57 */
798 ILCID_POSIX_MAP(kr
), /* kr Kanuri 0x71 */
799 ILCID_POSIX_MAP(ks
), /* ks Kashmiri 0x60 */
800 ILCID_POSIX_MAP(ky
), /* ky Kyrgyz 0x40 */
801 ILCID_POSIX_MAP(lb
), /* lb Luxembourgish 0x6e */
802 ILCID_POSIX_MAP(la
), /* la Latin 0x76 */
803 ILCID_POSIX_MAP(lo
), /* lo Lao 0x54 */
804 ILCID_POSIX_MAP(lt
), /* lt Lithuanian 0x27 */
805 ILCID_POSIX_MAP(lv
), /* lv Latvian, Lettish 0x26 */
806 ILCID_POSIX_MAP(mi
), /* mi Maori 0x81 */
807 ILCID_POSIX_MAP(mk
), /* mk Macedonian 0x2f */
808 ILCID_POSIX_MAP(ml
), /* ml Malayalam 0x4c */
809 ILCID_POSIX_MAP(mn
), /* mn Mongolian 0x50 */
810 ILCID_POSIX_MAP(mni
), /* mni Manipuri 0x58 */
811 ILCID_POSIX_MAP(moh
), /* moh Mohawk 0x7c */
812 ILCID_POSIX_MAP(mr
), /* mr Marathi 0x4e */
813 ILCID_POSIX_MAP(ms
), /* ms Malay 0x3e */
814 ILCID_POSIX_MAP(mt
), /* mt Maltese 0x3a */
815 ILCID_POSIX_MAP(my
), /* my Burmese 0x55 */
816 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
817 ILCID_POSIX_MAP(ne
), /* ne Nepali 0x61 */
818 ILCID_POSIX_MAP(nl
), /* nl Dutch 0x13 */
819 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
820 ILCID_POSIX_MAP(no
), /* * Norwegian 0x14 */
821 ILCID_POSIX_MAP(nso
), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
822 ILCID_POSIX_MAP(oc
), /* oc Occitan 0x82 */
823 ILCID_POSIX_MAP(om
), /* om Oromo 0x72 */
824 ILCID_POSIX_MAP(or_IN
), /* or Oriya 0x48 */
825 ILCID_POSIX_MAP(pa
), /* pa Punjabi 0x46 */
826 ILCID_POSIX_MAP(pap
), /* pap Papiamentu 0x79 */
827 ILCID_POSIX_MAP(pl
), /* pl Polish 0x15 */
828 ILCID_POSIX_MAP(ps
), /* ps Pashto 0x63 */
829 ILCID_POSIX_MAP(pt
), /* pt Portuguese 0x16 */
830 ILCID_POSIX_MAP(qu
), /* qu Quechua 0x6B */
831 ILCID_POSIX_MAP(qut
), /* qut K'iche 0x86 */
832 ILCID_POSIX_MAP(rm
), /* rm Raeto-Romance/Romansh 0x17 */
833 ILCID_POSIX_MAP(ro
), /* ro Romanian 0x18 */
834 ILCID_POSIX_MAP(root
), /* root 0x00 */
835 ILCID_POSIX_MAP(ru
), /* ru Russian 0x19 */
836 ILCID_POSIX_MAP(rw
), /* rw Kinyarwanda 0x87 */
837 ILCID_POSIX_MAP(sa
), /* sa Sanskrit 0x4f */
838 ILCID_POSIX_MAP(sah
), /* sah Yakut 0x85 */
839 ILCID_POSIX_MAP(sd
), /* sd Sindhi 0x59 */
840 ILCID_POSIX_MAP(se
), /* se Sami 0x3b */
841 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
842 ILCID_POSIX_MAP(si
), /* si Sinhalese 0x5b */
843 ILCID_POSIX_MAP(sk
), /* sk Slovak 0x1b */
844 ILCID_POSIX_MAP(sl
), /* sl Slovenian 0x24 */
845 ILCID_POSIX_MAP(so
), /* so Somali 0x77 */
846 ILCID_POSIX_MAP(sq
), /* sq Albanian 0x1c */
847 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
848 ILCID_POSIX_MAP(st
), /* st Sutu 0x30 */
849 ILCID_POSIX_MAP(sv
), /* sv Swedish 0x1d */
850 ILCID_POSIX_MAP(sw
), /* sw Swahili 0x41 */
851 ILCID_POSIX_MAP(syr
), /* syr Syriac 0x5A */
852 ILCID_POSIX_MAP(ta
), /* ta Tamil 0x49 */
853 ILCID_POSIX_MAP(te
), /* te Telugu 0x4a */
854 ILCID_POSIX_MAP(tg
), /* tg Tajik 0x28 */
855 ILCID_POSIX_MAP(th
), /* th Thai 0x1e */
856 ILCID_POSIX_MAP(ti
), /* ti Tigrigna 0x73 */
857 ILCID_POSIX_MAP(tk
), /* tk Turkmen 0x42 */
858 ILCID_POSIX_MAP(tn
), /* tn Tswana 0x32 */
859 ILCID_POSIX_MAP(tr
), /* tr Turkish 0x1f */
860 ILCID_POSIX_MAP(ts
), /* ts Tsonga 0x31 */
861 ILCID_POSIX_MAP(tt
), /* tt Tatar 0x44 */
862 ILCID_POSIX_MAP(tzm
), /* tzm Tamazight 0x5f */
863 ILCID_POSIX_MAP(ug
), /* ug Uighur 0x80 */
864 ILCID_POSIX_MAP(uk
), /* uk Ukrainian 0x22 */
865 ILCID_POSIX_MAP(ur
), /* ur Urdu 0x20 */
866 ILCID_POSIX_MAP(uz
), /* uz Uzbek 0x43 */
867 ILCID_POSIX_MAP(ve
), /* ve Venda 0x33 */
868 ILCID_POSIX_MAP(vi
), /* vi Vietnamese 0x2a */
869 ILCID_POSIX_MAP(wo
), /* wo Wolof 0x88 */
870 ILCID_POSIX_MAP(xh
), /* xh Xhosa 0x34 */
871 ILCID_POSIX_MAP(yi
), /* yi Yiddish 0x3d */
872 ILCID_POSIX_MAP(yo
), /* yo Yoruba 0x6a */
873 ILCID_POSIX_MAP(zh
), /* zh Chinese 0x04 */
874 ILCID_POSIX_MAP(zu
), /* zu Zulu 0x35 */
877 static const uint32_t gLocaleCount
= sizeof(gPosixIDmap
)/sizeof(ILcidPosixMap
);
880 * Do not call this function. It is called by getHostID.
881 * The function is not private because this struct must stay as a C struct,
882 * and this is an internal class.
885 idCmp(const char* id1
, const char* id2
)
888 while (*id1
== *id2
&& *id1
!= 0) {
897 * Searches for a Windows LCID
899 * @param posixid the Posix style locale id.
900 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
901 * no equivalent Windows LCID.
905 getHostID(const ILcidPosixMap
*this_0
, const char* posixID
, UErrorCode
* status
)
908 int32_t bestIdxDiff
= 0;
909 int32_t posixIDlen
= (int32_t)uprv_strlen(posixID
);
912 for (idx
= 0; idx
< this_0
->numRegions
; idx
++ ) {
913 int32_t sameChars
= idCmp(posixID
, this_0
->regionMaps
[idx
].posixID
);
914 if (sameChars
> bestIdxDiff
&& this_0
->regionMaps
[idx
].posixID
[sameChars
] == 0) {
915 if (posixIDlen
== sameChars
) {
917 return this_0
->regionMaps
[idx
].hostID
;
919 bestIdxDiff
= sameChars
;
923 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
924 /* We also have to make sure that sid and si and similar string subsets don't match. */
925 if ((posixID
[bestIdxDiff
] == '_' || posixID
[bestIdxDiff
] == '@')
926 && this_0
->regionMaps
[bestIdx
].posixID
[bestIdxDiff
] == 0)
928 *status
= U_USING_FALLBACK_WARNING
;
929 return this_0
->regionMaps
[bestIdx
].hostID
;
933 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
934 return this_0
->regionMaps
->hostID
;
938 getPosixID(const ILcidPosixMap
*this_0
, uint32_t hostID
)
941 for (i
= 0; i
<= this_0
->numRegions
; i
++)
943 if (this_0
->regionMaps
[i
].hostID
== hostID
)
945 return this_0
->regionMaps
[i
].posixID
;
949 /* If you get here, then no matching region was found,
950 so return the language id with the wild card region. */
951 return this_0
->regionMaps
[0].posixID
;
955 //////////////////////////////////////
959 /////////////////////////////////////
961 #ifdef USE_WINDOWS_LOCALE_API
963 * Various language tags need to be changed:
967 #define FIX_LANGUAGE_ID_TAG(buffer, len) \
969 if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
971 uprv_strcat(buffer, buffer+3); \
972 } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
973 buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
974 uprv_strcat(buffer, buffer+3); \
980 uprv_convertToPosix(uint32_t hostid
, char *posixID
, int32_t posixIDCapacity
, UErrorCode
* status
)
983 uint32_t localeIndex
;
984 UBool bLookup
= TRUE
;
985 const char *pPosixID
= NULL
;
987 #ifdef USE_WINDOWS_LOCALE_API
989 char locName
[157]; /* ULOC_FULLNAME_CAPACITY */
991 tmpLen
= GetLocaleInfoA(hostid
, LOCALE_SNAME
, (LPSTR
)locName
, sizeof(locName
)/sizeof(locName
[0]));
993 /* Windows locale name may contain sorting variant, such as "es-ES_tradnl".
994 In such case, we need special mapping data found in the hardcoded table
995 in this source file. */
996 char *p
= uprv_strchr(locName
, '_');
998 /* Keep the base locale, without variant */
1000 tmpLen
= uprv_strlen(locName
);
1002 /* No hardcoded table lookup necessary */
1005 /* Change the tag separator from '-' to '_' */
1013 FIX_LANGUAGE_ID_TAG(locName
, tmpLen
);
1018 const char *pCandidate
= NULL
;
1019 langID
= LANGUAGE_LCID(hostid
);
1021 for (localeIndex
= 0; localeIndex
< gLocaleCount
; localeIndex
++) {
1022 if (langID
== gPosixIDmap
[localeIndex
].regionMaps
->hostID
) {
1023 pCandidate
= getPosixID(&gPosixIDmap
[localeIndex
], hostid
);
1028 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1029 If a match in the hardcoded table is longer than the Windows locale name without
1030 variant, we use the one as the result */
1031 if (pCandidate
&& (pPosixID
== NULL
|| uprv_strlen(pCandidate
) > uprv_strlen(pPosixID
))) {
1032 pPosixID
= pCandidate
;
1037 int32_t resLen
= uprv_strlen(pPosixID
);
1038 int32_t copyLen
= resLen
<= posixIDCapacity
? resLen
: posixIDCapacity
;
1039 uprv_memcpy(posixID
, pPosixID
, copyLen
);
1040 if (resLen
< posixIDCapacity
) {
1041 posixID
[resLen
] = 0;
1042 if (*status
== U_STRING_NOT_TERMINATED_WARNING
) {
1043 *status
= U_ZERO_ERROR
;
1045 } else if (resLen
== posixIDCapacity
) {
1046 *status
= U_STRING_NOT_TERMINATED_WARNING
;
1048 *status
= U_BUFFER_OVERFLOW_ERROR
;
1053 /* no match found */
1054 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1059 //////////////////////////////////////
1062 // This should only be called from uloc_getLCID.
1063 // The locale ID must be in canonical form.
1064 // langID is separate so that this file doesn't depend on the uloc_* API.
1066 /////////////////////////////////////
1070 uprv_convertToLCID(const char *langID
, const char* posixID
, UErrorCode
* status
)
1074 uint32_t high
= gLocaleCount
;
1076 uint32_t oldmid
= 0;
1080 uint32_t fallbackValue
= (uint32_t)-1;
1081 UErrorCode myStatus
;
1084 /* Check for incomplete id. */
1085 if (!langID
|| !posixID
|| uprv_strlen(langID
) < 2 || uprv_strlen(posixID
) < 2) {
1089 /*Binary search for the map entry for normal cases */
1091 while (high
> low
) /*binary search*/{
1093 mid
= (high
+low
) >> 1; /*Finds median*/
1098 compVal
= uprv_strcmp(langID
, gPosixIDmap
[mid
].regionMaps
->posixID
);
1102 else if (compVal
> 0){
1105 else /*we found it*/{
1106 return getHostID(&gPosixIDmap
[mid
], posixID
, status
);
1112 * Sometimes we can't do a binary search on posixID because some LCIDs
1113 * go to different locales. We hit one of those special cases.
1115 for (idx
= 0; idx
< gLocaleCount
; idx
++ ) {
1116 myStatus
= U_ZERO_ERROR
;
1117 value
= getHostID(&gPosixIDmap
[idx
], posixID
, &myStatus
);
1118 if (myStatus
== U_ZERO_ERROR
) {
1121 else if (myStatus
== U_USING_FALLBACK_WARNING
) {
1122 fallbackValue
= value
;
1126 if (fallbackValue
!= (uint32_t)-1) {
1127 *status
= U_USING_FALLBACK_WARNING
;
1128 return fallbackValue
;
1131 /* no match found */
1132 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1133 return 0; /* return international (root) */