2 **********************************************************************
3 * Copyright (C) 1996-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Provides functionality for mapping between
8 * LCID and Posix IDs or ICU locale to codepage
10 * Note: All classes and code in this file are
11 * intended for internal use only.
13 * Methods of interest:
14 * unsigned long convertToLCID(const char*);
15 * const char* convertToPosix(unsigned long);
17 * Kathleen Wilson, 4/30/96
19 * Date Name Description
20 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
21 * setId() method and safety check against
23 * 04/23/99 stephen Added C wrapper for convertToPosix.
24 * 09/18/00 george Removed the memory leaks.
25 * 08/23/01 george Convert to C
29 #include "unicode/uloc.h"
33 #if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
35 * TODO: It seems like we should widen this to
36 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
37 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
38 * but those use gcc and won't have defined(_MSC_VER).
39 * We might need to #include some Windows header and test for some version macro from there.
40 * Or call some Windows function and see what it returns.
42 #define USE_WINDOWS_LOCALE_API
45 #ifdef USE_WINDOWS_LOCALE_API
52 * The mapping from Win32 locale ID numbers to POSIX locale strings should
55 * Many LCID values come from winnt.h
56 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
60 ////////////////////////////////////////////////
62 // Internal Classes for LCID <--> POSIX Mapping
64 /////////////////////////////////////////////////
/**
 * One row of an LCID <-> POSIX table: a Windows locale ID paired with the
 * POSIX locale name it maps to.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;          /* LCID in host format, such as 0x044d */
    const char * const posixID;     /* POSIX locale ID, such as "de" or "de_CH" */
} ILcidPosixElement;
/**
 * All the rows that belong to one language: a count plus a pointer to the
 * first ILcidPosixElement of that language's table.
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* first row of the table */
} ILcidPosixMap;
81 /////////////////////////////////////////////////
83 // Easy macros to make the LCID <--> POSIX Mapping
85 /////////////////////////////////////////////////
/**
 * The standard one language/one country mapping for LCID.
 * The first element must be the language, and the following
 * elements are the language with the country.
 * The expansion is a complete definition (it ends with "};"), so call sites
 * do not add a semicolon.
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID} \
};
/**
 * Define a subtable by ID. The macro only opens the definition; the caller
 * supplies the brace-enclosed rows and the terminating semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * Expands to a brace-enclosed {row count, table pointer} initializer for one
 * ILcidPosixMap.
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {sizeof(locmap_ ## _posixID)/sizeof(locmap_ ## _posixID [0]), locmap_ ## _posixID}
120 ////////////////////////////////////////////
122 // Create the table of LCID to POSIX Mapping
123 // None of it should be dynamically created.
125 // Keep static locale variables inside the function so that
126 // it can be created properly during static init.
128 // Note: This table should be updated periodically. Check the National Language Support API Reference Website.
129 // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
130 // maintained for support of older Windows version.
131 // Update: Windows 7 (091130)
132 ////////////////////////////////////////////
135 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af
, af_ZA
)
137 ILCID_POSIX_SUBTABLE(ar
) {
158 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as
, as_IN
)
159 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am
, am_ET
)
160 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn
,arn_CL
)
162 ILCID_POSIX_SUBTABLE(az
) {
164 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
165 {0x742c, "az_Cyrl"}, /* Cyrillic based */
166 {0x042c, "az_Latn_AZ"}, /* Latin based */
167 {0x782c, "az_Latn"}, /* Latin based */
168 {0x042c, "az_AZ"} /* Latin based */
171 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba
, ba_RU
)
172 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be
, be_BY
)
174 ILCID_POSIX_SUBTABLE(ber
) {
176 {0x045f, "ber_Arab_DZ"},
177 {0x045f, "ber_Arab"},
178 {0x085f, "ber_Latn_DZ"},
182 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg
, bg_BG
)
184 ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin
, bin_NG
)
186 ILCID_POSIX_SUBTABLE(bn
) {
192 ILCID_POSIX_SUBTABLE(bo
) {
198 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br
, br_FR
)
199 ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca
, ca_ES
)
200 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co
, co_FR
)
201 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr
,chr_US
)
203 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
204 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs
, cs_CZ
)
206 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy
, cy_GB
)
207 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da
, da_DK
)
209 ILCID_POSIX_SUBTABLE(de
) {
216 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
217 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
220 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv
, dv_MV
)
221 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el
, el_GR
)
223 ILCID_POSIX_SUBTABLE(en
) {
240 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
241 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
245 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
246 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
247 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
248 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
249 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
252 ILCID_POSIX_SUBTABLE(en_US_POSIX
) {
253 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
256 ILCID_POSIX_SUBTABLE(es
) {
265 {0x0c0a, "es_ES"}, /*Modern sort.*/
279 {0x040a, "es_ES@collation=traditional"},
280 {0x040a, "es@collation=traditional"}
283 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et
, et_EE
)
284 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu
, eu_ES
)
286 /* ISO-639 doesn't distinguish between Persian and Dari.*/
287 ILCID_POSIX_SUBTABLE(fa
) {
289 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
290 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
293 /* duplicate for roundtripping */
294 ILCID_POSIX_SUBTABLE(fa_AF
) {
295 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
296 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
299 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi
, fi_FI
)
300 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil
,fil_PH
)
301 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo
, fo_FO
)
303 ILCID_POSIX_SUBTABLE(fr
) {
324 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv
, fuv_NG
)
326 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy
, fy_NL
)
328 ILCID_POSIX_SUBTABLE(ga
) { /* Gaelic (Ireland) */
334 ILCID_POSIX_SUBTABLE(gd
) { /* Gaelic (Scotland) */
339 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl
, gl_ES
)
340 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu
, gu_IN
)
341 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn
, gn_PY
)
342 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw
,gsw_FR
)
344 ILCID_POSIX_SUBTABLE(ha
) {
347 {0x0468, "ha_Latn_NG"},
350 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw
,haw_US
)
351 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he
, he_IL
)
352 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi
, hi_IN
)
354 /* This LCID is really four different locales.*/
355 ILCID_POSIX_SUBTABLE(hr
) {
357 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
358 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
359 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
360 {0x781a, "bs"}, /* Bosnian */
361 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
362 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
363 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
364 {0x041a, "hr_HR"}, /* Croatian*/
365 {0x2c1a, "sr_Latn_ME"},
366 {0x241a, "sr_Latn_RS"},
367 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
368 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
369 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
370 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
371 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
372 {0x301a, "sr_Cyrl_ME"},
373 {0x281a, "sr_Cyrl_RS"},
374 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
375 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
378 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu
, hu_HU
)
379 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy
, hy_AM
)
380 ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb
, ibb_NG
)
381 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id
, id_ID
)
382 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig
, ig_NG
)
383 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii
, ii_CN
)
384 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is
, is_IS
)
386 ILCID_POSIX_SUBTABLE(it
) {
392 ILCID_POSIX_SUBTABLE(iu
) {
394 {0x045d, "iu_Cans_CA"},
396 {0x085d, "iu_Latn_CA"},
400 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw
, iw_IL
) /*Left in for compatibility*/
401 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja
, ja_JP
)
402 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka
, ka_GE
)
403 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk
, kk_KZ
)
404 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl
, kl_GL
)
405 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km
, km_KH
)
406 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn
, kn_IN
)
408 ILCID_POSIX_SUBTABLE(ko
) {
414 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok
, kok_IN
)
415 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr
, kr_NG
)
417 ILCID_POSIX_SUBTABLE(ks
) { /* We could add PK and CN too */
419 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
420 {0x0460, "ks_Arab_IN"},
421 {0x0860, "ks_Deva_IN"}
424 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky
, ky_KG
) /* Kyrgyz is spoken in Kyrgyzstan */
425 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la
, la_IT
) /* TODO: Verify the country */
426 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb
, lb_LU
)
427 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo
, lo_LA
)
428 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt
, lt_LT
)
429 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv
, lv_LV
)
430 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi
, mi_NZ
)
431 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk
, mk_MK
)
432 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml
, ml_IN
)
434 ILCID_POSIX_SUBTABLE(mn
) {
438 {0x0850, "mn_Mong_CN"},
443 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni
,mni_IN
)
444 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh
,moh_CA
)
445 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr
, mr_IN
)
447 ILCID_POSIX_SUBTABLE(ms
) {
449 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
450 {0x043e, "ms_MY"} /* Malaysia*/
453 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt
, mt_MT
)
454 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my
, my_MM
)
456 ILCID_POSIX_SUBTABLE(ne
) {
458 {0x0861, "ne_IN"}, /* India*/
459 {0x0461, "ne_NP"} /* Nepal*/
462 ILCID_POSIX_SUBTABLE(nl
) {
468 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
469 ILCID_POSIX_SUBTABLE(no
) {
470 {0x14, "no"}, /* really nb_NO */
471 {0x7c14, "nb"}, /* really nb */
472 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
473 {0x0414, "no_NO"}, /* really nb_NO */
474 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
475 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
476 {0x0814, "no_NO_NY"}/* really nn_NO */
479 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso
,nso_ZA
) /* TODO: Verify the ISO-639 code */
480 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc
, oc_FR
)
482 ILCID_POSIX_SUBTABLE(om
) { /* TODO: Verify the country */
488 /* Declared as or_IN to get around compiler errors*/
489 ILCID_POSIX_SUBTABLE(or_IN
) {
495 ILCID_POSIX_SUBTABLE(pa
) {
501 ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap
, pap_AN
)
502 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl
, pl_PL
)
503 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps
, ps_AF
)
505 ILCID_POSIX_SUBTABLE(pt
) {
511 ILCID_POSIX_SUBTABLE(qu
) {
521 ILCID_POSIX_ELEMENT_ARRAY(0x0486, qut
, qut_GT
) /* qut is an ISO-639-3 code */
522 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm
, rm_CH
)
524 ILCID_POSIX_SUBTABLE(ro
) {
530 ILCID_POSIX_SUBTABLE(root
) {
534 ILCID_POSIX_SUBTABLE(ru
) {
540 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw
, rw_RW
)
541 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa
, sa_IN
)
542 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah
,sah_RU
)
544 ILCID_POSIX_SUBTABLE(sd
) {
550 ILCID_POSIX_SUBTABLE(se
) {
567 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si
, si_LK
)
568 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk
, sk_SK
)
569 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl
, sl_SI
)
571 ILCID_POSIX_SUBTABLE(so
) { /* TODO: Verify the country */
577 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq
, sq_AL
)
578 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st
, st_ZA
)
580 ILCID_POSIX_SUBTABLE(sv
) {
586 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw
, sw_KE
)
587 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr
, syr_SY
)
588 ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta
, ta_IN
)
589 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te
, te_IN
)
591 /* Cyrillic based by default */
592 ILCID_POSIX_SUBTABLE(tg
) {
595 {0x0428, "tg_Cyrl_TJ"}
598 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th
, th_TH
)
600 ILCID_POSIX_SUBTABLE(ti
) {
608 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk
, tk_TM
)
610 ILCID_POSIX_SUBTABLE(tn
) {
616 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr
, tr_TR
)
617 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts
, ts_ZA
)
618 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt
, tt_RU
)
620 ILCID_POSIX_SUBTABLE(tzm
) {
622 {0x7c5f, "tzm_Latn"},
623 {0x085f, "tzm_Latn_DZ"},
627 ILCID_POSIX_SUBTABLE(ug
) {
630 {0x0480, "ug_Arab_CN"}
633 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk
, uk_UA
)
635 ILCID_POSIX_SUBTABLE(ur
) {
641 ILCID_POSIX_SUBTABLE(uz
) {
643 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
644 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
645 {0x0843, "uz_UZ"}, /* Cyrillic based */
646 {0x0443, "uz_Latn_UZ"}, /* Latin based */
647 {0x7c43, "uz_Latn"} /* Latin based */
650 ILCID_POSIX_SUBTABLE(ve
) { /* TODO: Verify the country */
656 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi
, vi_VN
)
658 ILCID_POSIX_SUBTABLE(wen
) {
667 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo
, wo_SN
)
668 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh
, xh_ZA
)
669 ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi
, yi
)
670 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo
, yo_NG
)
672 ILCID_POSIX_SUBTABLE(zh
) {
676 {0x0804, "zh_Hans_CN"},
677 {0x0c04, "zh_Hant_HK"},
679 {0x1404, "zh_Hant_MO"},
681 {0x1004, "zh_Hans_SG"},
683 {0x0404, "zh_Hant_TW"},
686 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
687 {0x30404,"zh_TW"}, /* Bopomofo order */
688 {0x20004,"zh@collation=stroke"},
689 {0x20404,"zh_Hant@collation=stroke"},
690 {0x20404,"zh_Hant_TW@collation=stroke"},
691 {0x20404,"zh_TW@collation=stroke"},
692 {0x20804,"zh_Hans@collation=stroke"},
693 {0x20804,"zh_Hans_CN@collation=stroke"},
694 {0x20804,"zh_CN@collation=stroke"}
697 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu
, zu_ZA
)
699 /* This must be static and grouped by LCID. */
700 static const ILcidPosixMap gPosixIDmap
[] = {
701 ILCID_POSIX_MAP(af
), /* af Afrikaans 0x36 */
702 ILCID_POSIX_MAP(am
), /* am Amharic 0x5e */
703 ILCID_POSIX_MAP(ar
), /* ar Arabic 0x01 */
704 ILCID_POSIX_MAP(arn
), /* arn Araucanian/Mapudungun 0x7a */
705 ILCID_POSIX_MAP(as
), /* as Assamese 0x4d */
706 ILCID_POSIX_MAP(az
), /* az Azerbaijani 0x2c */
707 ILCID_POSIX_MAP(ba
), /* ba Bashkir 0x6d */
708 ILCID_POSIX_MAP(be
), /* be Belarusian 0x23 */
709 /* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
710 ILCID_POSIX_MAP(bg
), /* bg Bulgarian 0x02 */
711 ILCID_POSIX_MAP(bin
), /* bin Edo 0x66 */
712 ILCID_POSIX_MAP(bn
), /* bn Bengali; Bangla 0x45 */
713 ILCID_POSIX_MAP(bo
), /* bo Tibetan 0x51 */
714 ILCID_POSIX_MAP(br
), /* br Breton 0x7e */
715 ILCID_POSIX_MAP(ca
), /* ca Catalan 0x03 */
716 ILCID_POSIX_MAP(chr
), /* chr Cherokee 0x5c */
717 ILCID_POSIX_MAP(co
), /* co Corsican 0x83 */
718 ILCID_POSIX_MAP(cs
), /* cs Czech 0x05 */
719 ILCID_POSIX_MAP(cy
), /* cy Welsh 0x52 */
720 ILCID_POSIX_MAP(da
), /* da Danish 0x06 */
721 ILCID_POSIX_MAP(de
), /* de German 0x07 */
722 ILCID_POSIX_MAP(dv
), /* dv Divehi 0x65 */
723 ILCID_POSIX_MAP(el
), /* el Greek 0x08 */
724 ILCID_POSIX_MAP(en
), /* en English 0x09 */
725 ILCID_POSIX_MAP(en_US_POSIX
), /* invariant 0x7f */
726 ILCID_POSIX_MAP(es
), /* es Spanish 0x0a */
727 ILCID_POSIX_MAP(et
), /* et Estonian 0x25 */
728 ILCID_POSIX_MAP(eu
), /* eu Basque 0x2d */
729 ILCID_POSIX_MAP(fa
), /* fa Persian/Farsi 0x29 */
730 ILCID_POSIX_MAP(fa_AF
), /* fa Persian/Dari 0x8c */
731 ILCID_POSIX_MAP(fi
), /* fi Finnish 0x0b */
732 ILCID_POSIX_MAP(fil
), /* fil Filipino 0x64 */
733 ILCID_POSIX_MAP(fo
), /* fo Faroese 0x38 */
734 ILCID_POSIX_MAP(fr
), /* fr French 0x0c */
735 ILCID_POSIX_MAP(fuv
), /* fuv Fulfulde - Nigeria 0x67 */
736 ILCID_POSIX_MAP(fy
), /* fy Frisian 0x62 */
737 ILCID_POSIX_MAP(ga
), /* * Gaelic (Ireland,Scotland) 0x3c */
738 ILCID_POSIX_MAP(gd
), /* gd Gaelic (United Kingdom) 0x91 */
739 ILCID_POSIX_MAP(gl
), /* gl Galician 0x56 */
740 ILCID_POSIX_MAP(gn
), /* gn Guarani 0x74 */
741 ILCID_POSIX_MAP(gsw
), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
742 ILCID_POSIX_MAP(gu
), /* gu Gujarati 0x47 */
743 ILCID_POSIX_MAP(ha
), /* ha Hausa 0x68 */
744 ILCID_POSIX_MAP(haw
), /* haw Hawaiian 0x75 */
745 ILCID_POSIX_MAP(he
), /* he Hebrew (formerly iw) 0x0d */
746 ILCID_POSIX_MAP(hi
), /* hi Hindi 0x39 */
747 ILCID_POSIX_MAP(hr
), /* * Croatian and others 0x1a */
748 ILCID_POSIX_MAP(hu
), /* hu Hungarian 0x0e */
749 ILCID_POSIX_MAP(hy
), /* hy Armenian 0x2b */
750 ILCID_POSIX_MAP(ibb
), /* ibb Ibibio - Nigeria 0x69 */
751 ILCID_POSIX_MAP(id
), /* id Indonesian (formerly in) 0x21 */
752 ILCID_POSIX_MAP(ig
), /* ig Igbo 0x70 */
753 ILCID_POSIX_MAP(ii
), /* ii Sichuan Yi 0x78 */
754 ILCID_POSIX_MAP(is
), /* is Icelandic 0x0f */
755 ILCID_POSIX_MAP(it
), /* it Italian 0x10 */
756 ILCID_POSIX_MAP(iu
), /* iu Inuktitut 0x5d */
757 ILCID_POSIX_MAP(iw
), /* iw Hebrew 0x0d */
758 ILCID_POSIX_MAP(ja
), /* ja Japanese 0x11 */
759 ILCID_POSIX_MAP(ka
), /* ka Georgian 0x37 */
760 ILCID_POSIX_MAP(kk
), /* kk Kazakh 0x3f */
761 ILCID_POSIX_MAP(kl
), /* kl Kalaallisut 0x6f */
762 ILCID_POSIX_MAP(km
), /* km Khmer 0x53 */
763 ILCID_POSIX_MAP(kn
), /* kn Kannada 0x4b */
764 ILCID_POSIX_MAP(ko
), /* ko Korean 0x12 */
765 ILCID_POSIX_MAP(kok
), /* kok Konkani 0x57 */
766 ILCID_POSIX_MAP(kr
), /* kr Kanuri 0x71 */
767 ILCID_POSIX_MAP(ks
), /* ks Kashmiri 0x60 */
768 ILCID_POSIX_MAP(ky
), /* ky Kyrgyz 0x40 */
769 ILCID_POSIX_MAP(lb
), /* lb Luxembourgish 0x6e */
770 ILCID_POSIX_MAP(la
), /* la Latin 0x76 */
771 ILCID_POSIX_MAP(lo
), /* lo Lao 0x54 */
772 ILCID_POSIX_MAP(lt
), /* lt Lithuanian 0x27 */
773 ILCID_POSIX_MAP(lv
), /* lv Latvian, Lettish 0x26 */
774 ILCID_POSIX_MAP(mi
), /* mi Maori 0x81 */
775 ILCID_POSIX_MAP(mk
), /* mk Macedonian 0x2f */
776 ILCID_POSIX_MAP(ml
), /* ml Malayalam 0x4c */
777 ILCID_POSIX_MAP(mn
), /* mn Mongolian 0x50 */
778 ILCID_POSIX_MAP(mni
), /* mni Manipuri 0x58 */
779 ILCID_POSIX_MAP(moh
), /* moh Mohawk 0x7c */
780 ILCID_POSIX_MAP(mr
), /* mr Marathi 0x4e */
781 ILCID_POSIX_MAP(ms
), /* ms Malay 0x3e */
782 ILCID_POSIX_MAP(mt
), /* mt Maltese 0x3a */
783 ILCID_POSIX_MAP(my
), /* my Burmese 0x55 */
784 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
785 ILCID_POSIX_MAP(ne
), /* ne Nepali 0x61 */
786 ILCID_POSIX_MAP(nl
), /* nl Dutch 0x13 */
787 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
788 ILCID_POSIX_MAP(no
), /* * Norwegian 0x14 */
789 ILCID_POSIX_MAP(nso
), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
790 ILCID_POSIX_MAP(oc
), /* oc Occitan 0x82 */
791 ILCID_POSIX_MAP(om
), /* om Oromo 0x72 */
792 ILCID_POSIX_MAP(or_IN
), /* or Oriya 0x48 */
793 ILCID_POSIX_MAP(pa
), /* pa Punjabi 0x46 */
794 ILCID_POSIX_MAP(pap
), /* pap Papiamentu 0x79 */
795 ILCID_POSIX_MAP(pl
), /* pl Polish 0x15 */
796 ILCID_POSIX_MAP(ps
), /* ps Pashto 0x63 */
797 ILCID_POSIX_MAP(pt
), /* pt Portuguese 0x16 */
798 ILCID_POSIX_MAP(qu
), /* qu Quechua 0x6B */
799 ILCID_POSIX_MAP(qut
), /* qut K'iche 0x86 */
800 ILCID_POSIX_MAP(rm
), /* rm Raeto-Romance/Romansh 0x17 */
801 ILCID_POSIX_MAP(ro
), /* ro Romanian 0x18 */
802 ILCID_POSIX_MAP(root
), /* root 0x00 */
803 ILCID_POSIX_MAP(ru
), /* ru Russian 0x19 */
804 ILCID_POSIX_MAP(rw
), /* rw Kinyarwanda 0x87 */
805 ILCID_POSIX_MAP(sa
), /* sa Sanskrit 0x4f */
806 ILCID_POSIX_MAP(sah
), /* sah Yakut 0x85 */
807 ILCID_POSIX_MAP(sd
), /* sd Sindhi 0x59 */
808 ILCID_POSIX_MAP(se
), /* se Sami 0x3b */
809 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
810 ILCID_POSIX_MAP(si
), /* si Sinhalese 0x5b */
811 ILCID_POSIX_MAP(sk
), /* sk Slovak 0x1b */
812 ILCID_POSIX_MAP(sl
), /* sl Slovenian 0x24 */
813 ILCID_POSIX_MAP(so
), /* so Somali 0x77 */
814 ILCID_POSIX_MAP(sq
), /* sq Albanian 0x1c */
815 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
816 ILCID_POSIX_MAP(st
), /* st Sutu 0x30 */
817 ILCID_POSIX_MAP(sv
), /* sv Swedish 0x1d */
818 ILCID_POSIX_MAP(sw
), /* sw Swahili 0x41 */
819 ILCID_POSIX_MAP(syr
), /* syr Syriac 0x5A */
820 ILCID_POSIX_MAP(ta
), /* ta Tamil 0x49 */
821 ILCID_POSIX_MAP(te
), /* te Telugu 0x4a */
822 ILCID_POSIX_MAP(tg
), /* tg Tajik 0x28 */
823 ILCID_POSIX_MAP(th
), /* th Thai 0x1e */
824 ILCID_POSIX_MAP(ti
), /* ti Tigrigna 0x73 */
825 ILCID_POSIX_MAP(tk
), /* tk Turkmen 0x42 */
826 ILCID_POSIX_MAP(tn
), /* tn Tswana 0x32 */
827 ILCID_POSIX_MAP(tr
), /* tr Turkish 0x1f */
828 ILCID_POSIX_MAP(ts
), /* ts Tsonga 0x31 */
829 ILCID_POSIX_MAP(tt
), /* tt Tatar 0x44 */
830 ILCID_POSIX_MAP(tzm
), /* tzm Tamazight 0x5f */
831 ILCID_POSIX_MAP(ug
), /* ug Uighur 0x80 */
832 ILCID_POSIX_MAP(uk
), /* uk Ukrainian 0x22 */
833 ILCID_POSIX_MAP(ur
), /* ur Urdu 0x20 */
834 ILCID_POSIX_MAP(uz
), /* uz Uzbek 0x43 */
835 ILCID_POSIX_MAP(ve
), /* ve Venda 0x33 */
836 ILCID_POSIX_MAP(vi
), /* vi Vietnamese 0x2a */
837 ILCID_POSIX_MAP(wen
), /* wen Sorbian 0x2e */
838 ILCID_POSIX_MAP(wo
), /* wo Wolof 0x88 */
839 ILCID_POSIX_MAP(xh
), /* xh Xhosa 0x34 */
840 ILCID_POSIX_MAP(yi
), /* yi Yiddish 0x3d */
841 ILCID_POSIX_MAP(yo
), /* yo Yoruba 0x6a */
842 ILCID_POSIX_MAP(zh
), /* zh Chinese 0x04 */
843 ILCID_POSIX_MAP(zu
), /* zu Zulu 0x35 */
846 static const uint32_t gLocaleCount
= sizeof(gPosixIDmap
)/sizeof(ILcidPosixMap
);
/**
 * Do not call this function. It is called by getHostID.
 * The function is not private because this struct must stay as a C struct,
 * and this is an internal class.
 *
 * Counts how many leading characters two IDs have in common.
 *
 * @param id1 first NUL-terminated ID
 * @param id2 second NUL-terminated ID
 * @return the length of the common prefix of id1 and id2, which is also the
 *         index of the first differing character (or of the terminator when
 *         the strings are equal)
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t diffIdx = 0;
    /* Stop at the first mismatch or at the end of id1; when id2 ends first
       its terminator differs from *id1 and stops the loop as well. */
    while (*id1 == *id2 && *id1 != 0) {
        diffIdx++;
        id1++;
        id2++;
    }
    return diffIdx;
}
866 * Searches for a Windows LCID
868 * @param posixid the Posix style locale id.
869 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
870 * no equivalent Windows LCID.
874 getHostID(const ILcidPosixMap
*this_0
, const char* posixID
, UErrorCode
* status
)
877 int32_t bestIdxDiff
= 0;
878 int32_t posixIDlen
= (int32_t)uprv_strlen(posixID
);
881 for (idx
= 0; idx
< this_0
->numRegions
; idx
++ ) {
882 int32_t sameChars
= idCmp(posixID
, this_0
->regionMaps
[idx
].posixID
);
883 if (sameChars
> bestIdxDiff
&& this_0
->regionMaps
[idx
].posixID
[sameChars
] == 0) {
884 if (posixIDlen
== sameChars
) {
886 return this_0
->regionMaps
[idx
].hostID
;
888 bestIdxDiff
= sameChars
;
892 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
893 /* We also have to make sure that sid and si and similar string subsets don't match. */
894 if ((posixID
[bestIdxDiff
] == '_' || posixID
[bestIdxDiff
] == '@')
895 && this_0
->regionMaps
[bestIdx
].posixID
[bestIdxDiff
] == 0)
897 *status
= U_USING_FALLBACK_WARNING
;
898 return this_0
->regionMaps
[bestIdx
].hostID
;
902 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
903 return this_0
->regionMaps
->hostID
;
907 getPosixID(const ILcidPosixMap
*this_0
, uint32_t hostID
)
910 for (i
= 0; i
<= this_0
->numRegions
; i
++)
912 if (this_0
->regionMaps
[i
].hostID
== hostID
)
914 return this_0
->regionMaps
[i
].posixID
;
918 /* If you get here, then no matching region was found,
919 so return the language id with the wild card region. */
920 return this_0
->regionMaps
[0].posixID
;
924 //////////////////////////////////////
928 /////////////////////////////////////
930 #ifdef USE_WINDOWS_LOCALE_API
/*
 * Change the tag separator from '-' to '_'
 *
 * Expands to a complete braced statement, so it may be used with or without
 * a trailing semicolon.
 * buffer: character array to edit in place
 * len:    number of characters of buffer to examine
 * i:      caller-supplied index variable used as the loop counter
 */
#define FIX_LOCALE_ID_TAG_SEPARATOR(buffer, len, i) \
    for ((i) = 0; (i) < (len); (i)++) { \
        if ((buffer)[(i)] == '-') { (buffer)[(i)] = '_'; } \
    }
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * buffer: NUL-terminated locale name, edited in place
 * len:    length reported for buffer; nothing is done when it is below 3
 *
 * The tail of the string is shifted left with uprv_memmove because source
 * and destination overlap inside the same buffer; a strcat-style copy has
 * undefined behavior for overlapping regions.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    if ((len) >= 3) { \
        if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
            /* drop the 'z': move the tail and its terminator down by one */ \
            uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
        } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
            (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
            uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
        } \
    }
954 static char gPosixFromLCID
[ULOC_FULLNAME_CAPACITY
];
957 uprv_convertToPosix(uint32_t hostid
, UErrorCode
* status
)
960 uint32_t localeIndex
;
961 #ifdef USE_WINDOWS_LOCALE_API
964 uprv_memset(gPosixFromLCID
, 0, sizeof(gPosixFromLCID
));
966 ret
= GetLocaleInfoA(hostid
, LOCALE_SNAME
, (LPSTR
)gPosixFromLCID
, sizeof(gPosixFromLCID
));
968 FIX_LOCALE_ID_TAG_SEPARATOR(gPosixFromLCID
, (uint32_t)ret
, localeIndex
)
969 FIX_LANGUAGE_ID_TAG(gPosixFromLCID
, ret
)
971 return gPosixFromLCID
;
974 langID
= LANGUAGE_LCID(hostid
);
976 for (localeIndex
= 0; localeIndex
< gLocaleCount
; localeIndex
++)
978 if (langID
== gPosixIDmap
[localeIndex
].regionMaps
->hostID
)
980 return getPosixID(&gPosixIDmap
[localeIndex
], hostid
);
985 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
990 //////////////////////////////////////
993 // This should only be called from uloc_getLCID.
994 // The locale ID must be in canonical form.
995 // langID is separate so that this file doesn't depend on the uloc_* API.
997 /////////////////////////////////////
1001 uprv_convertToLCID(const char *langID
, const char* posixID
, UErrorCode
* status
)
1005 uint32_t high
= gLocaleCount
;
1007 uint32_t oldmid
= 0;
1011 uint32_t fallbackValue
= (uint32_t)-1;
1012 UErrorCode myStatus
;
1015 /* Check for incomplete id. */
1016 if (!langID
|| !posixID
|| uprv_strlen(langID
) < 2 || uprv_strlen(posixID
) < 2) {
1020 /*Binary search for the map entry for normal cases */
1022 while (high
> low
) /*binary search*/{
1024 mid
= (high
+low
) >> 1; /*Finds median*/
1029 compVal
= uprv_strcmp(langID
, gPosixIDmap
[mid
].regionMaps
->posixID
);
1033 else if (compVal
> 0){
1036 else /*we found it*/{
1037 return getHostID(&gPosixIDmap
[mid
], posixID
, status
);
1043 * Sometimes we can't do a binary search on posixID because some LCIDs
1044 * go to different locales. We hit one of those special cases.
1046 for (idx
= 0; idx
< gLocaleCount
; idx
++ ) {
1047 myStatus
= U_ZERO_ERROR
;
1048 value
= getHostID(&gPosixIDmap
[idx
], posixID
, &myStatus
);
1049 if (myStatus
== U_ZERO_ERROR
) {
1052 else if (myStatus
== U_USING_FALLBACK_WARNING
) {
1053 fallbackValue
= value
;
1057 if (fallbackValue
!= (uint32_t)-1) {
1058 *status
= U_USING_FALLBACK_WARNING
;
1059 return fallbackValue
;
1062 /* no match found */
1063 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1064 return 0; /* return international (root) */