]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/locmap.cpp
ICU-62135.0.1.tar.gz
[apple/icu.git] / icuSources / common / locmap.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4 **********************************************************************
2ca993e8 5 * Copyright (C) 1996-2016, International Business Machines
b75a7d8f
A
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
b75a7d8f
A
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 * intended for internal use only.
14 *
15 * Methods of interest:
374ca955
A
16 * unsigned long convertToLCID(const char*);
17 * const char* convertToPosix(unsigned long);
b75a7d8f
A
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 * Date Name Description
22 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
23 * setId() method and safety check against
24 * MAX_ID_LENGTH.
25 * 04/23/99 stephen Added C wrapper for convertToPosix.
26 * 09/18/00 george Removed the memory leaks.
27 * 08/23/01 george Convert to C
28 */
29
30#include "locmap.h"
31#include "cstring.h"
729e4ab9 32#include "cmemory.h"
f3c0d7a5 33#include "unicode/uloc.h"
729e4ab9 34
4388f060
A
35#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
36/*
37 * TODO: It seems like we should widen this to
38 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
39 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
40 * but those use gcc and won't have defined(_MSC_VER).
41 * We might need to #include some Windows header and test for some version macro from there.
42 * Or call some Windows function and see what it returns.
43 */
f3c0d7a5 44#define USE_WINDOWS_LCID_MAPPING_API
729e4ab9
A
45#include <windows.h>
46#include <winnls.h>
47#endif
b75a7d8f 48
b75a7d8f
A
49/*
50 * Note:
b75a7d8f 51 * The mapping from Win32 locale ID numbers to POSIX locale strings should
374ca955 52 * be the faster one.
b75a7d8f 53 *
f3c0d7a5
A
54 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
55 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
b75a7d8f
A
56 */
57
b75a7d8f
A
58/*
59////////////////////////////////////////////////
60//
61// Internal Classes for LCID <--> POSIX Mapping
62//
63/////////////////////////////////////////////////
64*/
65
/**
 * One row of an LCID <--> POSIX mapping table.
 * Both members are const, so a row can only be populated by initialization.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;          /* LCID in host format, e.g. 0x0409 */
    const char * const posixID;     /* matching POSIX locale ID, e.g. "en_US" */
} ILcidPosixElement;
71
/**
 * One group of mapping rows: a pointer to a row array plus its length.
 * Instances are built with ILCID_POSIX_MAP from a locmap_<id> array.
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* the rows themselves */
} ILcidPosixMap;
77
b75a7d8f
A
78
79/*
80/////////////////////////////////////////////////
81//
82// Easy macros to make the LCID <--> POSIX Mapping
83//
84/////////////////////////////////////////////////
85*/
86
729e4ab9
A
/**
 * The standard one language/one country mapping for LCID.
 * Defines a two-row array named locmap_<languageID>: the first row is the
 * language alone (its LCID derived via LANGUAGE_LCID), the second row is the
 * language with the country.
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
};
100
729e4ab9
A
/**
 * Define a subtable by ID.
 * Expands to the declarator of an array named locmap_<id>; the caller supplies
 * the brace-enclosed initializer and the terminating semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
107
108
/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * Expands to an ILcidPosixMap initializer: {row count, row array}.
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
b75a7d8f
A
117
118/*
119////////////////////////////////////////////
120//
121// Create the table of LCID to POSIX Mapping
122// None of it should be dynamically created.
123//
124// Keep static locale variables inside the function so that
125// it can be created properly during static init.
126//
f3c0d7a5
A
127// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
128// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
129//
729e4ab9
A
130// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
131// maintained for support of older Windows version.
132// Update: Windows 7 (091130)
57a6839d
A
133//
134// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
135// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
136// called from uloc_getLCID(), keywords other than collation are already removed. If we really need
137// to support other keywords in this mapping data, we must update the implementation.
b75a7d8f
A
138////////////////////////////////////////////
139*/
140
f3c0d7a5
A
141// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
142// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
143
b75a7d8f
A
144ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
145
729e4ab9 146ILCID_POSIX_SUBTABLE(ar) {
b75a7d8f
A
147 {0x01, "ar"},
148 {0x3801, "ar_AE"},
149 {0x3c01, "ar_BH"},
150 {0x1401, "ar_DZ"},
151 {0x0c01, "ar_EG"},
152 {0x0801, "ar_IQ"},
153 {0x2c01, "ar_JO"},
154 {0x3401, "ar_KW"},
155 {0x3001, "ar_LB"},
156 {0x1001, "ar_LY"},
157 {0x1801, "ar_MA"},
4388f060 158 {0x1801, "ar_MO"},
b75a7d8f
A
159 {0x2001, "ar_OM"},
160 {0x4001, "ar_QA"},
161 {0x0401, "ar_SA"},
162 {0x2801, "ar_SY"},
163 {0x1c01, "ar_TN"},
164 {0x2401, "ar_YE"}
165};
166
374ca955
A
167ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
168ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
73c04bcf 169ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
b75a7d8f 170
729e4ab9 171ILCID_POSIX_SUBTABLE(az) {
b75a7d8f 172 {0x2c, "az"},
73c04bcf 173 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
729e4ab9 174 {0x742c, "az_Cyrl"}, /* Cyrillic based */
73c04bcf 175 {0x042c, "az_Latn_AZ"}, /* Latin based */
729e4ab9 176 {0x782c, "az_Latn"}, /* Latin based */
73c04bcf 177 {0x042c, "az_AZ"} /* Latin based */
b75a7d8f
A
178};
179
73c04bcf 180ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
b75a7d8f 181ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
73c04bcf 182
51004dcb 183/*ILCID_POSIX_SUBTABLE(ber) {
73c04bcf
A
184 {0x5f, "ber"},
185 {0x045f, "ber_Arab_DZ"},
186 {0x045f, "ber_Arab"},
187 {0x085f, "ber_Latn_DZ"},
188 {0x085f, "ber_Latn"}
51004dcb 189};*/
73c04bcf 190
b75a7d8f 191ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
374ca955 192
0f5d89e8
A
193ILCID_POSIX_SUBTABLE(bin) {
194 {0x66, "bin"},
195 {0x0466, "bin_NG"}
196};
4388f060 197
729e4ab9 198ILCID_POSIX_SUBTABLE(bn) {
374ca955
A
199 {0x45, "bn"},
200 {0x0845, "bn_BD"},
201 {0x0445, "bn_IN"}
202};
203
729e4ab9 204ILCID_POSIX_SUBTABLE(bo) {
374ca955
A
205 {0x51, "bo"},
206 {0x0851, "bo_BT"},
2ca993e8
A
207 {0x0451, "bo_CN"},
208 {0x0c51, "dz_BT"}
374ca955
A
209};
210
73c04bcf 211ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
51004dcb
A
212
213ILCID_POSIX_SUBTABLE(ca) {
214 {0x03, "ca"},
215 {0x0403, "ca_ES"},
216 {0x0803, "ca_ES_VALENCIA"}
217};
218
73c04bcf 219ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
0f5d89e8
A
220
221ILCID_POSIX_SUBTABLE(chr) {
222 {0x05c, "chr"},
223 {0x7c5c, "chr_Cher"},
224 {0x045c, "chr_Cher_US"},
225 {0x045c, "chr_US"}
226};
374ca955 227
f3c0d7a5 228// ICU has chosen different names for these.
51004dcb
A
229ILCID_POSIX_SUBTABLE(ckb) {
230 {0x92, "ckb"},
51004dcb 231 {0x7c92, "ckb_Arab"},
f3c0d7a5 232 {0x0492, "ckb_Arab_IQ"}
51004dcb
A
233};
234
374ca955 235/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
729e4ab9 236ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
374ca955
A
237
238ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
b75a7d8f
A
239ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
240
f3c0d7a5 241// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
729e4ab9 242ILCID_POSIX_SUBTABLE(de) {
b75a7d8f
A
243 {0x07, "de"},
244 {0x0c07, "de_AT"},
245 {0x0807, "de_CH"},
246 {0x0407, "de_DE"},
247 {0x1407, "de_LI"},
248 {0x1007, "de_LU"},
73c04bcf
A
249 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
250 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
b75a7d8f
A
251};
252
253ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
254ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
255
f3c0d7a5 256// Windows uses an empty string for 'invariant'
729e4ab9 257ILCID_POSIX_SUBTABLE(en) {
b75a7d8f
A
258 {0x09, "en"},
259 {0x0c09, "en_AU"},
260 {0x2809, "en_BZ"},
261 {0x1009, "en_CA"},
262 {0x0809, "en_GB"},
4388f060
A
263 {0x3c09, "en_HK"},
264 {0x3809, "en_ID"},
b75a7d8f 265 {0x1809, "en_IE"},
73c04bcf 266 {0x4009, "en_IN"},
b75a7d8f 267 {0x2009, "en_JM"},
73c04bcf 268 {0x4409, "en_MY"},
b75a7d8f
A
269 {0x1409, "en_NZ"},
270 {0x3409, "en_PH"},
73c04bcf 271 {0x4809, "en_SG"},
b75a7d8f
A
272 {0x2C09, "en_TT"},
273 {0x0409, "en_US"},
f3c0d7a5 274 {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
0f5d89e8 275 {0x2409, "en_029"},
b75a7d8f 276 {0x1c09, "en_ZA"},
374ca955 277 {0x3009, "en_ZW"},
0f5d89e8 278 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
f3c0d7a5
A
279 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
280 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
281 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
282 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
283 {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
b75a7d8f
A
284};
285
729e4ab9 286ILCID_POSIX_SUBTABLE(en_US_POSIX) {
73c04bcf 287 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
b75a7d8f
A
288};
289
f3c0d7a5 290// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
729e4ab9 291ILCID_POSIX_SUBTABLE(es) {
b75a7d8f
A
292 {0x0a, "es"},
293 {0x2c0a, "es_AR"},
294 {0x400a, "es_BO"},
295 {0x340a, "es_CL"},
296 {0x240a, "es_CO"},
297 {0x140a, "es_CR"},
2ca993e8 298 {0x5c0a, "es_CU"},
b75a7d8f
A
299 {0x1c0a, "es_DO"},
300 {0x300a, "es_EC"},
301 {0x0c0a, "es_ES"}, /*Modern sort.*/
302 {0x100a, "es_GT"},
303 {0x480a, "es_HN"},
304 {0x080a, "es_MX"},
305 {0x4c0a, "es_NI"},
306 {0x180a, "es_PA"},
307 {0x280a, "es_PE"},
308 {0x500a, "es_PR"},
309 {0x3c0a, "es_PY"},
310 {0x440a, "es_SV"},
73c04bcf 311 {0x540a, "es_US"},
b75a7d8f
A
312 {0x380a, "es_UY"},
313 {0x200a, "es_VE"},
57a6839d 314 {0x580a, "es_419"},
73c04bcf 315 {0x040a, "es_ES@collation=traditional"},
f3c0d7a5 316 {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
b75a7d8f
A
317};
318
319ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
320ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
73c04bcf
A
321
322/* ISO-639 doesn't distinguish between Persian and Dari.*/
729e4ab9 323ILCID_POSIX_SUBTABLE(fa) {
73c04bcf
A
324 {0x29, "fa"},
325 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
326 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
327};
328
f3c0d7a5 329
73c04bcf 330/* duplicate for roundtripping */
729e4ab9 331ILCID_POSIX_SUBTABLE(fa_AF) {
73c04bcf
A
332 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
333 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
334};
335
51004dcb
A
336ILCID_POSIX_SUBTABLE(ff) {
337 {0x67, "ff"},
338 {0x7c67, "ff_Latn"},
2ca993e8
A
339 {0x0867, "ff_Latn_SN"},
340 {0x0467, "ff_NG"}
51004dcb
A
341};
342
b75a7d8f 343ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
73c04bcf 344ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
b75a7d8f
A
345ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
346
729e4ab9 347ILCID_POSIX_SUBTABLE(fr) {
b75a7d8f
A
348 {0x0c, "fr"},
349 {0x080c, "fr_BE"},
350 {0x0c0c, "fr_CA"},
374ca955 351 {0x240c, "fr_CD"},
4388f060 352 {0x240c, "fr_CG"},
b75a7d8f 353 {0x100c, "fr_CH"},
374ca955
A
354 {0x300c, "fr_CI"},
355 {0x2c0c, "fr_CM"},
b75a7d8f 356 {0x040c, "fr_FR"},
374ca955 357 {0x3c0c, "fr_HT"},
b75a7d8f 358 {0x140c, "fr_LU"},
374ca955
A
359 {0x380c, "fr_MA"},
360 {0x180c, "fr_MC"},
361 {0x340c, "fr_ML"},
362 {0x200c, "fr_RE"},
4388f060
A
363 {0x280c, "fr_SN"},
364 {0xe40c, "fr_015"},
365 {0x1c0c, "fr_029"}
374ca955
A
366};
367
4388f060
A
368ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
369
374ca955
A
370ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
371
4388f060
A
372ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
373 {0x3c, "ga"},
374 {0x083c, "ga_IE"},
375 {0x043c, "gd_GB"}
376};
377
378ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
379 {0x91, "gd"},
380 {0x0491, "gd_GB"}
381};
b75a7d8f
A
382
383ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
384ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
374ca955 385ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
73c04bcf 386ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
729e4ab9
A
387
388ILCID_POSIX_SUBTABLE(ha) {
389 {0x68, "ha"},
390 {0x7c68, "ha_Latn"},
391 {0x0468, "ha_Latn_NG"},
392};
393
374ca955 394ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
b75a7d8f
A
395ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
396ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
397
374ca955 398/* This LCID is really four different locales.*/
729e4ab9 399ILCID_POSIX_SUBTABLE(hr) {
b75a7d8f 400 {0x1a, "hr"},
73c04bcf 401 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 402 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
374ca955 403 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 404 {0x781a, "bs"}, /* Bosnian */
73c04bcf 405 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 406 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
73c04bcf 407 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
b75a7d8f 408 {0x041a, "hr_HR"}, /* Croatian*/
729e4ab9
A
409 {0x2c1a, "sr_Latn_ME"},
410 {0x241a, "sr_Latn_RS"},
73c04bcf
A
411 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
412 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
729e4ab9 413 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
73c04bcf
A
414 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
415 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
729e4ab9
A
416 {0x301a, "sr_Cyrl_ME"},
417 {0x281a, "sr_Cyrl_RS"},
418 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
419 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
b75a7d8f
A
420};
421
b331163b
A
422ILCID_POSIX_SUBTABLE(hsb) {
423 {0x2E, "hsb"},
424 {0x042E, "hsb_DE"},
425 {0x082E, "dsb_DE"},
426 {0x7C2E, "dsb"},
427};
428
b75a7d8f
A
429ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
430ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
0f5d89e8
A
431
432ILCID_POSIX_SUBTABLE(ibb) {
433 {0x69, "ibb"},
434 {0x0469, "ibb_NG"}
435};
436
b75a7d8f 437ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
374ca955 438ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
73c04bcf 439ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
b75a7d8f
A
440ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
441
729e4ab9 442ILCID_POSIX_SUBTABLE(it) {
b75a7d8f
A
443 {0x10, "it"},
444 {0x0810, "it_CH"},
445 {0x0410, "it_IT"}
446};
447
729e4ab9 448ILCID_POSIX_SUBTABLE(iu) {
73c04bcf
A
449 {0x5d, "iu"},
450 {0x045d, "iu_Cans_CA"},
729e4ab9 451 {0x785d, "iu_Cans"},
73c04bcf 452 {0x085d, "iu_Latn_CA"},
729e4ab9 453 {0x7c5d, "iu_Latn"}
73c04bcf
A
454};
455
b75a7d8f
A
456ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
457ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
458ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
459ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
73c04bcf 460ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
374ca955 461ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
b75a7d8f
A
462ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
463
729e4ab9 464ILCID_POSIX_SUBTABLE(ko) {
b75a7d8f
A
465 {0x12, "ko"},
466 {0x0812, "ko_KP"},
467 {0x0412, "ko_KR"}
468};
469
470ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
374ca955
A
471ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
472
729e4ab9 473ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
374ca955 474 {0x60, "ks"},
4388f060
A
475 {0x0460, "ks_Arab_IN"},
476 {0x0860, "ks_Deva_IN"}
374ca955
A
477};
478
479ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
0f5d89e8
A
480
481ILCID_POSIX_SUBTABLE(la) {
482 {0x76, "la"},
483 {0x0476, "la_001"},
484 {0x0476, "la_IT"} /*Left in for compatibility*/
485};
486
73c04bcf 487ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
374ca955
A
488ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
489ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
490ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
491ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
492ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
493ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
73c04bcf 494
729e4ab9 495ILCID_POSIX_SUBTABLE(mn) {
73c04bcf 496 {0x50, "mn"},
729e4ab9
A
497 {0x0450, "mn_MN"},
498 {0x7c50, "mn_Mong"},
499 {0x0850, "mn_Mong_CN"},
73c04bcf 500 {0x0850, "mn_CN"},
57a6839d
A
501 {0x7850, "mn_Cyrl"},
502 {0x0c50, "mn_Mong_MN"}
73c04bcf
A
503};
504
374ca955 505ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
73c04bcf 506ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
374ca955 507ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
b75a7d8f 508
729e4ab9 509ILCID_POSIX_SUBTABLE(ms) {
b75a7d8f
A
510 {0x3e, "ms"},
511 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
512 {0x043e, "ms_MY"} /* Malaysia*/
513};
514
b75a7d8f 515ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
73c04bcf 516ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
b75a7d8f 517
729e4ab9 518ILCID_POSIX_SUBTABLE(ne) {
b75a7d8f
A
519 {0x61, "ne"},
520 {0x0861, "ne_IN"}, /* India*/
521 {0x0461, "ne_NP"} /* Nepal*/
522};
523
729e4ab9 524ILCID_POSIX_SUBTABLE(nl) {
b75a7d8f
A
525 {0x13, "nl"},
526 {0x0813, "nl_BE"},
527 {0x0413, "nl_NL"}
528};
529
530/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
f3c0d7a5 531// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
729e4ab9 532ILCID_POSIX_SUBTABLE(no) {
f3c0d7a5 533 {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
729e4ab9 534 {0x7c14, "nb"}, /* really nb */
374ca955 535 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
374ca955
A
536 {0x0414, "no_NO"}, /* really nb_NO */
537 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
729e4ab9 538 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
374ca955 539 {0x0814, "no_NO_NY"}/* really nn_NO */
b75a7d8f
A
540};
541
73c04bcf
A
542ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
543ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
4388f060
A
544
545ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
546 {0x72, "om"},
547 {0x0472, "om_ET"},
548 {0x0472, "gaz_ET"}
549};
374ca955 550
b75a7d8f 551/* Declared as or_IN to get around compiler errors*/
729e4ab9 552ILCID_POSIX_SUBTABLE(or_IN) {
b75a7d8f
A
553 {0x48, "or"},
554 {0x0448, "or_IN"},
555};
556
729e4ab9 557ILCID_POSIX_SUBTABLE(pa) {
374ca955
A
558 {0x46, "pa"},
559 {0x0446, "pa_IN"},
0f5d89e8
A
560 {0x0846, "pa_Arab_PK"},
561 {0x0846, "pa_PK"}
562};
563
564ILCID_POSIX_SUBTABLE(pap) {
565 {0x79, "pap"},
566 {0x0479, "pap_029"},
567 {0x0479, "pap_AN"} /*Left in for compatibility*/
374ca955
A
568};
569
b75a7d8f 570ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
374ca955 571ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
b75a7d8f 572
729e4ab9 573ILCID_POSIX_SUBTABLE(pt) {
b75a7d8f
A
574 {0x16, "pt"},
575 {0x0416, "pt_BR"},
576 {0x0816, "pt_PT"}
577};
578
729e4ab9 579ILCID_POSIX_SUBTABLE(qu) {
73c04bcf
A
580 {0x6b, "qu"},
581 {0x046b, "qu_BO"},
582 {0x086b, "qu_EC"},
4388f060
A
583 {0x0C6b, "qu_PE"},
584 {0x046b, "quz_BO"},
585 {0x086b, "quz_EC"},
586 {0x0C6b, "quz_PE"}
374ca955
A
587};
588
2ca993e8
A
589ILCID_POSIX_SUBTABLE(quc) {
590 {0x93, "quc"},
591 {0x0493, "quc_CO"},
592 /*
593 "quc_Latn_GT" is an exceptional case. Language ID of "quc"
594 is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
595 under the group of "qut". "qut" is a retired ISO 639-3 language
596 code for West Central Quiche, and merged to "quc".
597 It looks Windows previously reserved "qut" for K'iche', but,
598 decided to use "quc" when adding a locale for K'iche' (Guatemala).
599
600 This data structure used here assumes language ID bits in
601 LCID is unique for alphabetic language code. But this is not true
602 for "quc_Latn_GT". If we don't have the data below, LCID look up
603 by alphabetic locale ID (POSIX) will fail. The same entry is found
604 under "qut" below, which is required for reverse look up.
605 */
606 {0x0486, "quc_Latn_GT"}
607};
608
609ILCID_POSIX_SUBTABLE(qut) {
610 {0x86, "qut"},
611 {0x0486, "qut_GT"},
612 /*
613 See the note in "quc" above.
614 */
615 {0x0486, "quc_Latn_GT"}
616};
617
73c04bcf 618ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
4388f060
A
619
620ILCID_POSIX_SUBTABLE(ro) {
621 {0x18, "ro"},
622 {0x0418, "ro_RO"},
623 {0x0818, "ro_MD"}
624};
b75a7d8f 625
f3c0d7a5
A
626// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
627// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
628// (Except that it's not invariant in ICU)
729e4ab9 629ILCID_POSIX_SUBTABLE(root) {
b75a7d8f
A
630 {0x00, "root"}
631};
632
4388f060
A
633ILCID_POSIX_SUBTABLE(ru) {
634 {0x19, "ru"},
635 {0x0419, "ru_RU"},
636 {0x0819, "ru_MD"}
637};
638
73c04bcf 639ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
b75a7d8f 640ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
73c04bcf 641ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
374ca955 642
729e4ab9 643ILCID_POSIX_SUBTABLE(sd) {
374ca955 644 {0x59, "sd"},
2ca993e8 645 {0x0459, "sd_Deva_IN"},
0f5d89e8
A
646 {0x0459, "sd_IN"},
647 {0x0859, "sd_Arab_PK"},
648 {0x0859, "sd_PK"},
649 {0x7c59, "sd_Arab"}
374ca955
A
650};
651
729e4ab9 652ILCID_POSIX_SUBTABLE(se) {
73c04bcf
A
653 {0x3b, "se"},
654 {0x0c3b, "se_FI"},
655 {0x043b, "se_NO"},
656 {0x083b, "se_SE"},
729e4ab9 657 {0x783b, "sma"},
73c04bcf
A
658 {0x183b, "sma_NO"},
659 {0x1c3b, "sma_SE"},
729e4ab9
A
660 {0x7c3b, "smj"},
661 {0x703b, "smn"},
662 {0x743b, "sms"},
73c04bcf
A
663 {0x103b, "smj_NO"},
664 {0x143b, "smj_SE"},
665 {0x243b, "smn_FI"},
666 {0x203b, "sms_FI"},
667};
668
374ca955 669ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
b75a7d8f
A
670ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
671ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
4388f060 672
0f5d89e8 673ILCID_POSIX_SUBTABLE(so) {
4388f060 674 {0x77, "so"},
4388f060
A
675 {0x0477, "so_SO"}
676};
677
b75a7d8f 678ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
4388f060 679ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
b75a7d8f 680
729e4ab9 681ILCID_POSIX_SUBTABLE(sv) {
b75a7d8f
A
682 {0x1d, "sv"},
683 {0x081d, "sv_FI"},
684 {0x041d, "sv_SE"}
685};
686
687ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
688ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
51004dcb
A
689
690ILCID_POSIX_SUBTABLE(ta) {
691 {0x49, "ta"},
692 {0x0449, "ta_IN"},
693 {0x0849, "ta_LK"}
694};
695
b75a7d8f 696ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
729e4ab9
A
697
698/* Cyrillic based by default */
699ILCID_POSIX_SUBTABLE(tg) {
700 {0x28, "tg"},
701 {0x7c28, "tg_Cyrl"},
702 {0x0428, "tg_Cyrl_TJ"}
703};
704
b75a7d8f 705ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
374ca955 706
729e4ab9 707ILCID_POSIX_SUBTABLE(ti) {
374ca955
A
708 {0x73, "ti"},
709 {0x0873, "ti_ER"},
710 {0x0473, "ti_ET"}
711};
712
713ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
4388f060
A
714
715ILCID_POSIX_SUBTABLE(tn) {
716 {0x32, "tn"},
51004dcb 717 {0x0832, "tn_BW"},
4388f060
A
718 {0x0432, "tn_ZA"}
719};
720
b75a7d8f 721ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
4388f060 722ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
b75a7d8f 723ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
729e4ab9
A
724
725ILCID_POSIX_SUBTABLE(tzm) {
726 {0x5f, "tzm"},
727 {0x7c5f, "tzm_Latn"},
4388f060 728 {0x085f, "tzm_Latn_DZ"},
51004dcb 729 {0x105f, "tzm_Tfng_MA"},
2ca993e8 730 {0x045f, "tzm_Arab_MA"},
4388f060
A
731 {0x045f, "tmz"}
732};
733
734ILCID_POSIX_SUBTABLE(ug) {
735 {0x80, "ug"},
736 {0x0480, "ug_CN"},
737 {0x0480, "ug_Arab_CN"}
729e4ab9
A
738};
739
b75a7d8f
A
740ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
741
729e4ab9 742ILCID_POSIX_SUBTABLE(ur) {
b75a7d8f
A
743 {0x20, "ur"},
744 {0x0820, "ur_IN"},
745 {0x0420, "ur_PK"}
746};
747
729e4ab9 748ILCID_POSIX_SUBTABLE(uz) {
b75a7d8f 749 {0x43, "uz"},
73c04bcf 750 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
729e4ab9 751 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
b75a7d8f 752 {0x0843, "uz_UZ"}, /* Cyrillic based */
73c04bcf 753 {0x0443, "uz_Latn_UZ"}, /* Latin based */
729e4ab9 754 {0x7c43, "uz_Latn"} /* Latin based */
b75a7d8f
A
755};
756
4388f060
A
757ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
758 {0x33, "ve"},
759 {0x0433, "ve_ZA"},
760 {0x0433, "ven_ZA"}
761};
762
b75a7d8f 763ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
73c04bcf
A
764ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
765ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
0f5d89e8
A
766
767ILCID_POSIX_SUBTABLE(yi) {
768 {0x003d, "yi"},
769 {0x043d, "yi_001"}
770};
771
73c04bcf
A
772ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
773
f3c0d7a5
A
774// Windows & ICU tend to different names for some of these
775// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
729e4ab9
A
776ILCID_POSIX_SUBTABLE(zh) {
777 {0x0004, "zh_Hans"},
778 {0x7804, "zh"},
b75a7d8f 779 {0x0804, "zh_CN"},
729e4ab9 780 {0x0804, "zh_Hans_CN"},
374ca955 781 {0x0c04, "zh_Hant_HK"},
b75a7d8f 782 {0x0c04, "zh_HK"},
374ca955 783 {0x1404, "zh_Hant_MO"},
b75a7d8f 784 {0x1404, "zh_MO"},
374ca955 785 {0x1004, "zh_Hans_SG"},
b75a7d8f 786 {0x1004, "zh_SG"},
374ca955 787 {0x0404, "zh_Hant_TW"},
729e4ab9 788 {0x7c04, "zh_Hant"},
b75a7d8f 789 {0x0404, "zh_TW"},
73c04bcf 790 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
374ca955 791 {0x30404,"zh_TW"}, /* Bopomofo order */
729e4ab9 792 {0x20004,"zh@collation=stroke"},
73c04bcf 793 {0x20404,"zh_Hant@collation=stroke"},
729e4ab9 794 {0x20404,"zh_Hant_TW@collation=stroke"},
73c04bcf 795 {0x20404,"zh_TW@collation=stroke"},
73c04bcf 796 {0x20804,"zh_Hans@collation=stroke"},
729e4ab9 797 {0x20804,"zh_Hans_CN@collation=stroke"},
73c04bcf 798 {0x20804,"zh_CN@collation=stroke"}
f3c0d7a5 799 // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
b75a7d8f
A
800};
801
73c04bcf 802ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
374ca955 803
b75a7d8f
A
804/* This must be static and grouped by LCID. */
805static const ILcidPosixMap gPosixIDmap[] = {
806 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
374ca955 807 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
b75a7d8f 808 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
73c04bcf 809 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
b75a7d8f
A
810 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
811 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
73c04bcf 812 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
374ca955 813 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
729e4ab9 814/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
b75a7d8f 815 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
4388f060 816 ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
b75a7d8f 817 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
374ca955 818 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
73c04bcf 819 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
b75a7d8f 820 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
374ca955 821 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
51004dcb 822 ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
73c04bcf 823 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
729e4ab9 824 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
374ca955 825 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
b75a7d8f
A
826 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
827 ILCID_POSIX_MAP(de), /* de German 0x07 */
374ca955 828 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
b75a7d8f
A
829 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
830 ILCID_POSIX_MAP(en), /* en English 0x09 */
831 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
832 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
833 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
834 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
73c04bcf
A
835 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
836 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
51004dcb 837 ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
b75a7d8f 838 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
73c04bcf 839 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
b75a7d8f
A
840 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
841 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
4388f060 842 ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
374ca955
A
843 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
844 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
729e4ab9 845 ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
b75a7d8f 846 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
374ca955 847 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
73c04bcf 848 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
b75a7d8f 849 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
374ca955
A
850 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
851 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
b75a7d8f
A
852 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
853 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
374ca955 854 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
b331163b 855 ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
b75a7d8f
A
856 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
857 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
4388f060 858 ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
b75a7d8f 859 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
374ca955 860 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
73c04bcf 861 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
b75a7d8f
A
862 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
863 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
374ca955 864 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
b75a7d8f
A
865 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
866 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
867 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
868 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
73c04bcf 869 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
374ca955 870 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
b75a7d8f 871 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
b75a7d8f
A
872 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
873 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
374ca955 874 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
b75a7d8f 875 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
374ca955 876 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
73c04bcf 877 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
374ca955
A
878 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
879 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
b75a7d8f
A
880 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
881 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
374ca955 882 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
b75a7d8f
A
883 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
884 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
885 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
886 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
73c04bcf 887 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
b75a7d8f
A
888 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
889 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
890 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
73c04bcf 891 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
374ca955 892/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
b75a7d8f
A
893 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
894 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
374ca955
A
895/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
896 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
897 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
73c04bcf 898 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
374ca955 899 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
b75a7d8f
A
900 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
901 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
4388f060 902 ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
b75a7d8f 903 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
374ca955 904 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
b75a7d8f 905 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
73c04bcf 906 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
2ca993e8 907 ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
73c04bcf
A
908 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
909 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
b75a7d8f
A
910 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
911 ILCID_POSIX_MAP(root), /* root 0x00 */
912 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
73c04bcf 913 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
b75a7d8f 914 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
73c04bcf 915 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
b75a7d8f 916 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
73c04bcf 917 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
374ca955
A
918/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
919 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
b75a7d8f
A
920 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
921 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
374ca955 922 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
b75a7d8f 923 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
374ca955 924/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
4388f060 925 ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
b75a7d8f
A
926 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
927 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
928 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
929 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
930 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
73c04bcf 931 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
b75a7d8f 932 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
374ca955
A
933 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
934 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
374ca955 935 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
b75a7d8f 936 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
4388f060 937 ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
b75a7d8f 938 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
4388f060 939 ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
374ca955 940 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
b75a7d8f
A
941 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
942 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
943 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
374ca955 944 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
b75a7d8f 945 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
73c04bcf 946 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
374ca955 947 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
4388f060 948 ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
374ca955 949 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
b75a7d8f 950 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
374ca955 951 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
b75a7d8f
A
952};
953
2ca993e8 954static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
b75a7d8f 955
374ca955
A
/**
 * Counts how many leading characters two NUL-terminated IDs have in common.
 *
 * This is a prefix-length measurement, not an ordering comparison: the
 * result is never negative and does not say which ID sorts first.
 *
 * @param id1 the first ID.
 * @param id2 the second ID.
 * @return the number of characters, starting at index 0, that are equal in
 *         both IDs; the terminating NUL is never counted.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    /* Stop at the end of id1 or at the first position where the IDs differ. */
    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; matched++) {
    }
    return matched;
}
972
973/**
974 * Searches for a Windows LCID
975 *
976 * @param posixid the Posix style locale id.
977 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
978 * no equivalent Windows LCID.
979 * @return the LCID
980 */
981static uint32_t
374ca955 982getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
b75a7d8f
A
983{
984 int32_t bestIdx = 0;
985 int32_t bestIdxDiff = 0;
73c04bcf 986 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
b75a7d8f
A
987 uint32_t idx;
988
989 for (idx = 0; idx < this_0->numRegions; idx++ ) {
990 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
991 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
992 if (posixIDlen == sameChars) {
993 /* Exact match */
994 return this_0->regionMaps[idx].hostID;
995 }
996 bestIdxDiff = sameChars;
997 bestIdx = idx;
998 }
999 }
73c04bcf
A
1000 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
1001 /* We also have to make sure that sid and si and similar string subsets don't match. */
1002 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
1003 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
1004 {
b75a7d8f
A
1005 *status = U_USING_FALLBACK_WARNING;
1006 return this_0->regionMaps[bestIdx].hostID;
1007 }
1008
1009 /*no match found */
1010 *status = U_ILLEGAL_ARGUMENT_ERROR;
1011 return this_0->regionMaps->hostID;
1012}
1013
1014static const char*
374ca955 1015getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
b75a7d8f
A
1016{
1017 uint32_t i;
0f5d89e8 1018 for (i = 0; i < this_0->numRegions; i++)
b75a7d8f
A
1019 {
1020 if (this_0->regionMaps[i].hostID == hostID)
1021 {
1022 return this_0->regionMaps[i].posixID;
1023 }
1024 }
1025
1026 /* If you get here, then no matching region was found,
1027 so return the language id with the wild card region. */
1028 return this_0->regionMaps[0].posixID;
1029}
1030
1031/*
1032//////////////////////////////////////
1033//
1034// LCID --> POSIX
1035//
1036/////////////////////////////////////
1037*/
f3c0d7a5 1038#ifdef USE_WINDOWS_LCID_MAPPING_API
729e4ab9
A
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * Both rewrites shorten a three-letter language code to two letters in place,
 * so everything after the code (including the terminating NUL) is shifted one
 * position to the left. The shift uses uprv_memmove because source and
 * destination overlap; appending the tail with a string concatenation would be
 * undefined behavior.
 *
 * buffer: NUL-terminated, writable char array holding the locale name.
 * len:    length reported for buffer; nothing happens when it is below 3.
 *
 * The do/while(0) wrapper lets the macro be used as a single statement.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; \
                (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)
b75a7d8f 1054
729e4ab9 1055#endif
57a6839d
A
1056U_CAPI int32_t
1057uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
b75a7d8f 1058{
729e4ab9
A
1059 uint16_t langID;
1060 uint32_t localeIndex;
57a6839d
A
1061 UBool bLookup = TRUE;
1062 const char *pPosixID = NULL;
729e4ab9 1063
f3c0d7a5 1064#ifdef USE_WINDOWS_LCID_MAPPING_API
0f5d89e8
A
1065 char locName[LOCALE_NAME_MAX_LENGTH] = {}; // ICU name can't be longer than Windows name
1066
f3c0d7a5
A
1067 // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1068 // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1069 // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1070 // use the Windows API to resolve locale ID for this specific case.
1071 if ((hostid & 0x3FF) != 0x92) {
1072 int32_t tmpLen = 0;
1073 UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
f3c0d7a5
A
1074
1075 // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1076 tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
1077 if (tmpLen > 1) {
1078 int32_t i = 0;
1079 // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
57a6839d 1080 bLookup = FALSE;
f3c0d7a5
A
1081 for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1082 {
1083 locName[i] = (char)(windowsLocaleName[i]);
1084
1085 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1086 // In such cases, we need special mapping data found in the hardcoded table
1087 // in this source file.
1088 if (windowsLocaleName[i] == L'_')
1089 {
1090 // Keep the base locale, without variant
1091 // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1092 locName[i] = '\0';
1093 tmpLen = i;
1094 bLookup = TRUE;
1095 break;
1096 }
1097 else if (windowsLocaleName[i] == L'-')
1098 {
1099 // Windows names use -, ICU uses _
1100 locName[i] = '_';
1101 }
1102 else if (windowsLocaleName[i] == L'\0')
1103 {
1104 // No point in doing more work than necessary
1105 break;
1106 }
57a6839d 1107 }
f3c0d7a5
A
1108 // TODO: Need to understand this better, why isn't it an alias?
1109 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1110 pPosixID = locName;
57a6839d 1111 }
729e4ab9 1112 }
f3c0d7a5
A
1113#endif // USE_WINDOWS_LCID_MAPPING_API
1114
57a6839d
A
1115 if (bLookup) {
1116 const char *pCandidate = NULL;
1117 langID = LANGUAGE_LCID(hostid);
1118
1119 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1120 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1121 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1122 break;
1123 }
1124 }
b75a7d8f 1125
57a6839d
A
1126 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1127 If a match in the hardcoded table is longer than the Windows locale name without
1128 variant, we use the one as the result */
1129 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1130 pPosixID = pCandidate;
1131 }
1132 }
1133
1134 if (pPosixID) {
0f5d89e8 1135 int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
57a6839d
A
1136 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1137 uprv_memcpy(posixID, pPosixID, copyLen);
1138 if (resLen < posixIDCapacity) {
1139 posixID[resLen] = 0;
1140 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1141 *status = U_ZERO_ERROR;
1142 }
1143 } else if (resLen == posixIDCapacity) {
1144 *status = U_STRING_NOT_TERMINATED_WARNING;
1145 } else {
1146 *status = U_BUFFER_OVERFLOW_ERROR;
b75a7d8f 1147 }
57a6839d 1148 return resLen;
b75a7d8f
A
1149 }
1150
1151 /* no match found */
1152 *status = U_ILLEGAL_ARGUMENT_ERROR;
57a6839d 1153 return -1;
b75a7d8f
A
1154}
1155
1156/*
1157//////////////////////////////////////
1158//
1159// POSIX --> LCID
374ca955
A
1160// This should only be called from uloc_getLCID.
1161// The locale ID must be in canonical form.
b75a7d8f
A
1162//
1163/////////////////////////////////////
1164*/
f3c0d7a5
A
1165U_CAPI uint32_t
1166uprv_convertToLCIDPlatform(const char* localeID)
1167{
1168 // The purpose of this function is to leverage native platform name->lcid
1169 // conversion functionality when available.
1170#ifdef USE_WINDOWS_LCID_MAPPING_API
1171 DWORD nameLCIDFlags = 0;
1172 UErrorCode myStatus = U_ZERO_ERROR;
1173
1174 // First check for a Windows name->LCID match, fall through to catch
1175 // ICU special cases, but Windows may know it already.
1176#if LOCALE_ALLOW_NEUTRAL_NAMES
1177 nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
1178#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
1179
1180 int32_t len;
1181 char collVal[ULOC_KEYWORDS_CAPACITY] = {};
1182 char baseName[ULOC_FULLNAME_CAPACITY] = {};
1183 const char * mylocaleID = localeID;
1184
1185 // Check any for keywords.
1186 if (uprv_strchr(localeID, '@'))
1187 {
1188 len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
1189 if (U_SUCCESS(myStatus) && len > 0)
1190 {
1191 // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1192 return 0;
1193 }
1194 else
1195 {
1196 // If the locale ID contains keywords other than collation, just use the base name.
1197 len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
1198
1199 if (U_SUCCESS(myStatus) && len > 0)
1200 {
1201 baseName[len] = 0;
1202 mylocaleID = baseName;
1203 }
1204 }
1205 }
1206
1207 char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1208 // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
0f5d89e8 1209 (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
f3c0d7a5
A
1210
1211 if (U_SUCCESS(myStatus))
1212 {
1213 // Need it to be UTF-16, not 8-bit
1214 wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1215 int32_t i;
1216 for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1217 {
1218 if (asciiBCP47Tag[i] == '\0')
1219 {
1220 break;
1221 }
1222 else
1223 {
1224 // Copy the character
1225 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1226 }
1227 }
1228
1229 if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1230 {
1231 // Ensure it's null terminated
1232 bcp47Tag[i] = L'\0';
1233 LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
1234 if (lcid > 0)
1235 {
1236 // Found LCID from windows, return that one, unless its completely ambiguous
1237 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1238 // for this process.
1239 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1240 {
1241 return lcid;
1242 }
1243 }
1244 }
1245 }
0f5d89e8
A
1246#else
1247 (void)localeID; // Suppress unused variable warning.
f3c0d7a5
A
1248#endif /* USE_WINDOWS_LCID_MAPPING_API */
1249
1250 // No found, or not implemented on platforms without native name->lcid conversion
1251 return 0;
1252}
b75a7d8f
A
1253
1254U_CAPI uint32_t
374ca955 1255uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
b75a7d8f 1256{
f3c0d7a5
A
1257 // This function does the table lookup when native platform name->lcid conversion isn't available,
1258 // or for locales that don't follow patterns the platform expects.
b75a7d8f 1259 uint32_t low = 0;
374ca955 1260 uint32_t high = gLocaleCount;
4388f060 1261 uint32_t mid;
374ca955 1262 uint32_t oldmid = 0;
b75a7d8f 1263 int32_t compVal;
b75a7d8f
A
1264
1265 uint32_t value = 0;
1266 uint32_t fallbackValue = (uint32_t)-1;
1267 UErrorCode myStatus;
1268 uint32_t idx;
1269
1270 /* Check for incomplete id. */
374ca955 1271 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
b75a7d8f
A
1272 return 0;
1273 }
1274
1275 /*Binary search for the map entry for normal cases */
b75a7d8f 1276
374ca955 1277 while (high > low) /*binary search*/{
b75a7d8f 1278
374ca955
A
1279 mid = (high+low) >> 1; /*Finds median*/
1280
1281 if (mid == oldmid)
1282 break;
b75a7d8f 1283
374ca955
A
1284 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1285 if (compVal < 0){
1286 high = mid;
1287 }
1288 else if (compVal > 0){
1289 low = mid;
1290 }
1291 else /*we found it*/{
1292 return getHostID(&gPosixIDmap[mid], posixID, status);
1293 }
1294 oldmid = mid;
b75a7d8f
A
1295 }
1296
1297 /*
1298 * Sometimes we can't do a binary search on posixID because some LCIDs
1299 * go to different locales. We hit one of those special cases.
1300 */
1301 for (idx = 0; idx < gLocaleCount; idx++ ) {
1302 myStatus = U_ZERO_ERROR;
374ca955 1303 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
b75a7d8f
A
1304 if (myStatus == U_ZERO_ERROR) {
1305 return value;
1306 }
1307 else if (myStatus == U_USING_FALLBACK_WARNING) {
1308 fallbackValue = value;
1309 }
1310 }
1311
1312 if (fallbackValue != (uint32_t)-1) {
1313 *status = U_USING_FALLBACK_WARNING;
1314 return fallbackValue;
1315 }
1316
1317 /* no match found */
1318 *status = U_ILLEGAL_ARGUMENT_ERROR;
1319 return 0; /* return international (root) */
1320}