]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/locmap.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / locmap.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4 **********************************************************************
2ca993e8 5 * Copyright (C) 1996-2016, International Business Machines
b75a7d8f
A
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
b75a7d8f
A
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 * intended for internal use only.
14 *
15 * Methods of interest:
374ca955
A
16 * unsigned long convertToLCID(const char*);
17 * const char* convertToPosix(unsigned long);
b75a7d8f
A
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 * Date Name Description
22 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
23 * setId() method and safety check against
24 * MAX_ID_LENGTH.
25 * 04/23/99 stephen Added C wrapper for convertToPosix.
26 * 09/18/00 george Removed the memory leaks.
27 * 08/23/01 george Convert to C
28 */
29
30#include "locmap.h"
31#include "cstring.h"
729e4ab9 32#include "cmemory.h"
f3c0d7a5 33#include "unicode/uloc.h"
729e4ab9 34
4388f060
A
35#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
36/*
37 * TODO: It seems like we should widen this to
38 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
39 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
40 * but those use gcc and won't have defined(_MSC_VER).
41 * We might need to #include some Windows header and test for some version macro from there.
42 * Or call some Windows function and see what it returns.
43 */
f3c0d7a5 44#define USE_WINDOWS_LCID_MAPPING_API
729e4ab9
A
45#include <windows.h>
46#include <winnls.h>
47#endif
b75a7d8f 48
b75a7d8f
A
49/*
50 * Note:
b75a7d8f 51 * The mapping from Win32 locale ID numbers to POSIX locale strings should
374ca955 52 * be the faster one.
b75a7d8f 53 *
f3c0d7a5
A
54 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
55 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
b75a7d8f
A
56 */
57
b75a7d8f
A
58/*
59////////////////////////////////////////////////
60//
61// Internal Classes for LCID <--> POSIX Mapping
62//
63/////////////////////////////////////////////////
64*/
65
/*
 * One row of an LCID <--> POSIX subtable: pairs a host (Windows) LCID
 * with the POSIX locale ID string it corresponds to.
 */
66typedef struct ILcidPosixElement
67{
68 const uint32_t hostID;      /* LCID in host format, such as 0x044d */
69 const char * const posixID; /* POSIX locale ID string, such as "de" or "de_CH" */
70} ILcidPosixElement;
71
/*
 * Descriptor for one subtable of ILcidPosixElement rows. Instances are
 * built with ILCID_POSIX_MAP, which supplies the element count and the
 * address of the corresponding locmap_ array.
 */
72typedef struct ILcidPosixMap
73{
74 const uint32_t numRegions;                         /* number of rows in regionMaps */
75 const struct ILcidPosixElement* const regionMaps;  /* the subtable rows themselves */
76} ILcidPosixMap;
77
b75a7d8f
A
78
79/*
80/////////////////////////////////////////////////
81//
82// Easy macros to make the LCID <--> POSIX Mapping
83//
84/////////////////////////////////////////////////
85*/
86
729e4ab9
A
87/**
88 * The standard one language/one country mapping for LCID.
89 * The first element must be the language, and the following
90 * elements are the language with the country.
 *
 * Expands to a two-row array named locmap_<languageID>: the first row
 * pairs LANGUAGE_LCID(hostID) with the stringized languageID (the parent
 * locale), and the second row pairs hostID with the stringized posixID.
 *
91 * @param hostID LCID in host format such as 0x044d
92 * @param languageID posix ID of just the language such as 'de'; also used
 *        as the suffix of the generated array name
93 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
b75a7d8f
A
94 */
95#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
729e4ab9 96static const ILcidPosixElement locmap_ ## languageID [] = { \
b75a7d8f
A
97 {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \
98 {hostID, #posixID}, \
99};
100
729e4ab9
A
101/**
102 * Define a subtable by ID.
 *
 * Expands only to the declaration header
 * "static const ILcidPosixElement locmap_<id> [] =", so the brace-enclosed
 * list of {hostID, posixID} rows and the terminating semicolon must follow
 * the macro invocation.
 *
103 * @param id the POSIX ID, either a language or language_TERRITORY; used as
 *        the suffix of the generated array name
104 */
105#define ILCID_POSIX_SUBTABLE(id) \
106static const ILcidPosixElement locmap_ ## id [] =
107
108
109/**
110 * Create the map for the posixID. This macro supposes that the language string
111 * name is the same as the global variable name, and that the first element
112 * in the ILcidPosixElement is just the language.
 *
 * Expands to a brace initializer {row count, array} for the array
 * locmap_<_posixID>, matching the field order of ILcidPosixMap
 * (numRegions, regionMaps).
 *
57a6839d 113 * @param _posixID the full POSIX ID for this entry; it must match the
 *        suffix used when the locmap_ array was defined (for example or_IN).
b75a7d8f
A
114 */
115#define ILCID_POSIX_MAP(_posixID) \
2ca993e8 116 {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
b75a7d8f
A
117
118/*
119////////////////////////////////////////////
120//
121// Create the table of LCID to POSIX Mapping
122// None of it should be dynamically created.
123//
124// Keep static locale variables inside the function so that
125// it can be created properly during static init.
126//
f3c0d7a5
A
127// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
128// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
129//
729e4ab9
A
130// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
131// maintained for support of older Windows version.
132// Update: Windows 7 (091130)
57a6839d
A
133//
134// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
135// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
136// called from uloc_getLCID(), keywords other than collation are already removed. If we really need
137// to support other keywords in this mapping data, we must update the implementation.
b75a7d8f
A
138////////////////////////////////////////////
139*/
140
f3c0d7a5
A
141// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
142// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
143
b75a7d8f
A
144ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
145
729e4ab9 146ILCID_POSIX_SUBTABLE(ar) {
b75a7d8f
A
147 {0x01, "ar"},
148 {0x3801, "ar_AE"},
149 {0x3c01, "ar_BH"},
150 {0x1401, "ar_DZ"},
151 {0x0c01, "ar_EG"},
152 {0x0801, "ar_IQ"},
153 {0x2c01, "ar_JO"},
154 {0x3401, "ar_KW"},
155 {0x3001, "ar_LB"},
156 {0x1001, "ar_LY"},
157 {0x1801, "ar_MA"},
4388f060 158 {0x1801, "ar_MO"},
b75a7d8f
A
159 {0x2001, "ar_OM"},
160 {0x4001, "ar_QA"},
161 {0x0401, "ar_SA"},
162 {0x2801, "ar_SY"},
163 {0x1c01, "ar_TN"},
164 {0x2401, "ar_YE"}
165};
166
374ca955
A
167ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
168ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
73c04bcf 169ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
b75a7d8f 170
729e4ab9 171ILCID_POSIX_SUBTABLE(az) {
b75a7d8f 172 {0x2c, "az"},
73c04bcf 173 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
729e4ab9 174 {0x742c, "az_Cyrl"}, /* Cyrillic based */
73c04bcf 175 {0x042c, "az_Latn_AZ"}, /* Latin based */
729e4ab9 176 {0x782c, "az_Latn"}, /* Latin based */
73c04bcf 177 {0x042c, "az_AZ"} /* Latin based */
b75a7d8f
A
178};
179
73c04bcf 180ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
b75a7d8f 181ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
73c04bcf 182
51004dcb 183/*ILCID_POSIX_SUBTABLE(ber) {
73c04bcf
A
184 {0x5f, "ber"},
185 {0x045f, "ber_Arab_DZ"},
186 {0x045f, "ber_Arab"},
187 {0x085f, "ber_Latn_DZ"},
188 {0x085f, "ber_Latn"}
51004dcb 189};*/
73c04bcf 190
b75a7d8f 191ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
374ca955 192
4388f060
A
193ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
194
729e4ab9 195ILCID_POSIX_SUBTABLE(bn) {
374ca955
A
196 {0x45, "bn"},
197 {0x0845, "bn_BD"},
198 {0x0445, "bn_IN"}
199};
200
729e4ab9 201ILCID_POSIX_SUBTABLE(bo) {
374ca955
A
202 {0x51, "bo"},
203 {0x0851, "bo_BT"},
2ca993e8
A
204 {0x0451, "bo_CN"},
205 {0x0c51, "dz_BT"}
374ca955
A
206};
207
73c04bcf 208ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
51004dcb
A
209
210ILCID_POSIX_SUBTABLE(ca) {
211 {0x03, "ca"},
212 {0x0403, "ca_ES"},
213 {0x0803, "ca_ES_VALENCIA"}
214};
215
73c04bcf 216ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
374ca955
A
217ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
218
f3c0d7a5 219// ICU has chosen different names for these.
51004dcb
A
220ILCID_POSIX_SUBTABLE(ckb) {
221 {0x92, "ckb"},
51004dcb 222 {0x7c92, "ckb_Arab"},
f3c0d7a5 223 {0x0492, "ckb_Arab_IQ"}
51004dcb
A
224};
225
374ca955 226/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function. NOTE(review): the invocation below passes cs as the languageID, so the generated array is locmap_cs; presumably the locmap_ prefix now avoids the clash - confirm. */
729e4ab9 227ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
374ca955
A
228
229ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
b75a7d8f
A
230ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
231
f3c0d7a5 232// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
729e4ab9 233ILCID_POSIX_SUBTABLE(de) {
b75a7d8f
A
234 {0x07, "de"},
235 {0x0c07, "de_AT"},
236 {0x0807, "de_CH"},
237 {0x0407, "de_DE"},
238 {0x1407, "de_LI"},
239 {0x1007, "de_LU"},
73c04bcf
A
240 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
241 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
b75a7d8f
A
242};
243
244ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
245ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
246
f3c0d7a5 247// Windows uses an empty string for 'invariant'
729e4ab9 248ILCID_POSIX_SUBTABLE(en) {
b75a7d8f
A
249 {0x09, "en"},
250 {0x0c09, "en_AU"},
251 {0x2809, "en_BZ"},
252 {0x1009, "en_CA"},
253 {0x0809, "en_GB"},
4388f060
A
254 {0x3c09, "en_HK"},
255 {0x3809, "en_ID"},
b75a7d8f 256 {0x1809, "en_IE"},
73c04bcf 257 {0x4009, "en_IN"},
b75a7d8f 258 {0x2009, "en_JM"},
73c04bcf 259 {0x4409, "en_MY"},
b75a7d8f
A
260 {0x1409, "en_NZ"},
261 {0x3409, "en_PH"},
73c04bcf 262 {0x4809, "en_SG"},
b75a7d8f
A
263 {0x2C09, "en_TT"},
264 {0x0409, "en_US"},
f3c0d7a5
A
265 {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
266 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
b75a7d8f 267 {0x1c09, "en_ZA"},
374ca955 268 {0x3009, "en_ZW"},
729e4ab9 269 {0x2409, "en_029"},
f3c0d7a5
A
270 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
271 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
272 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
273 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
274 {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
b75a7d8f
A
275};
276
/* Single-row subtable for LCID 0x007f. The identical row is also listed in the en subtable. */
729e4ab9 277ILCID_POSIX_SUBTABLE(en_US_POSIX) {
73c04bcf 278 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
b75a7d8f
A
279};
280
f3c0d7a5 281// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
729e4ab9 282ILCID_POSIX_SUBTABLE(es) {
b75a7d8f
A
283 {0x0a, "es"},
284 {0x2c0a, "es_AR"},
285 {0x400a, "es_BO"},
286 {0x340a, "es_CL"},
287 {0x240a, "es_CO"},
288 {0x140a, "es_CR"},
2ca993e8 289 {0x5c0a, "es_CU"},
b75a7d8f
A
290 {0x1c0a, "es_DO"},
291 {0x300a, "es_EC"},
292 {0x0c0a, "es_ES"}, /*Modern sort.*/
293 {0x100a, "es_GT"},
294 {0x480a, "es_HN"},
295 {0x080a, "es_MX"},
296 {0x4c0a, "es_NI"},
297 {0x180a, "es_PA"},
298 {0x280a, "es_PE"},
299 {0x500a, "es_PR"},
300 {0x3c0a, "es_PY"},
301 {0x440a, "es_SV"},
73c04bcf 302 {0x540a, "es_US"},
b75a7d8f
A
303 {0x380a, "es_UY"},
304 {0x200a, "es_VE"},
57a6839d 305 {0x580a, "es_419"},
73c04bcf 306 {0x040a, "es_ES@collation=traditional"},
f3c0d7a5 307 {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
b75a7d8f
A
308};
309
310ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
311ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
73c04bcf
A
312
313/* ISO-639 doesn't distinguish between Persian and Dari.*/
729e4ab9 314ILCID_POSIX_SUBTABLE(fa) {
73c04bcf
A
315 {0x29, "fa"},
316 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
317 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
318};
319
f3c0d7a5 320
73c04bcf 321/* duplicate for roundtripping */
729e4ab9 322ILCID_POSIX_SUBTABLE(fa_AF) {
73c04bcf
A
323 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
324 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
325};
326
51004dcb
A
327ILCID_POSIX_SUBTABLE(ff) {
328 {0x67, "ff"},
329 {0x7c67, "ff_Latn"},
2ca993e8
A
330 {0x0867, "ff_Latn_SN"},
331 {0x0467, "ff_NG"}
51004dcb
A
332};
333
b75a7d8f 334ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
73c04bcf 335ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
b75a7d8f
A
336ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
337
729e4ab9 338ILCID_POSIX_SUBTABLE(fr) {
b75a7d8f
A
339 {0x0c, "fr"},
340 {0x080c, "fr_BE"},
341 {0x0c0c, "fr_CA"},
374ca955 342 {0x240c, "fr_CD"},
4388f060 343 {0x240c, "fr_CG"},
b75a7d8f 344 {0x100c, "fr_CH"},
374ca955
A
345 {0x300c, "fr_CI"},
346 {0x2c0c, "fr_CM"},
b75a7d8f 347 {0x040c, "fr_FR"},
374ca955 348 {0x3c0c, "fr_HT"},
b75a7d8f 349 {0x140c, "fr_LU"},
374ca955
A
350 {0x380c, "fr_MA"},
351 {0x180c, "fr_MC"},
352 {0x340c, "fr_ML"},
353 {0x200c, "fr_RE"},
4388f060
A
354 {0x280c, "fr_SN"},
355 {0xe40c, "fr_015"},
356 {0x1c0c, "fr_029"}
374ca955
A
357};
358
4388f060
A
359ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
360
374ca955
A
361ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
362
4388f060
A
363ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
364 {0x3c, "ga"},
365 {0x083c, "ga_IE"},
366 {0x043c, "gd_GB"}
367};
368
369ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
370 {0x91, "gd"},
371 {0x0491, "gd_GB"}
372};
b75a7d8f
A
373
374ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
375ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
374ca955 376ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
73c04bcf 377ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
729e4ab9
A
378
379ILCID_POSIX_SUBTABLE(ha) {
380 {0x68, "ha"},
381 {0x7c68, "ha_Latn"},
382 {0x0468, "ha_Latn_NG"},
383};
384
374ca955 385ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
b75a7d8f
A
386ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
387ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
388
374ca955 389/* This LCID is really four different locales.*/
729e4ab9 390ILCID_POSIX_SUBTABLE(hr) {
b75a7d8f 391 {0x1a, "hr"},
73c04bcf 392 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 393 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
374ca955 394 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 395 {0x781a, "bs"}, /* Bosnian */
73c04bcf 396 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 397 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
73c04bcf 398 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
b75a7d8f 399 {0x041a, "hr_HR"}, /* Croatian*/
729e4ab9
A
400 {0x2c1a, "sr_Latn_ME"},
401 {0x241a, "sr_Latn_RS"},
73c04bcf
A
402 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
403 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
729e4ab9 404 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
73c04bcf
A
405 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
406 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
729e4ab9
A
407 {0x301a, "sr_Cyrl_ME"},
408 {0x281a, "sr_Cyrl_RS"},
409 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
410 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
b75a7d8f
A
411};
412
b331163b
A
413ILCID_POSIX_SUBTABLE(hsb) {
414 {0x2E, "hsb"},
415 {0x042E, "hsb_DE"},
416 {0x082E, "dsb_DE"},
417 {0x7C2E, "dsb"},
418};
419
b75a7d8f
A
420ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
421ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
4388f060 422ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
b75a7d8f 423ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
374ca955 424ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
73c04bcf 425ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
b75a7d8f
A
426ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
427
729e4ab9 428ILCID_POSIX_SUBTABLE(it) {
b75a7d8f
A
429 {0x10, "it"},
430 {0x0810, "it_CH"},
431 {0x0410, "it_IT"}
432};
433
729e4ab9 434ILCID_POSIX_SUBTABLE(iu) {
73c04bcf
A
435 {0x5d, "iu"},
436 {0x045d, "iu_Cans_CA"},
729e4ab9 437 {0x785d, "iu_Cans"},
73c04bcf 438 {0x085d, "iu_Latn_CA"},
729e4ab9 439 {0x7c5d, "iu_Latn"}
73c04bcf
A
440};
441
b75a7d8f
A
442ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
443ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
444ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
445ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
73c04bcf 446ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
374ca955 447ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
b75a7d8f
A
448ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
449
729e4ab9 450ILCID_POSIX_SUBTABLE(ko) {
b75a7d8f
A
451 {0x12, "ko"},
452 {0x0812, "ko_KP"},
453 {0x0412, "ko_KR"}
454};
455
456ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
374ca955
A
457ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
458
729e4ab9 459ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
374ca955
A
460 {0x60, "ks"},
461 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
4388f060
A
462 {0x0460, "ks_Arab_IN"},
463 {0x0860, "ks_Deva_IN"}
374ca955
A
464};
465
466ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
467ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
73c04bcf 468ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
374ca955
A
469ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
470ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
471ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
472ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
473ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
474ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
73c04bcf 475
729e4ab9 476ILCID_POSIX_SUBTABLE(mn) {
73c04bcf 477 {0x50, "mn"},
729e4ab9
A
478 {0x0450, "mn_MN"},
479 {0x7c50, "mn_Mong"},
480 {0x0850, "mn_Mong_CN"},
73c04bcf 481 {0x0850, "mn_CN"},
57a6839d
A
482 {0x7850, "mn_Cyrl"},
483 {0x0c50, "mn_Mong_MN"}
73c04bcf
A
484};
485
374ca955 486ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
73c04bcf 487ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
374ca955 488ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
b75a7d8f 489
729e4ab9 490ILCID_POSIX_SUBTABLE(ms) {
b75a7d8f
A
491 {0x3e, "ms"},
492 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
493 {0x043e, "ms_MY"} /* Malaysia*/
494};
495
b75a7d8f 496ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
73c04bcf 497ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
b75a7d8f 498
729e4ab9 499ILCID_POSIX_SUBTABLE(ne) {
b75a7d8f
A
500 {0x61, "ne"},
501 {0x0861, "ne_IN"}, /* India*/
502 {0x0461, "ne_NP"} /* Nepal*/
503};
504
729e4ab9 505ILCID_POSIX_SUBTABLE(nl) {
b75a7d8f
A
506 {0x13, "nl"},
507 {0x0813, "nl_BE"},
508 {0x0413, "nl_NL"}
509};
510
511/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
f3c0d7a5 512// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
729e4ab9 513ILCID_POSIX_SUBTABLE(no) {
f3c0d7a5 514 {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
729e4ab9 515 {0x7c14, "nb"}, /* really nb */
374ca955 516 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
374ca955
A
517 {0x0414, "no_NO"}, /* really nb_NO */
518 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
729e4ab9 519 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
374ca955 520 {0x0814, "no_NO_NY"}/* really nn_NO */
b75a7d8f
A
521};
522
73c04bcf
A
523ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
524ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
4388f060
A
525
526ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
527 {0x72, "om"},
528 {0x0472, "om_ET"},
529 {0x0472, "gaz_ET"}
530};
374ca955 531
b75a7d8f 532/* Declared as or_IN to get around compiler errors; presumably because "or" is an alternative operator token in C++ and cannot be pasted into an identifier - TODO confirm. The language-only row below still maps to the POSIX ID "or". */
729e4ab9 533ILCID_POSIX_SUBTABLE(or_IN) {
b75a7d8f
A
534 {0x48, "or"},
535 {0x0448, "or_IN"},
536};
537
729e4ab9
A
538
539ILCID_POSIX_SUBTABLE(pa) {
374ca955
A
540 {0x46, "pa"},
541 {0x0446, "pa_IN"},
51004dcb
A
542 {0x0846, "pa_PK"},
543 {0x0846, "pa_Arab_PK"}
374ca955
A
544};
545
4388f060 546ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
b75a7d8f 547ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
374ca955 548ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
b75a7d8f 549
729e4ab9 550ILCID_POSIX_SUBTABLE(pt) {
b75a7d8f
A
551 {0x16, "pt"},
552 {0x0416, "pt_BR"},
553 {0x0816, "pt_PT"}
554};
555
729e4ab9 556ILCID_POSIX_SUBTABLE(qu) {
73c04bcf
A
557 {0x6b, "qu"},
558 {0x046b, "qu_BO"},
559 {0x086b, "qu_EC"},
4388f060
A
560 {0x0C6b, "qu_PE"},
561 {0x046b, "quz_BO"},
562 {0x086b, "quz_EC"},
563 {0x0C6b, "quz_PE"}
374ca955
A
564};
565
2ca993e8
A
/* Subtable for "quc"; its last row carries an LCID from the "qut" language group (see the note inside). */
566ILCID_POSIX_SUBTABLE(quc) {
567 {0x93, "quc"},
568 {0x0493, "quc_CO"},
569 /*
570 "quc_Latn_GT" is an exceptional case. The language ID of "quc"
571 is 0x93, but the LCID of "quc_Latn_GT" is 0x486, which should be
572 under the group of "qut". "qut" is a retired ISO 639-3 language
573 code for West Central Quiche, and was merged into "quc".
574 It looks like Windows previously reserved "qut" for K'iche', but
575 decided to use "quc" when adding a locale for K'iche' (Guatemala).
576
577 The data structure used here assumes that the language ID bits in an
578 LCID are unique for an alphabetic language code. But this is not true
579 for "quc_Latn_GT". If we don't have the data below, LCID lookup
580 by alphabetic locale ID (POSIX) will fail. The same entry is found
581 under "qut" below, which is required for reverse lookup.
582 */
583 {0x0486, "quc_Latn_GT"}
584};
585
586ILCID_POSIX_SUBTABLE(qut) {
587 {0x86, "qut"},
588 {0x0486, "qut_GT"},
589 /*
590 See the note in "quc" above.
591 */
592 {0x0486, "quc_Latn_GT"}
593};
594
73c04bcf 595ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
4388f060
A
596
597ILCID_POSIX_SUBTABLE(ro) {
598 {0x18, "ro"},
599 {0x0418, "ro_RO"},
600 {0x0818, "ro_MD"}
601};
b75a7d8f 602
f3c0d7a5
A
603// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
604// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
605// (Except that it's not invariant in ICU)
729e4ab9 606ILCID_POSIX_SUBTABLE(root) {
b75a7d8f
A
607 {0x00, "root"}
608};
609
4388f060
A
610ILCID_POSIX_SUBTABLE(ru) {
611 {0x19, "ru"},
612 {0x0419, "ru_RU"},
613 {0x0819, "ru_MD"}
614};
615
73c04bcf 616ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
b75a7d8f 617ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
73c04bcf 618ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
374ca955 619
729e4ab9 620ILCID_POSIX_SUBTABLE(sd) {
374ca955
A
621 {0x59, "sd"},
622 {0x0459, "sd_IN"},
2ca993e8 623 {0x0459, "sd_Deva_IN"},
374ca955
A
624 {0x0859, "sd_PK"}
625};
626
729e4ab9 627ILCID_POSIX_SUBTABLE(se) {
73c04bcf
A
628 {0x3b, "se"},
629 {0x0c3b, "se_FI"},
630 {0x043b, "se_NO"},
631 {0x083b, "se_SE"},
729e4ab9 632 {0x783b, "sma"},
73c04bcf
A
633 {0x183b, "sma_NO"},
634 {0x1c3b, "sma_SE"},
729e4ab9
A
635 {0x7c3b, "smj"},
636 {0x703b, "smn"},
637 {0x743b, "sms"},
73c04bcf
A
638 {0x103b, "smj_NO"},
639 {0x143b, "smj_SE"},
640 {0x243b, "smn_FI"},
641 {0x203b, "sms_FI"},
642};
643
374ca955 644ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
b75a7d8f
A
645ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
646ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
4388f060
A
647
648ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
649 {0x77, "so"},
650 {0x0477, "so_ET"},
651 {0x0477, "so_SO"}
652};
653
b75a7d8f 654ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
4388f060 655ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
b75a7d8f 656
729e4ab9 657ILCID_POSIX_SUBTABLE(sv) {
b75a7d8f
A
658 {0x1d, "sv"},
659 {0x081d, "sv_FI"},
660 {0x041d, "sv_SE"}
661};
662
663ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
664ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
51004dcb
A
665
666ILCID_POSIX_SUBTABLE(ta) {
667 {0x49, "ta"},
668 {0x0449, "ta_IN"},
669 {0x0849, "ta_LK"}
670};
671
b75a7d8f 672ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
729e4ab9
A
673
674/* Cyrillic based by default */
675ILCID_POSIX_SUBTABLE(tg) {
676 {0x28, "tg"},
677 {0x7c28, "tg_Cyrl"},
678 {0x0428, "tg_Cyrl_TJ"}
679};
680
b75a7d8f 681ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
374ca955 682
729e4ab9 683ILCID_POSIX_SUBTABLE(ti) {
374ca955
A
684 {0x73, "ti"},
685 {0x0873, "ti_ER"},
686 {0x0473, "ti_ET"}
687};
688
689ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
4388f060
A
690
691ILCID_POSIX_SUBTABLE(tn) {
692 {0x32, "tn"},
51004dcb 693 {0x0832, "tn_BW"},
4388f060
A
694 {0x0432, "tn_ZA"}
695};
696
b75a7d8f 697ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
4388f060 698ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
b75a7d8f 699ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
729e4ab9
A
700
701ILCID_POSIX_SUBTABLE(tzm) {
702 {0x5f, "tzm"},
703 {0x7c5f, "tzm_Latn"},
4388f060 704 {0x085f, "tzm_Latn_DZ"},
51004dcb 705 {0x105f, "tzm_Tfng_MA"},
2ca993e8 706 {0x045f, "tzm_Arab_MA"},
4388f060
A
707 {0x045f, "tmz"}
708};
709
710ILCID_POSIX_SUBTABLE(ug) {
711 {0x80, "ug"},
712 {0x0480, "ug_CN"},
713 {0x0480, "ug_Arab_CN"}
729e4ab9
A
714};
715
b75a7d8f
A
716ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
717
729e4ab9 718ILCID_POSIX_SUBTABLE(ur) {
b75a7d8f
A
719 {0x20, "ur"},
720 {0x0820, "ur_IN"},
721 {0x0420, "ur_PK"}
722};
723
729e4ab9 724ILCID_POSIX_SUBTABLE(uz) {
b75a7d8f 725 {0x43, "uz"},
73c04bcf 726 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
729e4ab9 727 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
b75a7d8f 728 {0x0843, "uz_UZ"}, /* Cyrillic based */
73c04bcf 729 {0x0443, "uz_Latn_UZ"}, /* Latin based */
729e4ab9 730 {0x7c43, "uz_Latn"} /* Latin based */
b75a7d8f
A
731};
732
4388f060
A
733ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
734 {0x33, "ve"},
735 {0x0433, "ve_ZA"},
736 {0x0433, "ven_ZA"}
737};
738
b75a7d8f 739ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
73c04bcf
A
740ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
741ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
4388f060 742ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
73c04bcf
A
743ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
744
f3c0d7a5
A
745// Windows & ICU tend to use different names for some of these
746// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
729e4ab9
A
747ILCID_POSIX_SUBTABLE(zh) {
748 {0x0004, "zh_Hans"},
749 {0x7804, "zh"},
b75a7d8f 750 {0x0804, "zh_CN"},
729e4ab9 751 {0x0804, "zh_Hans_CN"},
374ca955 752 {0x0c04, "zh_Hant_HK"},
b75a7d8f 753 {0x0c04, "zh_HK"},
374ca955 754 {0x1404, "zh_Hant_MO"},
b75a7d8f 755 {0x1404, "zh_MO"},
374ca955 756 {0x1004, "zh_Hans_SG"},
b75a7d8f 757 {0x1004, "zh_SG"},
374ca955 758 {0x0404, "zh_Hant_TW"},
729e4ab9 759 {0x7c04, "zh_Hant"},
b75a7d8f 760 {0x0404, "zh_TW"},
73c04bcf 761 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
374ca955 762 {0x30404,"zh_TW"}, /* Bopomofo order */
729e4ab9 763 {0x20004,"zh@collation=stroke"},
73c04bcf 764 {0x20404,"zh_Hant@collation=stroke"},
729e4ab9 765 {0x20404,"zh_Hant_TW@collation=stroke"},
73c04bcf 766 {0x20404,"zh_TW@collation=stroke"},
73c04bcf 767 {0x20804,"zh_Hans@collation=stroke"},
729e4ab9 768 {0x20804,"zh_Hans_CN@collation=stroke"},
73c04bcf 769 {0x20804,"zh_CN@collation=stroke"}
f3c0d7a5 770 // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
b75a7d8f
A
771};
772
73c04bcf 773ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
374ca955 774
b75a7d8f
A
775/* This must be static and grouped by LCID. */
776static const ILcidPosixMap gPosixIDmap[] = {
777 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
374ca955 778 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
b75a7d8f 779 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
73c04bcf 780 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
b75a7d8f
A
781 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
782 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
73c04bcf 783 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
374ca955 784 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
729e4ab9 785/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
b75a7d8f 786 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
4388f060 787 ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
b75a7d8f 788 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
374ca955 789 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
73c04bcf 790 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
b75a7d8f 791 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
374ca955 792 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
51004dcb 793 ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
73c04bcf 794 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
729e4ab9 795 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
374ca955 796 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
b75a7d8f
A
797 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
798 ILCID_POSIX_MAP(de), /* de German 0x07 */
374ca955 799 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
b75a7d8f
A
800 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
801 ILCID_POSIX_MAP(en), /* en English 0x09 */
802 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
803 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
804 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
805 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
73c04bcf
A
806 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
807 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
51004dcb 808 ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
b75a7d8f 809 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
73c04bcf 810 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
b75a7d8f
A
811 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
812 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
4388f060 813 ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
374ca955
A
814 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
815 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
729e4ab9 816 ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
b75a7d8f 817 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
374ca955 818 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
73c04bcf 819 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
b75a7d8f 820 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
374ca955
A
821 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
822 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
b75a7d8f
A
823 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
824 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
374ca955 825 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
b331163b 826 ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
b75a7d8f
A
827 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
828 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
4388f060 829 ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
b75a7d8f 830 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
374ca955 831 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
73c04bcf 832 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
b75a7d8f
A
833 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
834 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
374ca955 835 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
b75a7d8f
A
836 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
837 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
838 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
839 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
73c04bcf 840 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
374ca955 841 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
b75a7d8f 842 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
b75a7d8f
A
843 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
844 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
374ca955 845 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
b75a7d8f 846 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
374ca955 847 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
73c04bcf 848 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
374ca955
A
849 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
850 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
b75a7d8f
A
851 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
852 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
374ca955 853 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
b75a7d8f
A
854 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
855 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
856 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
857 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
73c04bcf 858 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
b75a7d8f
A
859 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
860 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
861 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
73c04bcf 862 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
374ca955 863/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
b75a7d8f
A
864 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
865 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
374ca955
A
866/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
867 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
868 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
73c04bcf 869 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
374ca955 870 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
b75a7d8f
A
871 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
872 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
4388f060 873 ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
b75a7d8f 874 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
374ca955 875 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
b75a7d8f 876 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
73c04bcf 877 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
2ca993e8 878 ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
73c04bcf
A
879 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
880 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
b75a7d8f
A
881 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
882 ILCID_POSIX_MAP(root), /* root 0x00 */
883 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
73c04bcf 884 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
b75a7d8f 885 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
73c04bcf 886 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
b75a7d8f 887 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
73c04bcf 888 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
374ca955
A
889/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
890 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
b75a7d8f
A
891 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
892 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
374ca955 893 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
b75a7d8f 894 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
374ca955 895/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
4388f060 896 ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
b75a7d8f
A
897 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
898 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
899 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
900 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
901 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
73c04bcf 902 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
b75a7d8f 903 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
374ca955
A
904 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
905 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
374ca955 906 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
b75a7d8f 907 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
4388f060 908 ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
b75a7d8f 909 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
4388f060 910 ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
374ca955 911 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
b75a7d8f
A
912 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
913 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
914 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
374ca955 915 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
b75a7d8f 916 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
73c04bcf 917 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
374ca955 918 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
4388f060 919 ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
374ca955 920 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
b75a7d8f 921 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
374ca955 922 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
b75a7d8f
A
923};
924
/* Number of entries in gPosixIDmap; used below as the bound when scanning or searching the table. */
2ca993e8 925static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
b75a7d8f 926
374ca955
A
/**
 * Internal helper used by getHostID.
 *
 * Counts how many leading characters two NUL-terminated IDs have in common.
 * Counting stops at the first differing character or at the end of id1,
 * whichever comes first.
 *
 * @param id1 first ID.
 * @param id2 second ID.
 * @return the length of the shared prefix.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; ++matched) {
        /* nothing to do: the loop header does all the work */
    }
    return matched;
}
943
944/**
945 * Searches for a Windows LCID
946 *
947 * @param posixid the Posix style locale id.
948 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
949 * no equivalent Windows LCID.
950 * @return the LCID
951 */
952static uint32_t
374ca955 953getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
b75a7d8f
A
954{
955 int32_t bestIdx = 0;
956 int32_t bestIdxDiff = 0;
73c04bcf 957 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
b75a7d8f
A
958 uint32_t idx;
959
960 for (idx = 0; idx < this_0->numRegions; idx++ ) {
961 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
962 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
963 if (posixIDlen == sameChars) {
964 /* Exact match */
965 return this_0->regionMaps[idx].hostID;
966 }
967 bestIdxDiff = sameChars;
968 bestIdx = idx;
969 }
970 }
73c04bcf
A
971 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
972 /* We also have to make sure that sid and si and similar string subsets don't match. */
973 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
974 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
975 {
b75a7d8f
A
976 *status = U_USING_FALLBACK_WARNING;
977 return this_0->regionMaps[bestIdx].hostID;
978 }
979
980 /*no match found */
981 *status = U_ILLEGAL_ARGUMENT_ERROR;
982 return this_0->regionMaps->hostID;
983}
984
985static const char*
374ca955 986getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
b75a7d8f
A
987{
988 uint32_t i;
989 for (i = 0; i <= this_0->numRegions; i++)
990 {
991 if (this_0->regionMaps[i].hostID == hostID)
992 {
993 return this_0->regionMaps[i].posixID;
994 }
995 }
996
997 /* If you get here, then no matching region was found,
998 so return the language id with the wild card region. */
999 return this_0->regionMaps[0].posixID;
1000}
1001
1002/*
1003//////////////////////////////////////
1004//
1005// LCID --> POSIX
1006//
1007/////////////////////////////////////
1008*/
f3c0d7a5 1009#ifdef USE_WINDOWS_LCID_MAPPING_API
729e4ab9
A
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * The replacement is done in place: the first two characters are (re)written
 * and the rest of the string, including its terminator, is shifted down by
 * one position. uprv_memmove is used for the shift because source and
 * destination overlap, which the str* copy functions do not permit.
 *
 * buffer must be a NUL-terminated, writable char array; len is compared
 * against 3 to decide whether a three-letter tag can be present at all.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)
b75a7d8f 1025
729e4ab9 1026#endif
57a6839d
A
1027U_CAPI int32_t
1028uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
b75a7d8f 1029{
729e4ab9
A
1030 uint16_t langID;
1031 uint32_t localeIndex;
57a6839d
A
1032 UBool bLookup = TRUE;
1033 const char *pPosixID = NULL;
729e4ab9 1034
f3c0d7a5
A
1035#ifdef USE_WINDOWS_LCID_MAPPING_API
1036 // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1037 // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1038 // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1039 // use the Windows API to resolve locale ID for this specific case.
1040 if ((hostid & 0x3FF) != 0x92) {
1041 int32_t tmpLen = 0;
1042 UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
1043 char locName[LOCALE_NAME_MAX_LENGTH]; // ICU name can't be longer than Windows name
1044
1045 // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1046 tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
1047 if (tmpLen > 1) {
1048 int32_t i = 0;
1049 // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
57a6839d 1050 bLookup = FALSE;
f3c0d7a5
A
1051 for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1052 {
1053 locName[i] = (char)(windowsLocaleName[i]);
1054
1055 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1056 // In such cases, we need special mapping data found in the hardcoded table
1057 // in this source file.
1058 if (windowsLocaleName[i] == L'_')
1059 {
1060 // Keep the base locale, without variant
1061 // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1062 locName[i] = '\0';
1063 tmpLen = i;
1064 bLookup = TRUE;
1065 break;
1066 }
1067 else if (windowsLocaleName[i] == L'-')
1068 {
1069 // Windows names use -, ICU uses _
1070 locName[i] = '_';
1071 }
1072 else if (windowsLocaleName[i] == L'\0')
1073 {
1074 // No point in doing more work than necessary
1075 break;
1076 }
57a6839d 1077 }
f3c0d7a5
A
1078 // TODO: Need to understand this better, why isn't it an alias?
1079 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1080 pPosixID = locName;
57a6839d 1081 }
729e4ab9 1082 }
f3c0d7a5
A
1083#endif // USE_WINDOWS_LCID_MAPPING_API
1084
57a6839d
A
1085 if (bLookup) {
1086 const char *pCandidate = NULL;
1087 langID = LANGUAGE_LCID(hostid);
1088
1089 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1090 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1091 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1092 break;
1093 }
1094 }
b75a7d8f 1095
57a6839d
A
1096 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1097 If a match in the hardcoded table is longer than the Windows locale name without
1098 variant, we use the one as the result */
1099 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1100 pPosixID = pCandidate;
1101 }
1102 }
1103
1104 if (pPosixID) {
1105 int32_t resLen = uprv_strlen(pPosixID);
1106 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1107 uprv_memcpy(posixID, pPosixID, copyLen);
1108 if (resLen < posixIDCapacity) {
1109 posixID[resLen] = 0;
1110 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1111 *status = U_ZERO_ERROR;
1112 }
1113 } else if (resLen == posixIDCapacity) {
1114 *status = U_STRING_NOT_TERMINATED_WARNING;
1115 } else {
1116 *status = U_BUFFER_OVERFLOW_ERROR;
b75a7d8f 1117 }
57a6839d 1118 return resLen;
b75a7d8f
A
1119 }
1120
1121 /* no match found */
1122 *status = U_ILLEGAL_ARGUMENT_ERROR;
57a6839d 1123 return -1;
b75a7d8f
A
1124}
1125
1126/*
1127//////////////////////////////////////
1128//
1129// POSIX --> LCID
374ca955
A
1130// This should only be called from uloc_getLCID.
1131// The locale ID must be in canonical form.
b75a7d8f
A
1132//
1133/////////////////////////////////////
1134*/
f3c0d7a5
A
1135U_CAPI uint32_t
1136uprv_convertToLCIDPlatform(const char* localeID)
1137{
1138 // The purpose of this function is to leverage native platform name->lcid
1139 // conversion functionality when available.
1140#ifdef USE_WINDOWS_LCID_MAPPING_API
1141 DWORD nameLCIDFlags = 0;
1142 UErrorCode myStatus = U_ZERO_ERROR;
1143
1144 // First check for a Windows name->LCID match, fall through to catch
1145 // ICU special cases, but Windows may know it already.
1146#if LOCALE_ALLOW_NEUTRAL_NAMES
1147 nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
1148#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
1149
1150 int32_t len;
1151 char collVal[ULOC_KEYWORDS_CAPACITY] = {};
1152 char baseName[ULOC_FULLNAME_CAPACITY] = {};
1153 const char * mylocaleID = localeID;
1154
1155 // Check any for keywords.
1156 if (uprv_strchr(localeID, '@'))
1157 {
1158 len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
1159 if (U_SUCCESS(myStatus) && len > 0)
1160 {
1161 // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1162 return 0;
1163 }
1164 else
1165 {
1166 // If the locale ID contains keywords other than collation, just use the base name.
1167 len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
1168
1169 if (U_SUCCESS(myStatus) && len > 0)
1170 {
1171 baseName[len] = 0;
1172 mylocaleID = baseName;
1173 }
1174 }
1175 }
1176
1177 char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1178 // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
1179 int32_t bcp47Len = uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
1180
1181 if (U_SUCCESS(myStatus))
1182 {
1183 // Need it to be UTF-16, not 8-bit
1184 wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1185 int32_t i;
1186 for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1187 {
1188 if (asciiBCP47Tag[i] == '\0')
1189 {
1190 break;
1191 }
1192 else
1193 {
1194 // Copy the character
1195 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1196 }
1197 }
1198
1199 if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1200 {
1201 // Ensure it's null terminated
1202 bcp47Tag[i] = L'\0';
1203 LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
1204 if (lcid > 0)
1205 {
1206 // Found LCID from windows, return that one, unless its completely ambiguous
1207 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1208 // for this process.
1209 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1210 {
1211 return lcid;
1212 }
1213 }
1214 }
1215 }
1216#endif /* USE_WINDOWS_LCID_MAPPING_API */
1217
1218 // No found, or not implemented on platforms without native name->lcid conversion
1219 return 0;
1220}
b75a7d8f
A
1221
1222U_CAPI uint32_t
374ca955 1223uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
b75a7d8f 1224{
f3c0d7a5
A
1225 // This function does the table lookup when native platform name->lcid conversion isn't available,
1226 // or for locales that don't follow patterns the platform expects.
b75a7d8f 1227 uint32_t low = 0;
374ca955 1228 uint32_t high = gLocaleCount;
4388f060 1229 uint32_t mid;
374ca955 1230 uint32_t oldmid = 0;
b75a7d8f 1231 int32_t compVal;
b75a7d8f
A
1232
1233 uint32_t value = 0;
1234 uint32_t fallbackValue = (uint32_t)-1;
1235 UErrorCode myStatus;
1236 uint32_t idx;
1237
1238 /* Check for incomplete id. */
374ca955 1239 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
b75a7d8f
A
1240 return 0;
1241 }
1242
1243 /*Binary search for the map entry for normal cases */
b75a7d8f 1244
374ca955 1245 while (high > low) /*binary search*/{
b75a7d8f 1246
374ca955
A
1247 mid = (high+low) >> 1; /*Finds median*/
1248
1249 if (mid == oldmid)
1250 break;
b75a7d8f 1251
374ca955
A
1252 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1253 if (compVal < 0){
1254 high = mid;
1255 }
1256 else if (compVal > 0){
1257 low = mid;
1258 }
1259 else /*we found it*/{
1260 return getHostID(&gPosixIDmap[mid], posixID, status);
1261 }
1262 oldmid = mid;
b75a7d8f
A
1263 }
1264
1265 /*
1266 * Sometimes we can't do a binary search on posixID because some LCIDs
1267 * go to different locales. We hit one of those special cases.
1268 */
1269 for (idx = 0; idx < gLocaleCount; idx++ ) {
1270 myStatus = U_ZERO_ERROR;
374ca955 1271 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
b75a7d8f
A
1272 if (myStatus == U_ZERO_ERROR) {
1273 return value;
1274 }
1275 else if (myStatus == U_USING_FALLBACK_WARNING) {
1276 fallbackValue = value;
1277 }
1278 }
1279
1280 if (fallbackValue != (uint32_t)-1) {
1281 *status = U_USING_FALLBACK_WARNING;
1282 return fallbackValue;
1283 }
1284
1285 /* no match found */
1286 *status = U_ILLEGAL_ARGUMENT_ERROR;
1287 return 0; /* return international (root) */
1288}