]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/locmap.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / common / locmap.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4 **********************************************************************
2ca993e8 5 * Copyright (C) 1996-2016, International Business Machines
b75a7d8f
A
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
b75a7d8f
A
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 * intended for internal use only.
14 *
15 * Methods of interest:
374ca955
A
16 * unsigned long convertToLCID(const char*);
17 * const char* convertToPosix(unsigned long);
b75a7d8f
A
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 * Date Name Description
22 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
23 * setId() method and safety check against
24 * MAX_ID_LENGTH.
25 * 04/23/99 stephen Added C wrapper for convertToPosix.
26 * 09/18/00 george Removed the memory leaks.
27 * 08/23/01 george Convert to C
28 */
29
30#include "locmap.h"
31#include "cstring.h"
729e4ab9 32#include "cmemory.h"
f3c0d7a5 33#include "unicode/uloc.h"
729e4ab9 34
3d1f044b 35#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
729e4ab9 36#include <windows.h>
3d1f044b 37#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
729e4ab9 38#endif
b75a7d8f 39
b75a7d8f
A
40/*
41 * Note:
b75a7d8f 42 * The mapping from Win32 locale ID numbers to POSIX locale strings should
374ca955 43 * be the faster one.
b75a7d8f 44 *
f3c0d7a5
A
45 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
46 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
b75a7d8f
A
47 */
48
b75a7d8f
A
49/*
50////////////////////////////////////////////////
51//
52// Internal Classes for LCID <--> POSIX Mapping
53//
54/////////////////////////////////////////////////
55*/
56
/**
 * One row of an LCID <--> POSIX mapping table: a host locale identifier
 * paired with the POSIX locale ID string it maps to.
 * Both members are const, so rows can only be set by static initialization.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;          /* LCID in host format, such as 0x044d */
    const char * const posixID;     /* POSIX ID, such as "de" or "de_CH" */
} ILcidPosixElement;
62
/**
 * One entry of the top-level map: a pointer to a table of
 * ILcidPosixElement rows together with the number of rows in it.
 * Both members are const, so entries can only be set by static
 * initialization (see ILCID_POSIX_MAP).
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* the rows for this entry */
} ILcidPosixMap;
68
b75a7d8f
A
69
70/*
71/////////////////////////////////////////////////
72//
73// Easy macros to make the LCID <--> POSIX Mapping
74//
75/////////////////////////////////////////////////
76*/
77
729e4ab9
A
/**
 * The standard one language/one country mapping for LCID.
 * Defines a complete two-row table named locmap_<languageID>.
 * The first element must be the language, and the following
 * elements are the language with the country.
 * The language row's LCID is derived from hostID with LANGUAGE_LCID().
 * languageID and posixID are stringized with '#', so pass them as bare
 * tokens, not as quoted strings.
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
};
91
729e4ab9
A
/**
 * Define a subtable by ID.
 * Expands to the declaration of the table locmap_<id> up to and including
 * the '='; the caller writes the brace-enclosed list of rows and the
 * terminating semicolon immediately after the macro call.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
98
99
/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * Expands to a brace-enclosed ILcidPosixMap initializer: the element count
 * of the table locmap_<_posixID> followed by the table itself.
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
b75a7d8f
A
108
109/*
110////////////////////////////////////////////
111//
112// Create the table of LCID to POSIX Mapping
113// None of it should be dynamically created.
114//
115// Keep static locale variables inside the function so that
116// it can be created properly during static init.
117//
f3c0d7a5
A
118// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
119// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
120//
729e4ab9
A
121// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
122// maintained for support of older Windows version.
123// Update: Windows 7 (091130)
57a6839d
A
124//
125// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
126// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
127// called from uloc_getLCID(), keywords other than collation are already removed. If we really need
128// to support other keywords in this mapping data, we must update the implementation.
b75a7d8f
A
129////////////////////////////////////////////
130*/
131
f3c0d7a5
A
132// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
133// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
134
b75a7d8f
A
135ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
136
729e4ab9 137ILCID_POSIX_SUBTABLE(ar) {
b75a7d8f
A
138 {0x01, "ar"},
139 {0x3801, "ar_AE"},
140 {0x3c01, "ar_BH"},
141 {0x1401, "ar_DZ"},
142 {0x0c01, "ar_EG"},
143 {0x0801, "ar_IQ"},
144 {0x2c01, "ar_JO"},
145 {0x3401, "ar_KW"},
146 {0x3001, "ar_LB"},
147 {0x1001, "ar_LY"},
148 {0x1801, "ar_MA"},
4388f060 149 {0x1801, "ar_MO"},
b75a7d8f
A
150 {0x2001, "ar_OM"},
151 {0x4001, "ar_QA"},
152 {0x0401, "ar_SA"},
153 {0x2801, "ar_SY"},
154 {0x1c01, "ar_TN"},
155 {0x2401, "ar_YE"}
156};
157
374ca955
A
158ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
159ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
73c04bcf 160ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
b75a7d8f 161
729e4ab9 162ILCID_POSIX_SUBTABLE(az) {
b75a7d8f 163 {0x2c, "az"},
73c04bcf 164 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
729e4ab9 165 {0x742c, "az_Cyrl"}, /* Cyrillic based */
73c04bcf 166 {0x042c, "az_Latn_AZ"}, /* Latin based */
729e4ab9 167 {0x782c, "az_Latn"}, /* Latin based */
73c04bcf 168 {0x042c, "az_AZ"} /* Latin based */
b75a7d8f
A
169};
170
73c04bcf 171ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
b75a7d8f 172ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
73c04bcf 173
51004dcb 174/*ILCID_POSIX_SUBTABLE(ber) {
73c04bcf
A
175 {0x5f, "ber"},
176 {0x045f, "ber_Arab_DZ"},
177 {0x045f, "ber_Arab"},
178 {0x085f, "ber_Latn_DZ"},
179 {0x085f, "ber_Latn"}
51004dcb 180};*/
73c04bcf 181
b75a7d8f 182ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
374ca955 183
0f5d89e8
A
184ILCID_POSIX_SUBTABLE(bin) {
185 {0x66, "bin"},
186 {0x0466, "bin_NG"}
187};
4388f060 188
729e4ab9 189ILCID_POSIX_SUBTABLE(bn) {
374ca955
A
190 {0x45, "bn"},
191 {0x0845, "bn_BD"},
192 {0x0445, "bn_IN"}
193};
194
729e4ab9 195ILCID_POSIX_SUBTABLE(bo) {
374ca955
A
196 {0x51, "bo"},
197 {0x0851, "bo_BT"},
2ca993e8
A
198 {0x0451, "bo_CN"},
199 {0x0c51, "dz_BT"}
374ca955
A
200};
201
73c04bcf 202ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
51004dcb
A
203
204ILCID_POSIX_SUBTABLE(ca) {
205 {0x03, "ca"},
206 {0x0403, "ca_ES"},
207 {0x0803, "ca_ES_VALENCIA"}
208};
209
73c04bcf 210ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
0f5d89e8
A
211
212ILCID_POSIX_SUBTABLE(chr) {
213 {0x05c, "chr"},
214 {0x7c5c, "chr_Cher"},
215 {0x045c, "chr_Cher_US"},
216 {0x045c, "chr_US"}
217};
374ca955 218
f3c0d7a5 219// ICU has chosen different names for these.
51004dcb
A
220ILCID_POSIX_SUBTABLE(ckb) {
221 {0x92, "ckb"},
51004dcb 222 {0x7c92, "ckb_Arab"},
f3c0d7a5 223 {0x0492, "ckb_Arab_IQ"}
51004dcb
A
224};
225
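374ca955 226/* NOTE(review): this table was reportedly declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function, but the declaration below now uses cs (locmap_cs) - confirm against z/OS builds. */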
729e4ab9 227ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
374ca955
A
228
229ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
b75a7d8f
A
230ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
231
f3c0d7a5 232// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
729e4ab9 233ILCID_POSIX_SUBTABLE(de) {
b75a7d8f
A
234 {0x07, "de"},
235 {0x0c07, "de_AT"},
236 {0x0807, "de_CH"},
237 {0x0407, "de_DE"},
238 {0x1407, "de_LI"},
239 {0x1007, "de_LU"},
73c04bcf
A
240 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
241 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
b75a7d8f
A
242};
243
244ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
245ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
246
f3c0d7a5 247// Windows uses an empty string for 'invariant'
729e4ab9 248ILCID_POSIX_SUBTABLE(en) {
b75a7d8f
A
249 {0x09, "en"},
250 {0x0c09, "en_AU"},
251 {0x2809, "en_BZ"},
252 {0x1009, "en_CA"},
253 {0x0809, "en_GB"},
4388f060
A
254 {0x3c09, "en_HK"},
255 {0x3809, "en_ID"},
b75a7d8f 256 {0x1809, "en_IE"},
73c04bcf 257 {0x4009, "en_IN"},
b75a7d8f 258 {0x2009, "en_JM"},
73c04bcf 259 {0x4409, "en_MY"},
b75a7d8f
A
260 {0x1409, "en_NZ"},
261 {0x3409, "en_PH"},
73c04bcf 262 {0x4809, "en_SG"},
b75a7d8f
A
263 {0x2C09, "en_TT"},
264 {0x0409, "en_US"},
f3c0d7a5 265 {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
0f5d89e8 266 {0x2409, "en_029"},
b75a7d8f 267 {0x1c09, "en_ZA"},
374ca955 268 {0x3009, "en_ZW"},
0f5d89e8 269 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
f3c0d7a5
A
270 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
271 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
272 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
273 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
274 {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
b75a7d8f
A
275};
276
729e4ab9 277ILCID_POSIX_SUBTABLE(en_US_POSIX) {
73c04bcf 278 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
b75a7d8f
A
279};
280
f3c0d7a5 281// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
729e4ab9 282ILCID_POSIX_SUBTABLE(es) {
b75a7d8f
A
283 {0x0a, "es"},
284 {0x2c0a, "es_AR"},
285 {0x400a, "es_BO"},
286 {0x340a, "es_CL"},
287 {0x240a, "es_CO"},
288 {0x140a, "es_CR"},
2ca993e8 289 {0x5c0a, "es_CU"},
b75a7d8f
A
290 {0x1c0a, "es_DO"},
291 {0x300a, "es_EC"},
292 {0x0c0a, "es_ES"}, /*Modern sort.*/
293 {0x100a, "es_GT"},
294 {0x480a, "es_HN"},
295 {0x080a, "es_MX"},
296 {0x4c0a, "es_NI"},
297 {0x180a, "es_PA"},
298 {0x280a, "es_PE"},
299 {0x500a, "es_PR"},
300 {0x3c0a, "es_PY"},
301 {0x440a, "es_SV"},
73c04bcf 302 {0x540a, "es_US"},
b75a7d8f
A
303 {0x380a, "es_UY"},
304 {0x200a, "es_VE"},
57a6839d 305 {0x580a, "es_419"},
73c04bcf 306 {0x040a, "es_ES@collation=traditional"},
f3c0d7a5 307 {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
b75a7d8f
A
308};
309
310ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
311ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
73c04bcf
A
312
313/* ISO-639 doesn't distinguish between Persian and Dari.*/
729e4ab9 314ILCID_POSIX_SUBTABLE(fa) {
73c04bcf
A
315 {0x29, "fa"},
316 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
317 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
318};
319
f3c0d7a5 320
73c04bcf 321/* duplicate for roundtripping */
729e4ab9 322ILCID_POSIX_SUBTABLE(fa_AF) {
73c04bcf
A
323 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
324 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
325};
326
51004dcb
A
327ILCID_POSIX_SUBTABLE(ff) {
328 {0x67, "ff"},
329 {0x7c67, "ff_Latn"},
2ca993e8
A
330 {0x0867, "ff_Latn_SN"},
331 {0x0467, "ff_NG"}
51004dcb
A
332};
333
b75a7d8f 334ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
73c04bcf 335ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
b75a7d8f
A
336ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
337
729e4ab9 338ILCID_POSIX_SUBTABLE(fr) {
b75a7d8f
A
339 {0x0c, "fr"},
340 {0x080c, "fr_BE"},
341 {0x0c0c, "fr_CA"},
374ca955 342 {0x240c, "fr_CD"},
4388f060 343 {0x240c, "fr_CG"},
b75a7d8f 344 {0x100c, "fr_CH"},
374ca955
A
345 {0x300c, "fr_CI"},
346 {0x2c0c, "fr_CM"},
b75a7d8f 347 {0x040c, "fr_FR"},
374ca955 348 {0x3c0c, "fr_HT"},
b75a7d8f 349 {0x140c, "fr_LU"},
374ca955
A
350 {0x380c, "fr_MA"},
351 {0x180c, "fr_MC"},
352 {0x340c, "fr_ML"},
353 {0x200c, "fr_RE"},
4388f060
A
354 {0x280c, "fr_SN"},
355 {0xe40c, "fr_015"},
356 {0x1c0c, "fr_029"}
374ca955
A
357};
358
4388f060
A
359ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
360
374ca955
A
361ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
362
4388f060
A
363ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
364 {0x3c, "ga"},
365 {0x083c, "ga_IE"},
366 {0x043c, "gd_GB"}
367};
368
369ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
370 {0x91, "gd"},
371 {0x0491, "gd_GB"}
372};
b75a7d8f
A
373
374ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
375ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
374ca955 376ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
73c04bcf 377ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
729e4ab9
A
378
379ILCID_POSIX_SUBTABLE(ha) {
380 {0x68, "ha"},
381 {0x7c68, "ha_Latn"},
382 {0x0468, "ha_Latn_NG"},
383};
384
374ca955 385ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
b75a7d8f
A
386ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
387ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
388
374ca955 389/* This LCID is really four different locales.*/
729e4ab9 390ILCID_POSIX_SUBTABLE(hr) {
b75a7d8f 391 {0x1a, "hr"},
73c04bcf 392 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 393 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
374ca955 394 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 395 {0x781a, "bs"}, /* Bosnian */
73c04bcf 396 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
729e4ab9 397 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
73c04bcf 398 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
b75a7d8f 399 {0x041a, "hr_HR"}, /* Croatian*/
729e4ab9
A
400 {0x2c1a, "sr_Latn_ME"},
401 {0x241a, "sr_Latn_RS"},
73c04bcf
A
402 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
403 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
729e4ab9 404 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
73c04bcf
A
405 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
406 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
729e4ab9
A
407 {0x301a, "sr_Cyrl_ME"},
408 {0x281a, "sr_Cyrl_RS"},
409 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
410 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
b75a7d8f
A
411};
412
b331163b
A
413ILCID_POSIX_SUBTABLE(hsb) {
414 {0x2E, "hsb"},
415 {0x042E, "hsb_DE"},
416 {0x082E, "dsb_DE"},
417 {0x7C2E, "dsb"},
418};
419
b75a7d8f
A
420ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
421ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
0f5d89e8
A
422
423ILCID_POSIX_SUBTABLE(ibb) {
424 {0x69, "ibb"},
425 {0x0469, "ibb_NG"}
426};
427
b75a7d8f 428ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
374ca955 429ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
73c04bcf 430ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
b75a7d8f
A
431ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
432
729e4ab9 433ILCID_POSIX_SUBTABLE(it) {
b75a7d8f
A
434 {0x10, "it"},
435 {0x0810, "it_CH"},
436 {0x0410, "it_IT"}
437};
438
729e4ab9 439ILCID_POSIX_SUBTABLE(iu) {
73c04bcf
A
440 {0x5d, "iu"},
441 {0x045d, "iu_Cans_CA"},
729e4ab9 442 {0x785d, "iu_Cans"},
73c04bcf 443 {0x085d, "iu_Latn_CA"},
729e4ab9 444 {0x7c5d, "iu_Latn"}
73c04bcf
A
445};
446
b75a7d8f
A
447ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
448ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
449ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
450ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
73c04bcf 451ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
374ca955 452ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
b75a7d8f
A
453ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
454
729e4ab9 455ILCID_POSIX_SUBTABLE(ko) {
b75a7d8f
A
456 {0x12, "ko"},
457 {0x0812, "ko_KP"},
458 {0x0412, "ko_KR"}
459};
460
461ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
374ca955
A
462ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
463
729e4ab9 464ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
374ca955 465 {0x60, "ks"},
4388f060
A
466 {0x0460, "ks_Arab_IN"},
467 {0x0860, "ks_Deva_IN"}
374ca955
A
468};
469
470ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
0f5d89e8
A
471
472ILCID_POSIX_SUBTABLE(la) {
473 {0x76, "la"},
474 {0x0476, "la_001"},
475 {0x0476, "la_IT"} /*Left in for compatibility*/
476};
477
73c04bcf 478ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
374ca955
A
479ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
480ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
481ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
482ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
483ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
484ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
73c04bcf 485
729e4ab9 486ILCID_POSIX_SUBTABLE(mn) {
73c04bcf 487 {0x50, "mn"},
729e4ab9
A
488 {0x0450, "mn_MN"},
489 {0x7c50, "mn_Mong"},
490 {0x0850, "mn_Mong_CN"},
73c04bcf 491 {0x0850, "mn_CN"},
57a6839d
A
492 {0x7850, "mn_Cyrl"},
493 {0x0c50, "mn_Mong_MN"}
73c04bcf
A
494};
495
374ca955 496ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
73c04bcf 497ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
374ca955 498ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
b75a7d8f 499
729e4ab9 500ILCID_POSIX_SUBTABLE(ms) {
b75a7d8f
A
501 {0x3e, "ms"},
502 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
503 {0x043e, "ms_MY"} /* Malaysia*/
504};
505
b75a7d8f 506ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
73c04bcf 507ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
b75a7d8f 508
729e4ab9 509ILCID_POSIX_SUBTABLE(ne) {
b75a7d8f
A
510 {0x61, "ne"},
511 {0x0861, "ne_IN"}, /* India*/
512 {0x0461, "ne_NP"} /* Nepal*/
513};
514
729e4ab9 515ILCID_POSIX_SUBTABLE(nl) {
b75a7d8f
A
516 {0x13, "nl"},
517 {0x0813, "nl_BE"},
518 {0x0413, "nl_NL"}
519};
520
521/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
f3c0d7a5 522// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
729e4ab9 523ILCID_POSIX_SUBTABLE(no) {
f3c0d7a5 524 {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
729e4ab9 525 {0x7c14, "nb"}, /* really nb */
374ca955 526 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
374ca955
A
527 {0x0414, "no_NO"}, /* really nb_NO */
528 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
729e4ab9 529 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
374ca955 530 {0x0814, "no_NO_NY"}/* really nn_NO */
b75a7d8f
A
531};
532
73c04bcf
A
533ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
534ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
4388f060
A
535
536ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
537 {0x72, "om"},
538 {0x0472, "om_ET"},
539 {0x0472, "gaz_ET"}
540};
374ca955 541
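b75a7d8f 542/* Declared as or_IN to get around compiler errors (presumably because `or` is an alternative operator token in C++ and cannot be used as a macro-pasted identifier). */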
729e4ab9 543ILCID_POSIX_SUBTABLE(or_IN) {
b75a7d8f
A
544 {0x48, "or"},
545 {0x0448, "or_IN"},
546};
547
729e4ab9 548ILCID_POSIX_SUBTABLE(pa) {
374ca955
A
549 {0x46, "pa"},
550 {0x0446, "pa_IN"},
0f5d89e8
A
551 {0x0846, "pa_Arab_PK"},
552 {0x0846, "pa_PK"}
553};
554
555ILCID_POSIX_SUBTABLE(pap) {
556 {0x79, "pap"},
557 {0x0479, "pap_029"},
558 {0x0479, "pap_AN"} /*Left in for compatibility*/
374ca955
A
559};
560
b75a7d8f 561ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
374ca955 562ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
b75a7d8f 563
729e4ab9 564ILCID_POSIX_SUBTABLE(pt) {
b75a7d8f
A
565 {0x16, "pt"},
566 {0x0416, "pt_BR"},
567 {0x0816, "pt_PT"}
568};
569
729e4ab9 570ILCID_POSIX_SUBTABLE(qu) {
73c04bcf
A
571 {0x6b, "qu"},
572 {0x046b, "qu_BO"},
573 {0x086b, "qu_EC"},
4388f060
A
574 {0x0C6b, "qu_PE"},
575 {0x046b, "quz_BO"},
576 {0x086b, "quz_EC"},
577 {0x0C6b, "quz_PE"}
374ca955
A
578};
579
2ca993e8
A
580ILCID_POSIX_SUBTABLE(quc) {
581 {0x93, "quc"},
582 {0x0493, "quc_CO"},
583 /*
584 "quc_Latn_GT" is an exceptional case. Language ID of "quc"
585 is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
586 under the group of "qut". "qut" is a retired ISO 639-3 language
587 code for West Central Quiche, which was merged into "quc".
588 It looks like Windows previously reserved "qut" for K'iche', but
589 decided to use "quc" when adding a locale for K'iche' (Guatemala).
590
591 The data structure used here assumes that the language ID bits in
592 an LCID are unique for each alphabetic language code. But this is not true
593 for "quc_Latn_GT". If we don't have the data below, LCID lookup
594 by alphabetic locale ID (POSIX) will fail. The same entry is found
595 under "qut" below, which is required for reverse lookup.
596 */
597 {0x0486, "quc_Latn_GT"}
598};
599
600ILCID_POSIX_SUBTABLE(qut) {
601 {0x86, "qut"},
602 {0x0486, "qut_GT"},
603 /*
604 See the note in "quc" above.
605 */
606 {0x0486, "quc_Latn_GT"}
607};
608
73c04bcf 609ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
4388f060
A
610
611ILCID_POSIX_SUBTABLE(ro) {
612 {0x18, "ro"},
613 {0x0418, "ro_RO"},
614 {0x0818, "ro_MD"}
615};
b75a7d8f 616
f3c0d7a5
A
617// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
618// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
619// (Except that it's not invariant in ICU)
729e4ab9 620ILCID_POSIX_SUBTABLE(root) {
b75a7d8f
A
621 {0x00, "root"}
622};
623
4388f060
A
624ILCID_POSIX_SUBTABLE(ru) {
625 {0x19, "ru"},
626 {0x0419, "ru_RU"},
627 {0x0819, "ru_MD"}
628};
629
73c04bcf 630ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
b75a7d8f 631ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
73c04bcf 632ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
374ca955 633
729e4ab9 634ILCID_POSIX_SUBTABLE(sd) {
374ca955 635 {0x59, "sd"},
2ca993e8 636 {0x0459, "sd_Deva_IN"},
0f5d89e8
A
637 {0x0459, "sd_IN"},
638 {0x0859, "sd_Arab_PK"},
639 {0x0859, "sd_PK"},
640 {0x7c59, "sd_Arab"}
374ca955
A
641};
642
729e4ab9 643ILCID_POSIX_SUBTABLE(se) {
73c04bcf
A
644 {0x3b, "se"},
645 {0x0c3b, "se_FI"},
646 {0x043b, "se_NO"},
647 {0x083b, "se_SE"},
729e4ab9 648 {0x783b, "sma"},
73c04bcf
A
649 {0x183b, "sma_NO"},
650 {0x1c3b, "sma_SE"},
729e4ab9
A
651 {0x7c3b, "smj"},
652 {0x703b, "smn"},
653 {0x743b, "sms"},
73c04bcf
A
654 {0x103b, "smj_NO"},
655 {0x143b, "smj_SE"},
656 {0x243b, "smn_FI"},
657 {0x203b, "sms_FI"},
658};
659
374ca955 660ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
b75a7d8f
A
661ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
662ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
4388f060 663
0f5d89e8 664ILCID_POSIX_SUBTABLE(so) {
4388f060 665 {0x77, "so"},
4388f060
A
666 {0x0477, "so_SO"}
667};
668
b75a7d8f 669ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
4388f060 670ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
b75a7d8f 671
729e4ab9 672ILCID_POSIX_SUBTABLE(sv) {
b75a7d8f
A
673 {0x1d, "sv"},
674 {0x081d, "sv_FI"},
675 {0x041d, "sv_SE"}
676};
677
678ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
679ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
51004dcb
A
680
681ILCID_POSIX_SUBTABLE(ta) {
682 {0x49, "ta"},
683 {0x0449, "ta_IN"},
684 {0x0849, "ta_LK"}
685};
686
b75a7d8f 687ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
729e4ab9
A
688
689/* Cyrillic based by default */
690ILCID_POSIX_SUBTABLE(tg) {
691 {0x28, "tg"},
692 {0x7c28, "tg_Cyrl"},
693 {0x0428, "tg_Cyrl_TJ"}
694};
695
b75a7d8f 696ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
374ca955 697
729e4ab9 698ILCID_POSIX_SUBTABLE(ti) {
374ca955
A
699 {0x73, "ti"},
700 {0x0873, "ti_ER"},
701 {0x0473, "ti_ET"}
702};
703
704ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
4388f060
A
705
706ILCID_POSIX_SUBTABLE(tn) {
707 {0x32, "tn"},
51004dcb 708 {0x0832, "tn_BW"},
4388f060
A
709 {0x0432, "tn_ZA"}
710};
711
b75a7d8f 712ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
4388f060 713ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
b75a7d8f 714ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
729e4ab9
A
715
716ILCID_POSIX_SUBTABLE(tzm) {
717 {0x5f, "tzm"},
718 {0x7c5f, "tzm_Latn"},
4388f060 719 {0x085f, "tzm_Latn_DZ"},
51004dcb 720 {0x105f, "tzm_Tfng_MA"},
2ca993e8 721 {0x045f, "tzm_Arab_MA"},
4388f060
A
722 {0x045f, "tmz"}
723};
724
725ILCID_POSIX_SUBTABLE(ug) {
726 {0x80, "ug"},
727 {0x0480, "ug_CN"},
728 {0x0480, "ug_Arab_CN"}
729e4ab9
A
729};
730
b75a7d8f
A
731ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
732
729e4ab9 733ILCID_POSIX_SUBTABLE(ur) {
b75a7d8f
A
734 {0x20, "ur"},
735 {0x0820, "ur_IN"},
736 {0x0420, "ur_PK"}
737};
738
729e4ab9 739ILCID_POSIX_SUBTABLE(uz) {
b75a7d8f 740 {0x43, "uz"},
73c04bcf 741 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
729e4ab9 742 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
b75a7d8f 743 {0x0843, "uz_UZ"}, /* Cyrillic based */
73c04bcf 744 {0x0443, "uz_Latn_UZ"}, /* Latin based */
729e4ab9 745 {0x7c43, "uz_Latn"} /* Latin based */
b75a7d8f
A
746};
747
4388f060
A
748ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
749 {0x33, "ve"},
750 {0x0433, "ve_ZA"},
751 {0x0433, "ven_ZA"}
752};
753
b75a7d8f 754ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
73c04bcf
A
755ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
756ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
0f5d89e8
A
757
758ILCID_POSIX_SUBTABLE(yi) {
759 {0x003d, "yi"},
760 {0x043d, "yi_001"}
761};
762
73c04bcf
A
763ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
764
f3c0d7a5
A
765// Windows & ICU tend to use different names for some of these
766// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
729e4ab9
A
767ILCID_POSIX_SUBTABLE(zh) {
768 {0x0004, "zh_Hans"},
769 {0x7804, "zh"},
b75a7d8f 770 {0x0804, "zh_CN"},
729e4ab9 771 {0x0804, "zh_Hans_CN"},
374ca955 772 {0x0c04, "zh_Hant_HK"},
b75a7d8f 773 {0x0c04, "zh_HK"},
374ca955 774 {0x1404, "zh_Hant_MO"},
b75a7d8f 775 {0x1404, "zh_MO"},
374ca955 776 {0x1004, "zh_Hans_SG"},
b75a7d8f 777 {0x1004, "zh_SG"},
374ca955 778 {0x0404, "zh_Hant_TW"},
729e4ab9 779 {0x7c04, "zh_Hant"},
b75a7d8f 780 {0x0404, "zh_TW"},
73c04bcf 781 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
374ca955 782 {0x30404,"zh_TW"}, /* Bopomofo order */
729e4ab9 783 {0x20004,"zh@collation=stroke"},
73c04bcf 784 {0x20404,"zh_Hant@collation=stroke"},
729e4ab9 785 {0x20404,"zh_Hant_TW@collation=stroke"},
73c04bcf 786 {0x20404,"zh_TW@collation=stroke"},
73c04bcf 787 {0x20804,"zh_Hans@collation=stroke"},
729e4ab9 788 {0x20804,"zh_Hans_CN@collation=stroke"},
73c04bcf 789 {0x20804,"zh_CN@collation=stroke"}
f3c0d7a5 790 // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
b75a7d8f
A
791};
792
73c04bcf 793ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
374ca955 794
b75a7d8f
A
795/* This must be static and grouped by LCID. */
796static const ILcidPosixMap gPosixIDmap[] = {
797 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
374ca955 798 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
b75a7d8f 799 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
73c04bcf 800 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
b75a7d8f
A
801 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
802 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
73c04bcf 803 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
374ca955 804 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
729e4ab9 805/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
b75a7d8f 806 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
4388f060 807 ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
b75a7d8f 808 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
374ca955 809 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
73c04bcf 810 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
b75a7d8f 811 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
374ca955 812 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
51004dcb 813 ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
73c04bcf 814 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
729e4ab9 815 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
374ca955 816 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
b75a7d8f
A
817 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
818 ILCID_POSIX_MAP(de), /* de German 0x07 */
374ca955 819 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
b75a7d8f
A
820 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
821 ILCID_POSIX_MAP(en), /* en English 0x09 */
822 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
823 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
824 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
825 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
73c04bcf
A
826 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
827 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
51004dcb 828 ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
b75a7d8f 829 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
73c04bcf 830 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
b75a7d8f
A
831 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
832 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
4388f060 833 ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
374ca955
A
834 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
835 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
729e4ab9 836 ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
b75a7d8f 837 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
374ca955 838 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
73c04bcf 839 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
b75a7d8f 840 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
374ca955
A
841 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
842 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
b75a7d8f
A
843 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
844 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
374ca955 845 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
b331163b 846 ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
b75a7d8f
A
847 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
848 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
4388f060 849 ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
b75a7d8f 850 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
374ca955 851 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
73c04bcf 852 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
b75a7d8f
A
853 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
854 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
374ca955 855 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
b75a7d8f
A
856 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
857 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
858 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
859 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
73c04bcf 860 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
374ca955 861 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
b75a7d8f 862 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
b75a7d8f
A
863 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
864 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
374ca955 865 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
b75a7d8f 866 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
374ca955 867 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
73c04bcf 868 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
374ca955
A
869 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
870 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
b75a7d8f
A
871 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
872 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
374ca955 873 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
b75a7d8f
A
874 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
875 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
876 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
877 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
73c04bcf 878 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
b75a7d8f
A
879 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
880 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
881 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
73c04bcf 882 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
374ca955 883/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
b75a7d8f
A
884 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
885 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
374ca955
A
886/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
887 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
888 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
73c04bcf 889 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
374ca955 890 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
b75a7d8f
A
891 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
892 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
4388f060 893 ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
b75a7d8f 894 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
374ca955 895 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
b75a7d8f 896 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
73c04bcf 897 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
2ca993e8 898 ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
73c04bcf
A
899 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
900 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
b75a7d8f
A
901 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
902 ILCID_POSIX_MAP(root), /* root 0x00 */
903 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
73c04bcf 904 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
b75a7d8f 905 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
73c04bcf 906 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
b75a7d8f 907 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
73c04bcf 908 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
374ca955
A
909/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
910 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
b75a7d8f
A
911 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
912 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
374ca955 913 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
b75a7d8f 914 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
374ca955 915/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
4388f060 916 ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
b75a7d8f
A
917 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
918 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
919 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
920 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
921 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
73c04bcf 922 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
b75a7d8f 923 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
374ca955
A
924 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
925 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
374ca955 926 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
b75a7d8f 927 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
4388f060 928 ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
b75a7d8f 929 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
4388f060 930 ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
374ca955 931 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
b75a7d8f
A
932 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
933 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
934 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
374ca955 935 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
b75a7d8f 936 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
73c04bcf 937 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
374ca955 938 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
4388f060 939 ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
374ca955 940 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
b75a7d8f 941 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
374ca955 942 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
b75a7d8f
A
943};
944
2ca993e8 945static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
b75a7d8f 946
374ca955
A
/**
 * Measures the common prefix of two NUL-terminated locale IDs.
 * Internal helper used by getHostID.
 *
 * @param id1 first ID
 * @param id2 second ID
 * @return the number of leading characters that are identical in both
 *         strings; counting stops at the first difference or at the end
 *         of id1.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;
    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; matched++) {
        /* nothing to do: the loop header does the counting */
    }
    return matched;
}
963
964/**
965 * Searches for a Windows LCID
966 *
3d1f044b 967 * @param posixID the Posix style locale id.
b75a7d8f
A
968 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
969 * no equivalent Windows LCID.
970 * @return the LCID
971 */
972static uint32_t
374ca955 973getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
b75a7d8f
A
974{
975 int32_t bestIdx = 0;
976 int32_t bestIdxDiff = 0;
73c04bcf 977 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
b75a7d8f
A
978 uint32_t idx;
979
980 for (idx = 0; idx < this_0->numRegions; idx++ ) {
981 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
982 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
983 if (posixIDlen == sameChars) {
984 /* Exact match */
985 return this_0->regionMaps[idx].hostID;
986 }
987 bestIdxDiff = sameChars;
988 bestIdx = idx;
989 }
990 }
73c04bcf
A
991 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
992 /* We also have to make sure that sid and si and similar string subsets don't match. */
993 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
994 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
995 {
b75a7d8f
A
996 *status = U_USING_FALLBACK_WARNING;
997 return this_0->regionMaps[bestIdx].hostID;
998 }
999
1000 /*no match found */
1001 *status = U_ILLEGAL_ARGUMENT_ERROR;
1002 return this_0->regionMaps->hostID;
1003}
1004
1005static const char*
374ca955 1006getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
b75a7d8f
A
1007{
1008 uint32_t i;
0f5d89e8 1009 for (i = 0; i < this_0->numRegions; i++)
b75a7d8f
A
1010 {
1011 if (this_0->regionMaps[i].hostID == hostID)
1012 {
1013 return this_0->regionMaps[i].posixID;
1014 }
1015 }
1016
1017 /* If you get here, then no matching region was found,
1018 so return the language id with the wild card region. */
1019 return this_0->regionMaps[0].posixID;
1020}
1021
1022/*
1023//////////////////////////////////////
1024//
1025// LCID --> POSIX
1026//
1027/////////////////////////////////////
1028*/
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
/*
 * Various language tags needs to be changed:
 * quz -> qu
 * prs -> fa
 *
 * The three-letter tag at the start of `buffer` is rewritten in place to its
 * two-letter form and the remainder of the string (including the terminating
 * NUL) is shifted one position to the left.
 *
 * The shift uses uprv_memmove because source and destination overlap;
 * concatenating a string onto itself with strcat is undefined behavior.
 *
 * buffer: writable, NUL-terminated char array.
 * len:    length value supplied by the caller; nothing is done when it is
 *         smaller than 3.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)

#endif
3d1f044b 1047
57a6839d
A
1048U_CAPI int32_t
1049uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
b75a7d8f 1050{
729e4ab9
A
1051 uint16_t langID;
1052 uint32_t localeIndex;
57a6839d
A
1053 UBool bLookup = TRUE;
1054 const char *pPosixID = NULL;
729e4ab9 1055
3d1f044b
A
1056#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
1057 static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");
1058
1059 char locName[LOCALE_NAME_MAX_LENGTH] = {};
0f5d89e8 1060
f3c0d7a5
A
1061 // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1062 // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1063 // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1064 // use the Windows API to resolve locale ID for this specific case.
1065 if ((hostid & 0x3FF) != 0x92) {
1066 int32_t tmpLen = 0;
3d1f044b 1067 char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};
f3c0d7a5
A
1068
1069 // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1070 tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
1071 if (tmpLen > 1) {
1072 int32_t i = 0;
1073 // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
57a6839d 1074 bLookup = FALSE;
f3c0d7a5
A
1075 for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1076 {
1077 locName[i] = (char)(windowsLocaleName[i]);
1078
1079 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1080 // In such cases, we need special mapping data found in the hardcoded table
1081 // in this source file.
1082 if (windowsLocaleName[i] == L'_')
1083 {
1084 // Keep the base locale, without variant
1085 // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1086 locName[i] = '\0';
1087 tmpLen = i;
1088 bLookup = TRUE;
1089 break;
1090 }
1091 else if (windowsLocaleName[i] == L'-')
1092 {
1093 // Windows names use -, ICU uses _
1094 locName[i] = '_';
1095 }
1096 else if (windowsLocaleName[i] == L'\0')
1097 {
1098 // No point in doing more work than necessary
1099 break;
1100 }
57a6839d 1101 }
f3c0d7a5
A
1102 // TODO: Need to understand this better, why isn't it an alias?
1103 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1104 pPosixID = locName;
57a6839d 1105 }
729e4ab9 1106 }
3d1f044b 1107#endif
f3c0d7a5 1108
57a6839d
A
1109 if (bLookup) {
1110 const char *pCandidate = NULL;
1111 langID = LANGUAGE_LCID(hostid);
1112
1113 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1114 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1115 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1116 break;
1117 }
1118 }
b75a7d8f 1119
57a6839d
A
1120 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1121 If a match in the hardcoded table is longer than the Windows locale name without
1122 variant, we use the one as the result */
1123 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1124 pPosixID = pCandidate;
1125 }
1126 }
1127
1128 if (pPosixID) {
0f5d89e8 1129 int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
57a6839d
A
1130 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1131 uprv_memcpy(posixID, pPosixID, copyLen);
1132 if (resLen < posixIDCapacity) {
1133 posixID[resLen] = 0;
1134 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1135 *status = U_ZERO_ERROR;
1136 }
1137 } else if (resLen == posixIDCapacity) {
1138 *status = U_STRING_NOT_TERMINATED_WARNING;
1139 } else {
1140 *status = U_BUFFER_OVERFLOW_ERROR;
b75a7d8f 1141 }
57a6839d 1142 return resLen;
b75a7d8f
A
1143 }
1144
1145 /* no match found */
1146 *status = U_ILLEGAL_ARGUMENT_ERROR;
57a6839d 1147 return -1;
b75a7d8f
A
1148}
1149
1150/*
1151//////////////////////////////////////
1152//
1153// POSIX --> LCID
374ca955
A
1154// This should only be called from uloc_getLCID.
1155// The locale ID must be in canonical form.
b75a7d8f
A
1156//
1157/////////////////////////////////////
1158*/
f3c0d7a5 1159U_CAPI uint32_t
3d1f044b 1160uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
f3c0d7a5 1161{
3d1f044b
A
1162 if (U_FAILURE(*status)) {
1163 return 0;
1164 }
f3c0d7a5 1165
3d1f044b
A
1166 // The purpose of this function is to leverage the Windows platform name->lcid
1167 // conversion functionality when available.
1168#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
f3c0d7a5
A
1169 int32_t len;
1170 char collVal[ULOC_KEYWORDS_CAPACITY] = {};
1171 char baseName[ULOC_FULLNAME_CAPACITY] = {};
1172 const char * mylocaleID = localeID;
1173
1174 // Check any for keywords.
1175 if (uprv_strchr(localeID, '@'))
1176 {
3d1f044b
A
1177 len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, status);
1178 if (U_SUCCESS(*status) && len > 0)
f3c0d7a5
A
1179 {
1180 // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1181 return 0;
1182 }
1183 else
1184 {
1185 // If the locale ID contains keywords other than collation, just use the base name.
3d1f044b 1186 len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
f3c0d7a5 1187
3d1f044b 1188 if (U_SUCCESS(*status) && len > 0)
f3c0d7a5
A
1189 {
1190 baseName[len] = 0;
1191 mylocaleID = baseName;
1192 }
1193 }
1194 }
1195
1196 char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1197 // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
3d1f044b 1198 (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, status);
f3c0d7a5 1199
3d1f044b 1200 if (U_SUCCESS(*status))
f3c0d7a5
A
1201 {
1202 // Need it to be UTF-16, not 8-bit
1203 wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1204 int32_t i;
1205 for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1206 {
1207 if (asciiBCP47Tag[i] == '\0')
1208 {
1209 break;
1210 }
1211 else
1212 {
1213 // Copy the character
1214 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1215 }
1216 }
1217
1218 if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1219 {
1220 // Ensure it's null terminated
1221 bcp47Tag[i] = L'\0';
3d1f044b 1222 LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
f3c0d7a5
A
1223 if (lcid > 0)
1224 {
1225 // Found LCID from windows, return that one, unless its completely ambiguous
1226 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1227 // for this process.
1228 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1229 {
1230 return lcid;
1231 }
1232 }
1233 }
1234 }
0f5d89e8 1235#else
3d1f044b
A
1236 (void) localeID; // Suppress unused variable warning.
1237#endif
f3c0d7a5 1238
3d1f044b 1239 // Nothing found, or not implemented.
f3c0d7a5
A
1240 return 0;
1241}
b75a7d8f
A
1242
1243U_CAPI uint32_t
374ca955 1244uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
b75a7d8f 1245{
f3c0d7a5
A
1246 // This function does the table lookup when native platform name->lcid conversion isn't available,
1247 // or for locales that don't follow patterns the platform expects.
b75a7d8f 1248 uint32_t low = 0;
374ca955 1249 uint32_t high = gLocaleCount;
4388f060 1250 uint32_t mid;
374ca955 1251 uint32_t oldmid = 0;
b75a7d8f 1252 int32_t compVal;
b75a7d8f
A
1253
1254 uint32_t value = 0;
1255 uint32_t fallbackValue = (uint32_t)-1;
1256 UErrorCode myStatus;
1257 uint32_t idx;
1258
1259 /* Check for incomplete id. */
374ca955 1260 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
b75a7d8f
A
1261 return 0;
1262 }
1263
1264 /*Binary search for the map entry for normal cases */
b75a7d8f 1265
374ca955 1266 while (high > low) /*binary search*/{
b75a7d8f 1267
374ca955
A
1268 mid = (high+low) >> 1; /*Finds median*/
1269
1270 if (mid == oldmid)
1271 break;
b75a7d8f 1272
374ca955
A
1273 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1274 if (compVal < 0){
1275 high = mid;
1276 }
1277 else if (compVal > 0){
1278 low = mid;
1279 }
1280 else /*we found it*/{
1281 return getHostID(&gPosixIDmap[mid], posixID, status);
1282 }
1283 oldmid = mid;
b75a7d8f
A
1284 }
1285
1286 /*
1287 * Sometimes we can't do a binary search on posixID because some LCIDs
1288 * go to different locales. We hit one of those special cases.
1289 */
1290 for (idx = 0; idx < gLocaleCount; idx++ ) {
1291 myStatus = U_ZERO_ERROR;
374ca955 1292 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
b75a7d8f
A
1293 if (myStatus == U_ZERO_ERROR) {
1294 return value;
1295 }
1296 else if (myStatus == U_USING_FALLBACK_WARNING) {
1297 fallbackValue = value;
1298 }
1299 }
1300
1301 if (fallbackValue != (uint32_t)-1) {
1302 *status = U_USING_FALLBACK_WARNING;
1303 return fallbackValue;
1304 }
1305
1306 /* no match found */
1307 *status = U_ILLEGAL_ARGUMENT_ERROR;
1308 return 0; /* return international (root) */
1309}