icuSources/common/locmap.c

   1 /*
   2  **********************************************************************
   3  *   Copyright (C) 1996-2003, International Business Machines
   4  *   Corporation and others.  All Rights Reserved.
   5  **********************************************************************
   6 */
   7 /*
   8  *
   9  * Provides functionality for mapping between
  10  * LCID and Posix IDs or ICU locale to codepage
  11  *
  12  * Note: All classes and code in this file are
  13  *       intended for internal use only.
  14  *
  15  * Methods of interest:
  16  *   unsigned long convertToLCID(const int8_t*);
  17  *   const int8_t* convertToPosix(unsigned long);
  18  *
  19  * Kathleen Wilson, 4/30/96
  20  *
  21  *  Date        Name        Description
  22  *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
  23  *                          setId() method and safety check against
  24  *                          MAX_ID_LENGTH.
  25  * 04/23/99     stephen     Added C wrapper for convertToPosix.
  26  * 09/18/00     george      Removed the memory leaks.
  27  * 08/23/01     george      Convert to C
  28  */
  29
  30 #include "locmap.h"
  31 #include "cstring.h"
  32
  33 static struct
  34 {
  35   const char *loc;
  36   const char *charmap;
  37 }
  38 const _localeToDefaultCharmapTable [] =
  39 {
  40 /*
  41   See:         http://czyborra.com/charsets/iso8859.html
  42 */
  43
  44 /* xx_XX locales first, so they will match: */
  45  { "zh_CN", "gb2312" },  /* Chinese (Simplified) */
  46  { "zh_TW", "Big5" },    /* Chinese (Traditional) */
  47
  48  { "af", "iso-8859-1" },  /* Afrikaans */
  49  { "ar", "iso-8859-6" },  /* Arabic */
  50  { "be", "iso-8859-5" },  /* Byelorussian */
  51  { "bg", "iso-8859-5" },  /* Bulgarian */
  52  { "ca", "iso-8859-1" },  /* Catalan */
  53  { "cs", "iso-8859-2" },  /* Czech */
  54  { "da", "iso-8859-1" },  /* Danish */
  55  { "de", "iso-8859-1" },  /* German */
  56  { "el", "iso-8859-7" },  /* Greek */
  57  { "en", "iso-8859-1" },  /* English */
  58  { "eo", "iso-8859-3" },  /* Esperanto */
  59  { "es", "iso-8859-1" },  /* Spanish */
  60  { "et", "iso-8859-4" },  /* Estonian  */
  61  { "eu", "iso-8859-1" },  /* basque */
  62  { "fi", "iso-8859-1" },  /* Finnish */
  63  { "fo", "iso-8859-1" },  /* faroese */
  64  { "fr", "iso-8859-1" },  /* French */
  65  { "ga", "iso-8859-1" },  /* Irish (Gaelic) */
  66  { "gd", "iso-8859-1" },  /* Scottish */
  67  { "he", "iso-8859-8" },  /* hebrew */
  68  { "hr", "iso-8859-2" },  /* Croatian */
  69  { "hu", "iso-8859-2" },  /* Hungarian */
  70  { "in", "iso-8859-1" },  /* Indonesian */
  71  { "is", "iso-8859-1" },  /* Icelandic */
  72  { "it", "iso-8859-1" },  /* Italian  */
  73  { "iw", "iso-8859-8" },  /* hebrew old ISO name */
  74  { "ja", "Shift_JIS"  },  /* Japanese [was: ja_JP ] */
  75  { "ji", "iso-8859-8" },  /* Yiddish */
  76  { "kl", "iso-8859-4" },  /* Greenlandic */
  77  { "ko", "euc-kr"     },  /* korean [was: ko_KR ] */
  78  { "lt", "iso-8859-4" },  /* Lithuanian */
  79  { "lv", "iso-8859-4" },  /* latvian (lettish) */
  80  { "mk", "iso-8859-5" },  /* Macedonian */
  81  { "mt", "iso-8859-3" },  /* Maltese  */
  82  { "nb", "iso-8859-1" },  /* Norwegian Bokmal */
  83  { "nl", "iso-8859-1" },  /* dutch */
  84  { "no", "iso-8859-1" },  /* Norwegian old ISO name*/
  85  { "nn", "iso-8859-1" },  /* Norwegian Nynorsk */
  86  { "pl", "iso-8859-2" },  /* Polish */
  87  { "pt", "iso-8859-1" },  /* Portugese */
  88  { "rm", "iso-8859-1" },  /* Rhaeto-romance */
  89  { "ro", "iso-8859-2" },  /* Romanian */
  90  { "ru", "iso-8859-5" },  /* Russian */
  91  { "sk", "iso-8859-2" },  /* Slovak  */
  92  { "sl", "iso-8859-2" },  /* Slovenian */
  93  { "sq", "iso-8859-1" },  /* albanian */
  94  { "sr", "iso-8859-5" },  /* Serbian */
  95  { "sv", "iso-8859-1" },  /* Swedish */
  96  { "sw", "iso-8859-1" },  /* Swahili */
  97  { "th", "tis-620"    },  /* Thai [windows-874] */
  98  { "tr", "iso-8859-9" },  /* Turkish */
  99  { "uk", "iso-8859-5" },  /* pre 1990 Ukranian... see: <http://czyborra.com/charsets/cyrillic.html#KOI8-U>  */
 100  { "zh", "Big-5"      },  /* Chinese (Traditional) */
 101  { NULL, NULL         }
 102 };
 103
 104 U_CAPI const char* U_EXPORT2
 105 uprv_defaultCodePageForLocale(const char *locale)
 106 {
 107     int32_t i;
 108     int32_t locale_len;
 109
 110     if (locale == NULL)
 111     {
 112         return NULL;
 113     }
 114     locale_len = (int32_t)uprv_strlen(locale);
 115
 116     if(locale_len < 2)
 117     {
 118         return NULL; /* non existent. Not a complete check, but it will
 119                       * make sure that 'c' doesn't match catalan, etc.
 120                       */
 121     }
 122
 123     for(i=0; _localeToDefaultCharmapTable[i].loc; i++)
 124     {
 125         if(uprv_strncmp(locale, _localeToDefaultCharmapTable[i].loc,
 126                         uprv_min(locale_len,
 127                                  (int32_t)uprv_strlen(_localeToDefaultCharmapTable[i].loc)))
 128             == 0)
 129         {
 130             return _localeToDefaultCharmapTable[i].charmap;
 131         }
 132     }
 133
 134     return NULL;
 135 }
 136
 137 #ifdef WIN32
 138
 139 /*
 140  * Note:
 141  * This code is used only internally by putil.c/uprv_getDefaultLocaleID().
 142  * The mapping from Win32 locale ID numbers to POSIX locale strings should
 143  * be the faster one. It is more important to get the LCID to ICU locale
 144  * mapping correct than to get a correct ICU locale to LCID mapping.
 145  *
 146  * In order to test this code, please use the lcid test program.
 147  *
 148  * The LCID values come from winnt.h
 149  */
 150
 151 #include "unicode/uloc.h"
 152
 153
 154 /*
 155 ////////////////////////////////////////////////
 156 //
 157 // Internal Classes for LCID <--> POSIX Mapping
 158 //
 159 /////////////////////////////////////////////////
 160 */
 161
 162 typedef struct ILcidPosixElement
 163 {
 164     const uint32_t hostID;
 165     const char * const posixID;
 166 } ILcidPosixElement;
 167
 168 typedef struct ILcidPosixMap
 169 {
 170     const uint32_t numRegions;
 171     const struct ILcidPosixElement* const regionMaps;
 172 } ILcidPosixMap;
 173
 174 static const char* posixID(const ILcidPosixMap *this_0, uint32_t fromHostID);
 175
 176 /**
 177  * Searches for a Windows LCID
 178  *
 179  * @param posixid the Posix style locale id.
 180  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
 181  *               no equivalent Windows LCID.
 182  * @return the LCID
 183  */
 184 static uint32_t hostID(const ILcidPosixMap *this_0, const char* fromPosixID, UErrorCode* status);
 185
 186 /**
 187  * Do not call this function. It is called by hostID.
 188  * The function is not private because this struct must stay as a C struct,
 189  * and this is an internal class.
 190  */
 191 static int32_t idCmp(const char* id1, const char* id2);
 192
 193
 194 /*
 195 /////////////////////////////////////////////////
 196 //
 197 // Easy macros to make the LCID <--> POSIX Mapping
 198 //
 199 /////////////////////////////////////////////////
 200 */
 201
 202 /*
 203  The standard one language/one country mapping for LCID.
 204  The first element must be the language, and the following
 205  elements are the language with the country.
 206  */
 207 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
 208 static const ILcidPosixElement languageID[] = { \
 209     {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
 210     {hostID, #posixID}, \
 211 };
 212
 213 /*
 214  Create the map for the posixID. This macro supposes that the language string
 215  name is the same as the global variable name, and that the first element
 216  in the ILcidPosixElement is just the language.
 217  */
 218 #define ILCID_POSIX_MAP(_posixID) \
 219     {sizeof(_posixID)/sizeof(ILcidPosixElement), _posixID}
 220
 221 /*
 222 ////////////////////////////////////////////
 223 //
 224 // Create the table of LCID to POSIX Mapping
 225 // None of it should be dynamically created.
 226 //
 227 // Keep static locale variables inside the function so that
 228 // it can be created properly during static init.
 229 //
 230 ////////////////////////////////////////////
 231 */
 232
 233 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
 234
 235 static const ILcidPosixElement ar[] = {
 236     {0x01,   "ar"},
 237     {0x3801, "ar_AE"},
 238     {0x3c01, "ar_BH"},
 239     {0x1401, "ar_DZ"},
 240     {0x0c01, "ar_EG"},
 241     {0x0801, "ar_IQ"},
 242     {0x2c01, "ar_JO"},
 243     {0x3401, "ar_KW"},
 244     {0x3001, "ar_LB"},
 245     {0x1001, "ar_LY"},
 246     {0x1801, "ar_MA"},
 247     {0x2001, "ar_OM"},
 248     {0x4001, "ar_QA"},
 249     {0x0401, "ar_SA"},
 250     {0x2801, "ar_SY"},
 251     {0x1c01, "ar_TN"},
 252     {0x2401, "ar_YE"}
 253 };
 254
 255 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)    /*Todo: Data does not exist*/
 256
 257 static const ILcidPosixElement az[] = {
 258     {0x2c,   "az"},
 259     {0x082c, "az_AZ"},  /* Cyrillic based */
 260     {0x042c, "az_AZ_LATN"} /* Latin based */
 261 };
 262
 263 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
 264 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
 265 ILCID_POSIX_ELEMENT_ARRAY(0x0445, bn, bn_IN)
 266 ILCID_POSIX_ELEMENT_ARRAY(0x0403, ca, ca_ES)
 267 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
 268 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
 269
 270 static const ILcidPosixElement de[] = {
 271     {0x07,   "de"},
 272     {0x0c07, "de_AT"},
 273     {0x0807, "de_CH"},
 274     {0x0407, "de_DE"},
 275     {0x1407, "de_LI"},
 276     {0x1007, "de_LU"},
 277     {0x10407,"de__PHONEBOOK"}  /*This is really de_DE_PHONEBOOK on Windows, maybe 10007*/
 278 };
 279
 280 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
 281 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
 282
 283 static const ILcidPosixElement en[] = {
 284     {0x09,   "en"},
 285     {0x0c09, "en_AU"},
 286     {0x2809, "en_BZ"},
 287     {0x1009, "en_CA"},
 288     {0x0809, "en_GB"},
 289     {0x1809, "en_IE"},
 290     {0x2009, "en_JM"},
 291     {0x1409, "en_NZ"},
 292     {0x3409, "en_PH"},
 293     {0x2C09, "en_TT"},
 294     {0x0409, "en_US"},
 295     {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
 296     {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). */
 297     {0x1c09, "en_ZA"},
 298     {0x3009, "en_ZW"}
 299 };
 300
 301 static const ILcidPosixElement en_US_POSIX[] = {
 302     {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
 303 };
 304
 305 static const ILcidPosixElement es[] = {
 306     {0x0a,   "es"},
 307     {0x2c0a, "es_AR"},
 308     {0x400a, "es_BO"},
 309     {0x340a, "es_CL"},
 310     {0x240a, "es_CO"},
 311     {0x140a, "es_CR"},
 312     {0x1c0a, "es_DO"},
 313     {0x300a, "es_EC"},
 314     {0x0c0a, "es_ES"},      /*Modern sort.*/
 315     {0x100a, "es_GT"},
 316     {0x480a, "es_HN"},
 317     {0x080a, "es_MX"},
 318     {0x4c0a, "es_NI"},
 319     {0x180a, "es_PA"},
 320     {0x280a, "es_PE"},
 321     {0x500a, "es_PR"},
 322     {0x3c0a, "es_PY"},
 323     {0x440a, "es_SV"},
 324     {0x380a, "es_UY"},
 325     {0x200a, "es_VE"},
 326     {0x040a, "es__TRADITIONAL"}  /*This is really es_ES_TRADITIONAL on Windows */
 327 };
 328
 329 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
 330 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
 331 ILCID_POSIX_ELEMENT_ARRAY(0x0429, fa, fa_IR)
 332 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
 333 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
 334
 335 static const ILcidPosixElement fr[] = {
 336     {0x0c,   "fr"},
 337     {0x080c, "fr_BE"},
 338     {0x0c0c, "fr_CA"},
 339     {0x100c, "fr_CH"},
 340     {0x040c, "fr_FR"},
 341     {0x140c, "fr_LU"},
 342     {0x180c, "fr_MC"}
 343 };
 344
 345 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
 346 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
 347 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
 348 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
 349
 350 /* This LCID is really three different locales.*/
 351 static const ILcidPosixElement hr[] = {
 352     {0x1a,   "hr"},
 353     {0x041a, "hr_HR"},  /* Croatian*/
 354     {0x081a, "sh_YU"},  /* Serbo-Croatian*/
 355     {0x081a, "sh"},     /* It's 0x1a or 0x081a, pick one to make the test program happy. */
 356     {0x0c1a, "sr_YU"},  /* Serbian*/
 357     {0x0c1a, "sr"},     /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
 358 };
 359
 360 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
 361 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
 362 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
 363 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
 364
 365 static const ILcidPosixElement it[] = {
 366     {0x10,   "it"},
 367     {0x0810, "it_CH"},
 368     {0x0410, "it_IT"}
 369 };
 370
 371 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
 372 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
 373 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
 374 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
 375 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
 376
 377 static const ILcidPosixElement ko[] = {
 378     {0x12,   "ko"},
 379     {0x0812, "ko_KP"},
 380     {0x0412, "ko_KR"}
 381 };
 382
 383 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
 384 ILCID_POSIX_ELEMENT_ARRAY(0x0460, ks,  ks_IN)   /*Todo: Data does not exist*/
 385 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky,  ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
 386 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt,  lt_LT)
 387 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv,  lv_LV)
 388 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk,  mk_MK)
 389 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml,  ml_IN)   /*Todo: Data does not exist*/
 390 ILCID_POSIX_ELEMENT_ARRAY(0x0450, mn,  mn_MN)
 391 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni, mni_IN)  /*Todo: Data does not exist*/
 392 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr,  mr_IN)
 393
 394 static const ILcidPosixElement ms[] = {
 395     {0x3e,   "ms"},
 396     {0x083e, "ms_BN"},   /* Brunei Darussalam*/
 397     {0x043e, "ms_MY"}    /* Malaysia*/
 398 };
 399
 400 /* The MSJDK documentation says this is maltese, but it's not supported.*/
 401 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
 402
 403 static const ILcidPosixElement ne[] = {         /*Todo: Data does not exist*/
 404     {0x61,   "ne"},
 405     {0x0861, "ne_IN"},   /* India*/
 406     {0x0461, "ne_NP"}    /* Nepal*/
 407 };
 408
 409 static const ILcidPosixElement nl[] = {
 410     {0x13,   "nl"},
 411     {0x0813, "nl_BE"},
 412     {0x0413, "nl_NL"}
 413 };
 414
 415 /* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
 416 static const ILcidPosixElement no[] = {
 417     {0x14,   "nb"},     /* really nb */
 418     {0x0414, "nb_NO"},  /* really nb_NO */
 419     {0x0814, "nn_NO"},  /* really nn_NO */
 420     {0x0814, "nn"}      /* It's 0x14 or 0x814, pick one to make the test program happy. */
 421 };
 422
 423 /* Declared as or_IN to get around compiler errors*/
 424 static const ILcidPosixElement or_IN[] = {
 425     {0x48,   "or"},
 426     {0x0448, "or_IN"},
 427 };
 428
 429 ILCID_POSIX_ELEMENT_ARRAY(0x0446, pa, pa_IN)
 430 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
 431
 432 static const ILcidPosixElement pt[] = {
 433     {0x16,   "pt"},
 434     {0x0416, "pt_BR"},
 435     {0x0816, "pt_PT"}
 436 };
 437
 438 ILCID_POSIX_ELEMENT_ARRAY(0x0418, ro, ro_RO)
 439
 440 static const ILcidPosixElement root[] = {
 441     {0x00,   "root"}
 442 };
 443
 444 ILCID_POSIX_ELEMENT_ARRAY(0x0419, ru, ru_RU)
 445 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
 446 ILCID_POSIX_ELEMENT_ARRAY(0x0459, sd, sd_IN)    /*Todo: Data does not exist*/
 447 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
 448 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
 449 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
 450
 451 static const ILcidPosixElement sv[] = {
 452     {0x1d,   "sv"},
 453     {0x081d, "sv_FI"},
 454     {0x041d, "sv_SE"}
 455 };
 456
 457 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
 458 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
 459 ILCID_POSIX_ELEMENT_ARRAY(0x0449, ta, ta_IN)
 460 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
 461 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
 462 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
 463 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
 464 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
 465
 466 static const ILcidPosixElement ur[] = {
 467     {0x20,   "ur"},
 468     {0x0820, "ur_IN"},
 469     {0x0420, "ur_PK"}
 470 };
 471
 472 static const ILcidPosixElement uz[] = {
 473     {0x43,   "uz"},
 474     {0x0843, "uz_UZ"},  /* Cyrillic based */
 475     {0x0443, "uz_UZ_LATN"} /* Latin based */
 476 };
 477
 478 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
 479
 480 static const ILcidPosixElement zh[] = {
 481     {0x04,   "zh"},
 482     {0x0804, "zh_CN"},
 483     {0x0c04, "zh_HK"},
 484     {0x1404, "zh_MO"},
 485     {0x1004, "zh_SG"},
 486     {0x0404, "zh_TW"},
 487     {0x30404,"zh_TW"},
 488     {0x20404,"zh_TW_STROKE"}
 489 };
 490
 491 /* This must be static and grouped by LCID. */
 492 static const ILcidPosixMap gPosixIDmap[] = {
 493     ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
 494     ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
 495     ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
 496     ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
 497     ILCID_POSIX_MAP(be),    /*  be  Byelorussian              0x23 */
 498     ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
 499     ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
 500     ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
 501     ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
 502     ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
 503     ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
 504     ILCID_POSIX_MAP(dv),    /*  dv Divehi                     0x65 */
 505     ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
 506     ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
 507     ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
 508     ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
 509     ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
 510     ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
 511     ILCID_POSIX_MAP(fa),    /*  fa  Farsi                     0x29 */
 512     ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
 513     ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
 514     ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
 515     ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
 516     ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
 517     ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
 518     ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
 519     ILCID_POSIX_MAP(hr),    /*  hr  Croatian                  0x1a */
 520     ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
 521     ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
 522     ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
 523 /*        ILCID_POSIX_MAP(in),    //  in  Indonesian                0x21 */
 524     ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
 525     ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
 526     ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
 527     ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
 528     ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
 529     ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
 530     ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
 531     ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
 532     ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
 533     ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
 534     ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
 535     ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
 536     ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
 537     ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
 538     ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
 539     ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
 540     ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
 541     ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
 542     ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
 543     ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
 544 /*        ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
 545     ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
 546     ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
 547 /*        ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
 548     ILCID_POSIX_MAP(no),    /*  nb/nn Norwegian (formerly no) 0x14 */
 549     ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
 550     ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
 551     ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
 552     ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
 553     ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
 554     ILCID_POSIX_MAP(root),  /*  root                          0x00 */
 555     ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
 556     ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
 557     ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
 558 /*        ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
 559     ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
 560     ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
 561     ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
 562 /*        ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
 563     ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
 564     ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
 565     ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
 566     ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
 567     ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
 568     ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
 569     ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
 570     ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
 571     ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
 572     ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
 573     ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
 574     ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
 575     ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
 576 };
 577
 578 static const uint32_t gLocaleCount = sizeof(gPosixIDmap)/sizeof(ILcidPosixMap);
 579
 580 static int32_t
 581 idCmp(const char* id1, const char* id2)
 582 {
 583     int32_t diffIdx = 0;
 584     while (*id1 == *id2 && *id1 != 0) {
 585         diffIdx++;
 586         id1++;
 587         id2++;
 588     }
 589     return diffIdx;
 590 }
 591
 592 /**
 593  * Searches for a Windows LCID
 594  *
 595  * @param posixid the Posix style locale id.
 596  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
 597  *               no equivalent Windows LCID.
 598  * @return the LCID
 599  */
 600 static uint32_t
 601 hostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
 602 {
 603     int32_t bestIdx = 0;
 604     int32_t bestIdxDiff = 0;
 605     int32_t posixIDlen = (int32_t)uprv_strlen(posixID) + 1;
 606     uint32_t idx;
 607
 608     for (idx = 0; idx < this_0->numRegions; idx++ ) {
 609         int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
 610         if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
 611             if (posixIDlen == sameChars) {
 612                 /* Exact match */
 613                 return this_0->regionMaps[idx].hostID;
 614             }
 615             bestIdxDiff = sameChars;
 616             bestIdx = idx;
 617         }
 618     }
 619     if (this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0) {
 620         *status = U_USING_FALLBACK_WARNING;
 621         return this_0->regionMaps[bestIdx].hostID;
 622     }
 623
 624     /*no match found */
 625     *status = U_ILLEGAL_ARGUMENT_ERROR;
 626     return this_0->regionMaps->hostID;
 627 }
 628
 629 static const char*
 630 posixID(const ILcidPosixMap *this_0, uint32_t hostID)
 631 {
 632     uint32_t i;
 633     for (i = 0; i <= this_0->numRegions; i++)
 634     {
 635         if (this_0->regionMaps[i].hostID == hostID)
 636         {
 637             return this_0->regionMaps[i].posixID;
 638         }
 639     }
 640
 641     /* If you get here, then no matching region was found,
 642        so return the language id with the wild card region. */
 643     return this_0->regionMaps[0].posixID;
 644 }
 645
 646 /*
 647 //////////////////////////////////////
 648 //
 649 // LCID --> POSIX
 650 //
 651 /////////////////////////////////////
 652 */
 653
 654 U_CAPI const char *
 655 uprv_convertToPosix(uint32_t hostid, UErrorCode* status)
 656 {
 657     uint16_t langID = LANGUAGE_LCID(hostid);
 658     uint32_t index;
 659
 660     for (index = 0; index < gLocaleCount; index++)
 661     {
 662         if (langID == gPosixIDmap[index].regionMaps->hostID)
 663         {
 664             return posixID(&gPosixIDmap[index], hostid);
 665         }
 666     }
 667
 668     /* no match found */
 669     *status = U_ILLEGAL_ARGUMENT_ERROR;
 670     return "??_??";
 671 }
 672
 673 /*
 674 //////////////////////////////////////
 675 //
 676 // POSIX --> LCID
 677 //
 678 /////////////////////////////////////
 679 */
 680
 681 U_CAPI uint32_t
 682 uprv_convertToLCID(const char* posixID, UErrorCode* status)
 683 {
 684
 685     uint32_t   low    = 0;
 686     uint32_t   high   = gLocaleCount - 1;
 687     uint32_t   mid    = high;
 688     int32_t    compVal;
 689     char       langID[ULOC_FULLNAME_CAPACITY];
 690
 691     uint32_t   value         = 0;
 692     uint32_t   fallbackValue = (uint32_t)-1;
 693     UErrorCode myStatus;
 694     uint32_t   idx;
 695
 696     /* Check for incomplete id. */
 697     if (!posixID || uprv_strlen(posixID) < 2) {
 698         return 0;
 699     }
 700
 701     uloc_getLanguage(posixID, langID, sizeof(langID), status);
 702     if (U_FAILURE(*status)) {
 703         return 0;
 704     }
 705
 706     /*Binary search for the map entry for normal cases */
 707     /* When mid == 0, it's not found */
 708     while (low <= high && mid != 0) {
 709
 710         mid = (low + high + 1) / 2;    /* +1 is to round properly */
 711
 712         compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
 713
 714         if (compVal < 0)
 715             high = mid - 1;
 716         else if (compVal > 0)
 717             low = mid + 1;
 718         else  /* found match! */
 719             return hostID(&gPosixIDmap[mid], posixID, status);
 720     }
 721
 722     /*
 723      * Sometimes we can't do a binary search on posixID because some LCIDs
 724      * go to different locales.  We hit one of those special cases.
 725      */
 726     for (idx = 0; idx < gLocaleCount; idx++ ) {
 727         myStatus = U_ZERO_ERROR;
 728         value = hostID(&gPosixIDmap[idx], posixID, &myStatus);
 729         if (myStatus == U_ZERO_ERROR) {
 730             return value;
 731         }
 732         else if (myStatus == U_USING_FALLBACK_WARNING) {
 733             fallbackValue = value;
 734         }
 735     }
 736
 737     if (fallbackValue != (uint32_t)-1) {
 738         *status = U_USING_FALLBACK_WARNING;
 739         return fallbackValue;
 740     }
 741
 742     /* no match found */
 743     *status = U_ILLEGAL_ARGUMENT_ERROR;
 744     return 0;   /* return international (root) */
 745 }
 746
 747 #endif
 748