icuSources/common/uloc.cpp

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1997-2013, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *
   7 * File ULOC.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   04/01/97    aliu        Creation.
  13 *   08/21/98    stephen     JDK 1.2 sync
  14 *   12/08/98    rtg         New Locale implementation and C API
  15 *   03/15/99    damiba      overhaul.
  16 *   04/06/99    stephen     changed setDefault() to realloc and copy
  17 *   06/14/99    stephen     Changed calls to ures_open for new params
  18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
  19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
  20 *                           brought canonicalization code into line with spec
  21 *****************************************************************************/
  22
  23 /*
  24    POSIX's locale format, from putil.c: [no spaces]
  25
  26      ll [ _CC ] [ . MM ] [ @ VV]
  27
  28      l = lang, C = ctry, M = charmap, V = variant
  29 */
  30
  31 #include "unicode/utypes.h"
  32 #include "unicode/ustring.h"
  33 #include "unicode/uloc.h"
  34
  35 #include "putilimp.h"
  36 #include "ustr_imp.h"
  37 #include "ulocimp.h"
  38 #include "umutex.h"
  39 #include "cstring.h"
  40 #include "cmemory.h"
  41 #include "ucln_cmn.h"
  42 #include "locmap.h"
  43 #include "uarrsort.h"
  44 #include "uenumimp.h"
  45 #include "uassert.h"
  46
  47 #include <stdio.h> /* for sprintf */
  48
  49 /* ### Declarations **************************************************/
  50
  51 /* Locale stuff from locid.cpp */
      /* Setter and getter for the default locale ID string.
       * NOTE(review): presumably implemented in locid.cpp, per the note above; the
       * exact ownership/lifetime of the returned string is not visible here - confirm. */
   52 U_CFUNC void locale_set_default(const char *id);
   53 U_CFUNC const char *locale_get_default(void);
      /* Keyword extraction entry point.  Its parameter list is the same as that of the
       * static _getKeywords() helper in this file, minus addKeyword/addValue. */
   54 U_CFUNC int32_t
   55 locale_getKeywords(const char *localeID,
   56             char prev,
   57             char *keywords, int32_t keywordCapacity,
   58             char *values, int32_t valuesCapacity, int32_t *valLen,
   59             UBool valuesToo,
   60             UErrorCode *status);
  61
  62 /* ### Data tables **************************************************/
  63
  64 /**
  65  * Table of language codes, both 2- and 3-letter, with preference
  66  * given to 2-letter codes where possible.  Includes 3-letter codes
  67  * that lack a 2-letter equivalent.
  68  *
  69  * This list must be in sorted order.  This list is returned directly
  70  * to the user by some API.
  71  *
  72  * This list must be kept in sync with LANGUAGES_3, with corresponding
  73  * entries matched.
  74  *
  75  * This table should be terminated with a NULL entry, followed by a
  76  * second list, and another NULL entry.  The first list is visible to
  77  * user code when this array is returned by API.  The second list
  78  * contains codes we support, but do not expose through user API.
  79  *
  80  * Notes
  81  *
  82  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
  83  * include the revisions up to 2001/7/27 *CWB*
  84  *
  85  * The 3 character codes are the terminology codes like RFC 3066.  This
  86  * is compatible with prior ICU codes
  87  *
   88  * "in" "iw" "ji" "jw" & "sh" have been withdrawn.  They are still in
   89  * the table, but now sit at the end of it, because their 3 character
   90  * codes are duplicates of other entries.  This avoids bad searches
   91  * going from 3 to 2 character codes.
  92  *
  93  * The range qaa-qtz is reserved for local use
  94  */
  95 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
  96 /* ISO639 table version is 20130123 */
   97 static const char * const LANGUAGES[] = {
      /* First list: codes that are visible to user code; must stay sorted and
       * index-aligned with the first list of LANGUAGES_3. */
   98     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",
   99     "afa", "afh", "agq", "ain", "ak",  "akk", "ale", "alg",
  100     "alt", "am",  "an",  "ang", "anp", "apa", "ar",  "arc",
  101     "arn", "arp", "art", "arw", "as",  "asa", "ast", "ath",
  102     "aus", "av",  "awa", "ay",  "az",
  103     "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "bax",
  104     "bbj", "be",  "bej", "bem", "ber", "bez", "bfd", "bg",
  105     "bh",  "bho", "bi",  "bik", "bin", "bkm", "bla", "bm",
  106     "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",  "bss",
  107     "btk", "bua", "bug", "bum", "byn", "byv",
  108     "ca",  "cad", "cai", "car", "cau", "cay", "cch", "ce",
  109     "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",
  110     "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
  111     "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",
  112     "csb", "cu",  "cus", "cv",  "cy",
  113     "da",  "dak", "dar", "dav", "day", "de",  "del", "den",
  114     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
  115     "dv",  "dyo", "dyu", "dz",  "dzg",
  116     "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",
  117     "enm", "eo",  "es",  "et",  "eu",  "ewo",
  118     "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",
  119     "fo",  "fon", "fr",  "frm", "fro", "frr", "frs", "fur",
  120     "fy",
  121     "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
  122     "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
  123     "grc", "gsw", "gu",  "guz", "gv",  "gwi",
  124     "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit",
  125     "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",
  126     "hz",
  127     "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ijo",
  128     "ik",  "ilo", "inc", "ine", "inh", "io",  "ira", "iro",
  129     "is",  "it",  "iu",
  130     "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
  131     "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
  132     "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg",  "kha",
  133     "khi", "kho", "khq", "ki",  "kj",  "kk",  "kkj", "kl",
  134     "kln", "km",  "kmb", "kn",  "ko",  "kok", "kos", "kpe",
  135     "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf",
  136     "ksh", "ku",  "kum", "kut", "kv",  "kw",  "ky",
  137     "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lg",
  138     "li",  "lkt", "ln",  "lo",  "lol", "loz", "lt",  "lu",
  139     "lua", "lui", "lun", "luo", "lus", "luy", "lv",
  140     "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
  141     "mde", "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga",
  142     "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
  143     "mkh", "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh",
  144     "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun", "mus",
  145     "mwl", "mwr", "my",  "mye", "myn", "myv",
  146     "na",  "nah", "nai", "nap", "naq", "nb",  "nd",  "nds",
  147     "ne",  "new", "ng",  "nia", "nic", "niu", "nl",  "nmg",
  148     "nn",  "nnh", "no",  "nog", "non", "nqo", "nr",  "nso",
  149     "nub", "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo",
  150     "nzi",
  151     "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
  152     "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo",
  153     "phi", "phn", "pi",  "pl",  "pon", "pra", "pro", "ps",
  154     "pt",
  155     "qu",
  156     "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof",
  157     "rom", "ru",  "rup", "rw",  "rwk",
  158     "sa",  "sad", "sah", "sai", "sal", "sam", "saq", "sas",
  159     "sat", "sba", "sbp", "sc",  "scn", "sco", "sd",  "se",
  160     "see", "seh", "sel", "sem", "ses", "sg",  "sga", "sgn",
  161     "shi", "shn", "shu", "si",  "sid", "sio", "sit",
  162     "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
  163     "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
  164     "srn", "srr", "ss",  "ssa", "ssy", "st",  "su",  "suk",
  165     "sus", "sux", "sv",  "sw",  "swb", "swc", "syc", "syr",
  166     "ta",  "tai", "te",  "tem", "teo", "ter", "tet", "tg",
  167     "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tl",  "tlh",
  168     "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
  169     "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
  170     "twq", "ty",  "tyv", "tzm",
  171     "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
  172     "vai", "ve",  "vi",  "vo",  "vot", "vun",
  173     "wa",  "wae", "wak", "wal", "war", "was", "wen", "wo",
  174     "xal", "xh",  "xog",
  175     "yao", "yap", "yav", "ybb", "yi",  "yo",  "ypk", "yue",
  176     "za",  "zap", "zbl", "zen", "zh",  "znd", "zu",  "zun",
  177     "zxx", "zza",
  178 NULL, /* terminates the first (user-visible) list */
      /* Second list: codes that are supported but not exposed through user API. */
  179     "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
  180 NULL /* terminates the second (internal) list */
  181 };
 182
      /* Withdrawn 2-letter language codes.  Entries line up one-to-one with
       * REPLACEMENT_LANGUAGES (in->id, iw->he, ji->yi, jw->jv).  The list ends with
       * two NULL entries; NOTE(review): presumably so it can be scanned by the same
       * two-list search used for LANGUAGES - confirm against the lookup code. */
  183 static const char* const DEPRECATED_LANGUAGES[]={
  184     "in", "iw", "ji", "jw", NULL, NULL
  185 };
      /* Current codes for the withdrawn ones; entry i corresponds to
       * DEPRECATED_LANGUAGES[i], so the two arrays must be edited together. */
  186 static const char* const REPLACEMENT_LANGUAGES[]={
  187     "id", "he", "yi", "jv", NULL, NULL
  188 };
 189
 190 /**
 191  * Table of 3-letter language codes.
 192  *
 193  * This is a lookup table used to convert 3-letter language codes to
 194  * their 2-letter equivalent, where possible.  It must be kept in sync
 195  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 196  * same language as LANGUAGES_3[i].  The commented-out lines are
 197  * copied from LANGUAGES to make eyeballing this baby easier.
 198  *
 199  * Where a 3-letter language code has no 2-letter equivalent, the
 200  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 201  *
 202  * This table should be terminated with a NULL entry, followed by a
 203  * second list, and another NULL entry.  The two lists correspond to
 204  * the two lists in LANGUAGES.
 205  */
 206 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
 207 /* ISO639 table version is 20130123 */
  208 static const char * const LANGUAGES_3[] = {
      /* First list: entry i is the 3-letter code for LANGUAGES[i]; where no
       * 2-letter code exists the same 3-letter code appears in both tables. */
  209     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
  210     "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
  211     "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
  212     "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
  213     "aus", "ava", "awa", "aym", "aze",
  214     "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
  215     "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
  216     "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
  217     "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
  218     "btk", "bua", "bug", "bum", "byn", "byv",
  219     "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
  220     "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
  221     "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
  222     "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
  223     "csb", "chu", "cus", "chv", "cym",
  224     "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
  225     "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
  226     "div", "dyo", "dyu", "dzo", "dzg",
  227     "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
  228     "enm", "epo", "spa", "est", "eus", "ewo",
  229     "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
  230     "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
  231     "fry",
  232     "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
  233     "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
  234     "grc", "gsw", "guj", "guz", "glv", "gwi",
  235     "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
  236     "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
  237     "her",
  238     "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
  239     "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
  240     "isl", "ita", "iku",
  241     "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
  242     "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
  243     "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
  244     "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
  245     "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
  246     "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
  247     "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
  248     "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
  249     "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
  250     "lua", "lui", "lun", "luo", "lus", "luy", "lav",
  251     "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
  252     "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
  253     "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
  254     "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
  255     "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
  256     "mwl", "mwr", "mya", "mye", "myn", "myv",
  257     "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
  258     "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
  259     "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
  260     "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
  261     "nzi",
  262     "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
  263     "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
  264     "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
  265     "por",
  266     "que",
  267     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
  268     "rom", "rus", "rup", "kin", "rwk",
  269     "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
  270     "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
  271     "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
  272     "shi", "shn", "shu", "sin", "sid", "sio", "sit",
  273     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
  274     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
  275     "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
  276     "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
  277     "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
  278     "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
  279     "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
  280     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
  281     "twq", "tah", "tyv", "tzm",
  282     "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
  283     "vai", "ven", "vie", "vol", "vot", "vun",
  284     "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
  285     "xal", "xho", "xog",
  286     "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
  287     "zha", "zap", "zbl", "zen", "zho", "znd", "zul", "zun",
  288     "zxx", "zza",
  289 NULL, /* terminates the first list */
      /* Second list: 3-letter codes for the obsolete 2-letter codes shown in the
       * comment row below (same order as the second list of LANGUAGES). */
  290 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
  291     "ind", "heb", "yid", "jaw", "srp",
  292 NULL /* terminates the second list */
  293 };
 294
 295 /**
 296  * Table of 2-letter country codes.
 297  *
 298  * This list must be in sorted order.  This list is returned directly
 299  * to the user by some API.
 300  *
 301  * This list must be kept in sync with COUNTRIES_3, with corresponding
 302  * entries matched.
 303  *
 304  * This table should be terminated with a NULL entry, followed by a
 305  * second list, and another NULL entry.  The first list is visible to
 306  * user code when this array is returned by API.  The second list
 307  * contains codes we support, but do not expose through user API.
 308  *
 309  * Notes:
 310  *
 311  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 312  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 313  * new codes keeping the old ones for compatibility updated to include
 314  * 1999/12/03 revisions *CWB*
 315  *
 316  * RO(ROM) is now RO(ROU) according to
 317  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 318  */
  319 static const char * const COUNTRIES[] = {
      /* First list: codes visible to user code; must stay sorted and
       * index-aligned with the first list of COUNTRIES_3. */
  320     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
  321     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
  322     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
  323     "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
  324     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
  325     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
  326     "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
  327     "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
  328     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
  329     "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
  330     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
  331     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
  332     "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
  333     "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
  334     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
  335     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
  336     "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
  337     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
  338     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
  339     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
  340     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
  341     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
  342     "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
  343     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
  344     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
  345     "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
  346     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
  347     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
  348     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
  349     "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
  350 NULL, /* terminates the first (user-visible) list */
      /* Second list: codes that are supported but not exposed through user API. */
  351     "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
  352 NULL /* terminates the second (internal) list */
  353 };
 354
      /* Deprecated 2-letter country codes.  Entry i is replaced by
       * REPLACEMENT_COUNTRIES[i], so the two arrays must be edited together.
       * The list ends with two NULL entries. */
  355 static const char* const DEPRECATED_COUNTRIES[] = {
  356     "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
  357 };
      /* Current codes for the deprecated ones.  The commented-out row repeats
       * DEPRECATED_COUNTRIES so each replacement can be read directly below the
       * code it replaces. */
  358 static const char* const REPLACEMENT_COUNTRIES[] = {
  359 /*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
  360     "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
  361 };
 362
 363 /**
 364  * Table of 3-letter country codes.
 365  *
 366  * This is a lookup table used to convert 3-letter country codes to
 367  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 368  * For all valid i, COUNTRIES[i] must refer to the same country as
 369  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 370  * to make eyeballing this baby easier.
 371  *
 372  * This table should be terminated with a NULL entry, followed by a
 373  * second list, and another NULL entry.  The two lists correspond to
 374  * the two lists in COUNTRIES.
 375  */
  376 static const char * const COUNTRIES_3[] = {
      /* Each commented-out row repeats the matching COUNTRIES row; the row
       * beneath it holds the 3-letter codes in the same order. */
  377 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
  378     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
  379 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
  380     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
  381 /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
  382     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
  383 /*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
  384     "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
  385 /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
  386     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
  387 /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
  388     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
  389 /*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
  390     "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
  391 /*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
  392     "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
  393 /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
  394     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
  395 /*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
  396     "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
  397 /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
  398     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
  399 /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
  400     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
  401 /*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
  402     "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
  403 /*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
  404     "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
  405 /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
  406     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
  407 /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
  408     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
  409 /*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
  410     "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
  411 /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
  412     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
  413 /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
  414     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
  415 /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
  416     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
  417 /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
  418     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
  419 /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
  420     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
  421 /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
  422     "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
  423 /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
  424     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
  425 /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
  426     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
  427 /*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
  428     "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
  429 /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
  430     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
  431 /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
  432     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
  433 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
  434     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
  435 /*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
  436     "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
  437 NULL, /* terminates the first list */
      /* Second list: 3-letter codes for the obsolete 2-letter codes. */
  438 /*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
  439     "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
  440 NULL /* terminates the second list */
  441 };
 442
      /* One row of CANONICALIZE_MAP: maps a complete legacy locale ID to its
       * canonical ID, plus at most one keyword=value pair that goes with it. */
  443 typedef struct CanonicalizationMap {
  444     const char *id;          /* input ID */
  445     const char *canonicalID; /* canonicalized output ID */
  446     const char *keyword;     /* keyword, or NULL if none */
  447     const char *value;       /* keyword value, or NULL if kw==NULL */
  448 } CanonicalizationMap;
 449
 450 /**
 451  * A map to canonicalize locale IDs.  This handles a variety of
 452  * different semantic kinds of transformations.
 453  */
      /* Columns: { input ID, canonical ID, keyword, keyword value }.  Rows whose
       * last two columns are NULL are plain renames; the others also carry a
       * keyword=value pair (currency, collation or calendar). */
  454 static const CanonicalizationMap CANONICALIZE_MAP[] = {
  455     { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
  456     { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
  457     { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
  458     { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
  459     { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
  460     { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
  461     { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
  462     { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
  463     { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
  464     { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
  465     { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
  466     { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
  467     { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
  468     { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
  469     { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
  470     { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
  471     { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
  472     { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
  473     { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
  474     { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
  475     { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
  476     { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
  477     { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
  478     { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
  479     { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
  480     { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
  481     { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
  482     { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
  483     { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
  484     { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
  485     { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
  486     { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
  487     { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
  488     { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
  489     { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
  490     { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
  491     { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
  492     { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
  493     { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
  494     { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
  495     { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
  496     { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
  497     { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
  498     { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
  499     { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
  500     { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
  501 };
 502
      /* One row of VARIANT_MAP: a variant name and the keyword=value pair
       * associated with it. */
  503 typedef struct VariantMap {
  504     const char *variant;          /* input variant name (e.g. "EURO") */
  505     const char *keyword;     /* keyword, or NULL if none */
  506     const char *value;       /* keyword value, or NULL if kw==NULL */
  507 } VariantMap;
 508
      /* Columns: { variant name, keyword, keyword value }. */
  509 static const VariantMap VARIANT_MAP[] = {
  510     { "EURO",   "currency", "EUR" },
  511     { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
  512     { "STROKE", "collation", "stroke" }  /* Solaris variant */
  513 };
 514
 515 /* ### BCP47 Conversion *******************************************/
 516 /* Test if the locale id has BCP47 u extension and does not have '@' */
 517 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
 518 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
 519 #define _ConvertBCP47(finalID, id, buffer, length,err) \
 520         if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
 521             finalID=id; \
 522         } else { \
 523             finalID=buffer; \
 524         }
 525 /* Gets the size of the shortest subtag in the given localeID. */
 526 static int32_t getShortestSubtagLength(const char *localeID) {
 527     int32_t localeIDLength = uprv_strlen(localeID);
 528     int32_t length = localeIDLength;
 529     int32_t tmpLength = 0;
 530     int32_t i;
 531     UBool reset = TRUE;
 532
 533     for (i = 0; i < localeIDLength; i++) {
 534         if (localeID[i] != '_' && localeID[i] != '-') {
 535             if (reset) {
 536                 tmpLength = 0;
 537                 reset = FALSE;
 538             }
 539             tmpLength++;
 540         } else {
 541             if (tmpLength != 0 && tmpLength < length) {
 542                 length = tmpLength;
 543             }
 544             reset = TRUE;
 545         }
 546     }
 547
 548     return length;
 549 }
 550
 551 /* ### Keywords **************************************************/
 552
      /* Size of the fixed keyword-name buffers (see KeywordStruct.keyword).
       * Names must be strictly shorter than this so the terminating NUL fits. */
  553 #define ULOC_KEYWORD_BUFFER_LEN 25
      /* Capacity of the keywordList array that _getKeywords() keeps on the stack. */
  554 #define ULOC_MAX_NO_KEYWORDS 25
 555
 556 U_CAPI const char * U_EXPORT2
 557 locale_getKeywordsStart(const char *localeID) {
 558     const char *result = NULL;
 559     if((result = uprv_strchr(localeID, '@')) != NULL) {
 560         return result;
 561     }
 562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
 563     else {
 564         /* We do this because the @ sign is variant, and the @ sign used on one
 565         EBCDIC machine won't be compiled the same way on other EBCDIC based
 566         machines. */
 567         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
 568         const uint8_t *charToFind = ebcdicSigns;
 569         while(*charToFind) {
 570             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
 571                 return result;
 572             }
 573             charToFind++;
 574         }
 575     }
 576 #endif
 577     return NULL;
 578 }
 579
 580 /**
 581  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
 582  * @param keywordName incoming name to be canonicalized
 583  * @param status return status (keyword too long)
 584  * @return length of the keyword name
 585  */
 586 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
 587 {
 588   int32_t i;
 589   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
 590
 591   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
 592     /* keyword name too long for internal buffer */
 593     *status = U_INTERNAL_PROGRAM_ERROR;
 594           return 0;
 595   }
 596
 597   /* normalize the keyword name */
 598   for(i = 0; i < keywordNameLen; i++) {
 599     buf[i] = uprv_tolower(keywordName[i]);
 600   }
 601   buf[i] = 0;
 602
 603   return keywordNameLen;
 604 }
 605
      /* One keyword parsed out of a locale ID by _getKeywords(). */
  606 typedef struct {
  607     char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased, zero-terminated keyword name */
  608     int32_t keywordLen;                    /* number of characters stored in keyword */
  609     const char *valueStart;                /* NOTE(review): presumably points at the value text; not NUL-terminated? confirm in _getKeywords() */
  610     int32_t valueLen;                      /* NOTE(review): presumably the length of the value at valueStart - confirm */
  611 } KeywordStruct;
 612
 613 static int32_t U_CALLCONV
 614 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
 615     const char* leftString = ((const KeywordStruct *)left)->keyword;
 616     const char* rightString = ((const KeywordStruct *)right)->keyword;
 617     return uprv_strcmp(leftString, rightString);
 618 }
 619
 620 /**
 621  * Both addKeyword and addValue must already be in canonical form.
 622  * Either both addKeyword and addValue are NULL, or neither is NULL.
 623  * If they are not NULL they must be zero terminated.
  624  * If addKeyword is not NULL it must have a length small enough to fit in KeywordStruct.keyword.
 625  */
 626 static int32_t
 627 _getKeywords(const char *localeID,
 628              char prev,
 629              char *keywords, int32_t keywordCapacity,
 630              char *values, int32_t valuesCapacity, int32_t *valLen,
 631              UBool valuesToo,
 632              const char* addKeyword,
 633              const char* addValue,
 634              UErrorCode *status)
 635 {
 636     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
 637
 638     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
 639     int32_t numKeywords = 0;
 640     const char* pos = localeID;
 641     const char* equalSign = NULL;
 642     const char* semicolon = NULL;
 643     int32_t i = 0, j, n;
 644     int32_t keywordsLen = 0;
 645     int32_t valuesLen = 0;
 646
 647     if(prev == '@') { /* start of keyword definition */
 648         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
 649         do {
 650             UBool duplicate = FALSE;
 651             /* skip leading spaces */
 652             while(*pos == ' ') {
 653                 pos++;
 654             }
 655             if (!*pos) { /* handle trailing "; " */
 656                 break;
 657             }
 658             if(numKeywords == maxKeywords) {
 659                 *status = U_INTERNAL_PROGRAM_ERROR;
 660                 return 0;
 661             }
 662             equalSign = uprv_strchr(pos, '=');
 663             semicolon = uprv_strchr(pos, ';');
 664             /* lack of '=' [foo@currency] is illegal */
 665             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
 666             if(!equalSign || (semicolon && semicolon<equalSign)) {
 667                 *status = U_INVALID_FORMAT_ERROR;
 668                 return 0;
 669             }
 670             /* need to normalize both keyword and keyword name */
 671             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
 672                 /* keyword name too long for internal buffer */
 673                 *status = U_INTERNAL_PROGRAM_ERROR;
 674                 return 0;
 675             }
 676             for(i = 0, n = 0; i < equalSign - pos; ++i) {
 677                 if (pos[i] != ' ') {
 678                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
 679                 }
 680             }
 681             keywordList[numKeywords].keyword[n] = 0;
 682             keywordList[numKeywords].keywordLen = n;
 683             /* now grab the value part. First we skip the '=' */
 684             equalSign++;
 685             /* then we leading spaces */
 686             while(*equalSign == ' ') {
 687                 equalSign++;
 688             }
 689             keywordList[numKeywords].valueStart = equalSign;
 690
 691             pos = semicolon;
 692             i = 0;
 693             if(pos) {
 694                 while(*(pos - i - 1) == ' ') {
 695                     i++;
 696                 }
 697                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
 698                 pos++;
 699             } else {
 700                 i = (int32_t)uprv_strlen(equalSign);
 701                 while(i && equalSign[i-1] == ' ') {
 702                     i--;
 703                 }
 704                 keywordList[numKeywords].valueLen = i;
 705             }
 706             /* If this is a duplicate keyword, then ignore it */
 707             for (j=0; j<numKeywords; ++j) {
 708                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
 709                     duplicate = TRUE;
 710                     break;
 711                 }
 712             }
 713             if (!duplicate) {
 714                 ++numKeywords;
 715             }
 716         } while(pos);
 717
 718         /* Handle addKeyword/addValue. */
 719         if (addKeyword != NULL) {
 720             UBool duplicate = FALSE;
 721             U_ASSERT(addValue != NULL);
 722             /* Search for duplicate; if found, do nothing. Explicit keyword
 723                overrides addKeyword. */
 724             for (j=0; j<numKeywords; ++j) {
 725                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
 726                     duplicate = TRUE;
 727                     break;
 728                 }
 729             }
 730             if (!duplicate) {
 731                 if (numKeywords == maxKeywords) {
 732                     *status = U_INTERNAL_PROGRAM_ERROR;
 733                     return 0;
 734                 }
 735                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
 736                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
 737                 keywordList[numKeywords].valueStart = addValue;
 738                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
 739                 ++numKeywords;
 740             }
 741         } else {
 742             U_ASSERT(addValue == NULL);
 743         }
 744
 745         /* now we have a list of keywords */
 746         /* we need to sort it */
 747         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
 748
 749         /* Now construct the keyword part */
 750         for(i = 0; i < numKeywords; i++) {
 751             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
 752                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
 753                 if(valuesToo) {
 754                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
 755                 } else {
 756                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
 757                 }
 758             }
 759             keywordsLen += keywordList[i].keywordLen + 1;
 760             if(valuesToo) {
 761                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
 762                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
 763                 }
 764                 keywordsLen += keywordList[i].valueLen;
 765
 766                 if(i < numKeywords - 1) {
 767                     if(keywordsLen < keywordCapacity) {
 768                         keywords[keywordsLen] = ';';
 769                     }
 770                     keywordsLen++;
 771                 }
 772             }
 773             if(values) {
 774                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
 775                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
 776                     values[valuesLen + keywordList[i].valueLen] = 0;
 777                 }
 778                 valuesLen += keywordList[i].valueLen + 1;
 779             }
 780         }
 781         if(values) {
 782             values[valuesLen] = 0;
 783             if(valLen) {
 784                 *valLen = valuesLen;
 785             }
 786         }
 787         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
 788     } else {
 789         return 0;
 790     }
 791 }
 792
 793 U_CFUNC int32_t
 794 locale_getKeywords(const char *localeID,
 795                    char prev,
 796                    char *keywords, int32_t keywordCapacity,
 797                    char *values, int32_t valuesCapacity, int32_t *valLen,
 798                    UBool valuesToo,
 799                    UErrorCode *status) {
 800     return _getKeywords(localeID, prev, keywords, keywordCapacity,
 801                         values, valuesCapacity, valLen, valuesToo,
 802                         NULL, NULL, status);
 803 }
 804
 805 U_CAPI int32_t U_EXPORT2
 806 uloc_getKeywordValue(const char* localeID,
 807                      const char* keywordName,
 808                      char* buffer, int32_t bufferCapacity,
 809                      UErrorCode* status)
 810 {
 811     const char* startSearchHere = NULL;
 812     const char* nextSeparator = NULL;
 813     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 814     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 815     int32_t i = 0;
 816     int32_t result = 0;
 817
 818     if(status && U_SUCCESS(*status) && localeID) {
 819       char tempBuffer[ULOC_FULLNAME_CAPACITY];
 820       const char* tmpLocaleID;
 821
 822       if (_hasBCP47Extension(localeID)) {
 823           _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
 824       } else {
 825           tmpLocaleID=localeID;
 826       }
 827
 828       startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
 829       if(startSearchHere == NULL) {
 830           /* no keywords, return at once */
 831           return 0;
 832       }
 833
 834       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 835       if(U_FAILURE(*status)) {
 836         return 0;
 837       }
 838
 839       /* find the first keyword */
 840       while(startSearchHere) {
 841           startSearchHere++;
 842           /* skip leading spaces (allowed?) */
 843           while(*startSearchHere == ' ') {
 844               startSearchHere++;
 845           }
 846           nextSeparator = uprv_strchr(startSearchHere, '=');
 847           /* need to normalize both keyword and keyword name */
 848           if(!nextSeparator) {
 849               break;
 850           }
 851           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
 852               /* keyword name too long for internal buffer */
 853               *status = U_INTERNAL_PROGRAM_ERROR;
 854               return 0;
 855           }
 856           for(i = 0; i < nextSeparator - startSearchHere; i++) {
 857               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
 858           }
 859           /* trim trailing spaces */
 860           while(startSearchHere[i-1] == ' ') {
 861               i--;
 862               U_ASSERT(i>=0);
 863           }
 864           localeKeywordNameBuffer[i] = 0;
 865
 866           startSearchHere = uprv_strchr(nextSeparator, ';');
 867
 868           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
 869               nextSeparator++;
 870               while(*nextSeparator == ' ') {
 871                   nextSeparator++;
 872               }
 873               /* we actually found the keyword. Copy the value */
 874               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
 875                   while(*(startSearchHere-1) == ' ') {
 876                       startSearchHere--;
 877                   }
 878                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
 879                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
 880               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
 881                   i = (int32_t)uprv_strlen(nextSeparator);
 882                   while(nextSeparator[i - 1] == ' ') {
 883                       i--;
 884                   }
 885                   uprv_strncpy(buffer, nextSeparator, i);
 886                   result = u_terminateChars(buffer, bufferCapacity, i, status);
 887               } else {
 888                   /* give a bigger buffer, please */
 889                   *status = U_BUFFER_OVERFLOW_ERROR;
 890                   if(startSearchHere) {
 891                       result = (int32_t)(startSearchHere - nextSeparator);
 892                   } else {
 893                       result = (int32_t)uprv_strlen(nextSeparator);
 894                   }
 895               }
 896               return result;
 897           }
 898       }
 899     }
 900     return 0;
 901 }
 902
 903 U_CAPI int32_t U_EXPORT2
 904 uloc_setKeywordValue(const char* keywordName,
 905                      const char* keywordValue,
 906                      char* buffer, int32_t bufferCapacity,
 907                      UErrorCode* status)
 908 {
 909     /* TODO: sorting. removal. */
 910     int32_t keywordNameLen;
 911     int32_t keywordValueLen;
 912     int32_t bufLen;
 913     int32_t needLen = 0;
 914     int32_t foundValueLen;
 915     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
 916     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 917     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 918     int32_t i = 0;
 919     int32_t rc;
 920     char* nextSeparator = NULL;
 921     char* nextEqualsign = NULL;
 922     char* startSearchHere = NULL;
 923     char* keywordStart = NULL;
 924     char *insertHere = NULL;
 925     if(U_FAILURE(*status)) {
 926         return -1;
 927     }
 928     if(bufferCapacity>1) {
 929         bufLen = (int32_t)uprv_strlen(buffer);
 930     } else {
 931         *status = U_ILLEGAL_ARGUMENT_ERROR;
 932         return 0;
 933     }
 934     if(bufferCapacity<bufLen) {
 935         /* The capacity is less than the length?! Is this NULL terminated? */
 936         *status = U_ILLEGAL_ARGUMENT_ERROR;
 937         return 0;
 938     }
 939     if(keywordValue && !*keywordValue) {
 940         keywordValue = NULL;
 941     }
 942     if(keywordValue) {
 943         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
 944     } else {
 945         keywordValueLen = 0;
 946     }
 947     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 948     if(U_FAILURE(*status)) {
 949         return 0;
 950     }
 951     startSearchHere = (char*)locale_getKeywordsStart(buffer);
 952     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
 953         if(!keywordValue) { /* no keywords = nothing to remove */
 954             return bufLen;
 955         }
 956
 957         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
 958         if(startSearchHere) { /* had a single @ */
 959             needLen--; /* already had the @ */
 960             /* startSearchHere points at the @ */
 961         } else {
 962             startSearchHere=buffer+bufLen;
 963         }
 964         if(needLen >= bufferCapacity) {
 965             *status = U_BUFFER_OVERFLOW_ERROR;
 966             return needLen; /* no change */
 967         }
 968         *startSearchHere = '@';
 969         startSearchHere++;
 970         uprv_strcpy(startSearchHere, keywordNameBuffer);
 971         startSearchHere += keywordNameLen;
 972         *startSearchHere = '=';
 973         startSearchHere++;
 974         uprv_strcpy(startSearchHere, keywordValue);
 975         startSearchHere+=keywordValueLen;
 976         return needLen;
 977     } /* end shortcut - no @ */
 978
 979     keywordStart = startSearchHere;
 980     /* search for keyword */
 981     while(keywordStart) {
 982         keywordStart++;
 983         /* skip leading spaces (allowed?) */
 984         while(*keywordStart == ' ') {
 985             keywordStart++;
 986         }
 987         nextEqualsign = uprv_strchr(keywordStart, '=');
 988         /* need to normalize both keyword and keyword name */
 989         if(!nextEqualsign) {
 990             break;
 991         }
 992         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
 993             /* keyword name too long for internal buffer */
 994             *status = U_INTERNAL_PROGRAM_ERROR;
 995             return 0;
 996         }
 997         for(i = 0; i < nextEqualsign - keywordStart; i++) {
 998             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
 999         }
1000         /* trim trailing spaces */
1001         while(keywordStart[i-1] == ' ') {
1002             i--;
1003         }
1004         U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
1005         localeKeywordNameBuffer[i] = 0;
1006
1007         nextSeparator = uprv_strchr(nextEqualsign, ';');
1008         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1009         if(rc == 0) {
1010             nextEqualsign++;
1011             while(*nextEqualsign == ' ') {
1012                 nextEqualsign++;
1013             }
1014             /* we actually found the keyword. Change the value */
1015             if (nextSeparator) {
1016                 keywordAtEnd = 0;
1017                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1018             } else {
1019                 keywordAtEnd = 1;
1020                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1021             }
1022             if(keywordValue) { /* adding a value - not removing */
1023               if(foundValueLen == keywordValueLen) {
1024                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1025                 return bufLen; /* no change in size */
1026               } else if(foundValueLen > keywordValueLen) {
1027                 int32_t delta = foundValueLen - keywordValueLen;
1028                 if(nextSeparator) { /* RH side */
1029                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1030                 }
1031                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1032                 bufLen -= delta;
1033                 buffer[bufLen]=0;
1034                 return bufLen;
1035               } else { /* FVL < KVL */
1036                 int32_t delta = keywordValueLen - foundValueLen;
1037                 if((bufLen+delta) >= bufferCapacity) {
1038                   *status = U_BUFFER_OVERFLOW_ERROR;
1039                   return bufLen+delta;
1040                 }
1041                 if(nextSeparator) { /* RH side */
1042                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1043                 }
1044                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1045                 bufLen += delta;
1046                 buffer[bufLen]=0;
1047                 return bufLen;
1048               }
1049             } else { /* removing a keyword */
1050               if(keywordAtEnd) {
1051                 /* zero out the ';' or '@' just before startSearchhere */
1052                 keywordStart[-1] = 0;
1053                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1054               } else {
1055                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1056                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1057                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1058               }
1059             }
1060         } else if(rc<0){ /* end match keyword */
1061           /* could insert at this location. */
1062           insertHere = keywordStart;
1063         }
1064         keywordStart = nextSeparator;
1065     } /* end loop searching */
1066
1067     if(!keywordValue) {
1068       return bufLen; /* removal of non-extant keyword - no change */
1069     }
1070
1071     /* we know there is at least one keyword. */
1072     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1073     if(needLen >= bufferCapacity) {
1074         *status = U_BUFFER_OVERFLOW_ERROR;
1075         return needLen; /* no change */
1076     }
1077
1078     if(insertHere) {
1079       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1080       keywordStart = insertHere;
1081     } else {
1082       keywordStart = buffer+bufLen;
1083       *keywordStart = ';';
1084       keywordStart++;
1085     }
1086     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1087     keywordStart += keywordNameLen;
1088     *keywordStart = '=';
1089     keywordStart++;
1090     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1091     keywordStart+=keywordValueLen;
1092     if(insertHere) {
1093       *keywordStart = ';';
1094       keywordStart++;
1095     }
1096     buffer[needLen]=0;
1097     return needLen;
1098 }
1099
1100 /* ### ID parsing implementation **************************************************/
1101
/* TRUE if a is one of the letters that can start a special prefix: x, X, i, I.
   The argument is evaluated more than once; pass only side-effect-free expressions. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1112
1113 static char* _strnchr(const char* str, int32_t len, char c) {
1114     U_ASSERT(str != 0 && len >= 0);
1115     while (len-- != 0) {
1116         char d = *str;
1117         if (d == c) {
1118             return (char*) str;
1119         } else if (d == 0) {
1120             break;
1121         }
1122         ++str;
1123     }
1124     return NULL;
1125 }
1126
1127 /**
1128  * Lookup 'key' in the array 'list'.  The array 'list' should contain
1129  * a NULL entry, followed by more entries, and a second NULL entry.
1130  *
1131  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1132  * COUNTRIES_3.
1133  */
1134 static int16_t _findIndex(const char* const* list, const char* key)
1135 {
1136     const char* const* anchor = list;
1137     int32_t pass = 0;
1138
1139     /* Make two passes through two NULL-terminated arrays at 'list' */
1140     while (pass++ < 2) {
1141         while (*list) {
1142             if (uprv_strcmp(key, *list) == 0) {
1143                 return (int16_t)(list - anchor);
1144             }
1145             list++;
1146         }
1147         ++list;     /* skip final NULL *CWB*/
1148     }
1149     return -1;
1150 }
1151
1152 /* count the length of src while copying it to dest; return strlen(src) */
1153 static inline int32_t
1154 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1155     const char *anchor;
1156     char c;
1157
1158     anchor=src;
1159     for(;;) {
1160         if((c=*src)==0) {
1161             return (int32_t)(src-anchor);
1162         }
1163         if(destCapacity<=0) {
1164             return (int32_t)((src-anchor)+uprv_strlen(src));
1165         }
1166         ++src;
1167         *dest++=c;
1168         --destCapacity;
1169     }
1170 }
1171
1172 U_CFUNC const char*
1173 uloc_getCurrentCountryID(const char* oldID){
1174     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1175     if (offset >= 0) {
1176         return REPLACEMENT_COUNTRIES[offset];
1177     }
1178     return oldID;
1179 }
1180 U_CFUNC const char*
1181 uloc_getCurrentLanguageID(const char* oldID){
1182     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1183     if (offset >= 0) {
1184         return REPLACEMENT_LANGUAGES[offset];
1185     }
1186     return oldID;
1187 }
1188 /*
1189  * the internal functions _getLanguage(), _getCountry(), _getVariant()
1190  * avoid duplicating code to handle the earlier locale ID pieces
1191  * in the functions for the later ones by
1192  * setting the *pEnd pointer to where they stopped parsing
1193  *
1194  * TODO try to use this in Locale
1195  */
1196 U_CFUNC int32_t
1197 ulocimp_getLanguage(const char *localeID,
1198                     char *language, int32_t languageCapacity,
1199                     const char **pEnd) {
1200     int32_t i=0;
1201     int32_t offset;
1202     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1203
1204     /* if it starts with i- or x- then copy that prefix */
1205     if(_isIDPrefix(localeID)) {
1206         if(i<languageCapacity) {
1207             language[i]=(char)uprv_tolower(*localeID);
1208         }
1209         if(i<languageCapacity) {
1210             language[i+1]='-';
1211         }
1212         i+=2;
1213         localeID+=2;
1214     }
1215
1216     /* copy the language as far as possible and count its length */
1217     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1218         if(i<languageCapacity) {
1219             language[i]=(char)uprv_tolower(*localeID);
1220         }
1221         if(i<3) {
1222             U_ASSERT(i>=0);
1223             lang[i]=(char)uprv_tolower(*localeID);
1224         }
1225         i++;
1226         localeID++;
1227     }
1228
1229     if(i==3) {
1230         /* convert 3 character code to 2 character code if possible *CWB*/
1231         offset=_findIndex(LANGUAGES_3, lang);
1232         if(offset>=0) {
1233             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1234         }
1235     }
1236
1237     if(pEnd!=NULL) {
1238         *pEnd=localeID;
1239     }
1240     return i;
1241 }
1242
1243 U_CFUNC int32_t
1244 ulocimp_getScript(const char *localeID,
1245                   char *script, int32_t scriptCapacity,
1246                   const char **pEnd)
1247 {
1248     int32_t idLen = 0;
1249
1250     if (pEnd != NULL) {
1251         *pEnd = localeID;
1252     }
1253
1254     /* copy the second item as far as possible and count its length */
1255     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1256             && uprv_isASCIILetter(localeID[idLen])) {
1257         idLen++;
1258     }
1259
1260     /* If it's exactly 4 characters long, then it's a script and not a country. */
1261     if (idLen == 4) {
1262         int32_t i;
1263         if (pEnd != NULL) {
1264             *pEnd = localeID+idLen;
1265         }
1266         if(idLen > scriptCapacity) {
1267             idLen = scriptCapacity;
1268         }
1269         if (idLen >= 1) {
1270             script[0]=(char)uprv_toupper(*(localeID++));
1271         }
1272         for (i = 1; i < idLen; i++) {
1273             script[i]=(char)uprv_tolower(*(localeID++));
1274         }
1275     }
1276     else {
1277         idLen = 0;
1278     }
1279     return idLen;
1280 }
1281
1282 U_CFUNC int32_t
1283 ulocimp_getCountry(const char *localeID,
1284                    char *country, int32_t countryCapacity,
1285                    const char **pEnd)
1286 {
1287     int32_t idLen=0;
1288     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1289     int32_t offset;
1290
1291     /* copy the country as far as possible and count its length */
1292     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1293         if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1294             cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1295         }
1296         idLen++;
1297     }
1298
1299     /* the country should be either length 2 or 3 */
1300     if (idLen == 2 || idLen == 3) {
1301         UBool gotCountry = FALSE;
1302         /* convert 3 character code to 2 character code if possible *CWB*/
1303         if(idLen==3) {
1304             offset=_findIndex(COUNTRIES_3, cnty);
1305             if(offset>=0) {
1306                 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1307                 gotCountry = TRUE;
1308             }
1309         }
1310         if (!gotCountry) {
1311             int32_t i = 0;
1312             for (i = 0; i < idLen; i++) {
1313                 if (i < countryCapacity) {
1314                     country[i]=(char)uprv_toupper(localeID[i]);
1315                 }
1316             }
1317         }
1318         localeID+=idLen;
1319     } else {
1320         idLen = 0;
1321     }
1322
1323     if(pEnd!=NULL) {
1324         *pEnd=localeID;
1325     }
1326
1327     return idLen;
1328 }
1329
1330 /**
1331  * @param needSeparator if true, then add leading '_' if any variants
1332  * are added to 'variant'
1333  */
1334 static int32_t
1335 _getVariantEx(const char *localeID,
1336               char prev,
1337               char *variant, int32_t variantCapacity,
1338               UBool needSeparator) {
1339     int32_t i=0;
1340
1341     /* get one or more variant tags and separate them with '_' */
1342     if(_isIDSeparator(prev)) {
1343         /* get a variant string after a '-' or '_' */
1344         while(!_isTerminator(*localeID)) {
1345             if (needSeparator) {
1346                 if (i<variantCapacity) {
1347                     variant[i] = '_';
1348                 }
1349                 ++i;
1350                 needSeparator = FALSE;
1351             }
1352             if(i<variantCapacity) {
1353                 variant[i]=(char)uprv_toupper(*localeID);
1354                 if(variant[i]=='-') {
1355                     variant[i]='_';
1356                 }
1357             }
1358             i++;
1359             localeID++;
1360         }
1361     }
1362
1363     /* if there is no variant tag after a '-' or '_' then look for '@' */
1364     if(i==0) {
1365         if(prev=='@') {
1366             /* keep localeID */
1367         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1368             ++localeID; /* point after the '@' */
1369         } else {
1370             return 0;
1371         }
1372         while(!_isTerminator(*localeID)) {
1373             if (needSeparator) {
1374                 if (i<variantCapacity) {
1375                     variant[i] = '_';
1376                 }
1377                 ++i;
1378                 needSeparator = FALSE;
1379             }
1380             if(i<variantCapacity) {
1381                 variant[i]=(char)uprv_toupper(*localeID);
1382                 if(variant[i]=='-' || variant[i]==',') {
1383                     variant[i]='_';
1384                 }
1385             }
1386             i++;
1387             localeID++;
1388         }
1389     }
1390
1391     return i;
1392 }
1393
1394 static int32_t
1395 _getVariant(const char *localeID,
1396             char prev,
1397             char *variant, int32_t variantCapacity) {
1398     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1399 }
1400
1401 /**
1402  * Delete ALL instances of a variant from the given list of one or
1403  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1404  * @param variants the source string of one or more variants,
1405  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1406  * terminated; if it is, trailing zero will NOT be maintained.
1407  * @param variantsLen length of variants
1408  * @param toDelete variant to delete, without separators, e.g.  "EURO"
1409  * or "PREEURO"; not zero terminated
1410  * @param toDeleteLen length of toDelete
1411  * @return number of characters deleted from variants
1412  */
1413 static int32_t
1414 _deleteVariant(char* variants, int32_t variantsLen,
1415                const char* toDelete, int32_t toDeleteLen)
1416 {
1417     int32_t delta = 0; /* number of chars deleted */
1418     for (;;) {
1419         UBool flag = FALSE;
1420         if (variantsLen < toDeleteLen) {
1421             return delta;
1422         }
1423         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1424             (variantsLen == toDeleteLen ||
1425              (flag=(variants[toDeleteLen] == '_'))))
1426         {
1427             int32_t d = toDeleteLen + (flag?1:0);
1428             variantsLen -= d;
1429             delta += d;
1430             if (variantsLen > 0) {
1431                 uprv_memmove(variants, variants+d, variantsLen);
1432             }
1433         } else {
1434             char* p = _strnchr(variants, variantsLen, '_');
1435             if (p == NULL) {
1436                 return delta;
1437             }
1438             ++p;
1439             variantsLen -= (int32_t)(p - variants);
1440             variants = p;
1441         }
1442     }
1443 }
1444
1445 /* Keyword enumeration */
1446
/* State behind a keyword enumeration. */
typedef struct UKeywordsContext {
    char *keywords; /* start of the keyword list: NUL-separated names ending with an empty string */
    char *current;  /* the next keyword that uloc_kw_nextKeyword() will return */
} UKeywordsContext;
1451
1452 static void U_CALLCONV
1453 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1454     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1455     uprv_free(enumerator->context);
1456     uprv_free(enumerator);
1457 }
1458
1459 static int32_t U_CALLCONV
1460 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1461     char *kw = ((UKeywordsContext *)en->context)->keywords;
1462     int32_t result = 0;
1463     while(*kw) {
1464         result++;
1465         kw += uprv_strlen(kw)+1;
1466     }
1467     return result;
1468 }
1469
1470 static const char* U_CALLCONV
1471 uloc_kw_nextKeyword(UEnumeration* en,
1472                     int32_t* resultLength,
1473                     UErrorCode* /*status*/) {
1474     const char* result = ((UKeywordsContext *)en->context)->current;
1475     int32_t len = 0;
1476     if(*result) {
1477         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1478         ((UKeywordsContext *)en->context)->current += len+1;
1479     } else {
1480         result = NULL;
1481     }
1482     if (resultLength) {
1483         *resultLength = len;
1484     }
1485     return result;
1486 }
1487
1488 static void U_CALLCONV
1489 uloc_kw_resetKeywords(UEnumeration* en,
1490                       UErrorCode* /*status*/) {
1491     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1492 }
1493
/*
 * Template for keyword enumerations.  uloc_openKeywordList copies this
 * structure into a freshly allocated UEnumeration and then fills in its
 * 'context' member with a UKeywordsContext.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,                  /* unset in the template */
    NULL,                  /* unset in the template; presumably the 'context' slot -- confirm against uenumimp.h */
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1503
1504 U_CAPI UEnumeration* U_EXPORT2
1505 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1506 {
1507     UKeywordsContext *myContext = NULL;
1508     UEnumeration *result = NULL;
1509
1510     if(U_FAILURE(*status)) {
1511         return NULL;
1512     }
1513     result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1514     /* Null pointer test */
1515     if (result == NULL) {
1516         *status = U_MEMORY_ALLOCATION_ERROR;
1517         return NULL;
1518     }
1519     uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1520     myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1521     if (myContext == NULL) {
1522         *status = U_MEMORY_ALLOCATION_ERROR;
1523         uprv_free(result);
1524         return NULL;
1525     }
1526     myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1527     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1528     myContext->keywords[keywordListSize] = 0;
1529     myContext->current = myContext->keywords;
1530     result->context = myContext;
1531     return result;
1532 }
1533
1534 U_CAPI UEnumeration* U_EXPORT2
1535 uloc_openKeywords(const char* localeID,
1536                         UErrorCode* status)
1537 {
1538     int32_t i=0;
1539     char keywords[256];
1540     int32_t keywordsCapacity = 256;
1541     char tempBuffer[ULOC_FULLNAME_CAPACITY];
1542     const char* tmpLocaleID;
1543
1544     if(status==NULL || U_FAILURE(*status)) {
1545         return 0;
1546     }
1547
1548     if (_hasBCP47Extension(localeID)) {
1549         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1550     } else {
1551         if (localeID==NULL) {
1552            localeID=uloc_getDefault();
1553         }
1554         tmpLocaleID=localeID;
1555     }
1556
1557     /* Skip the language */
1558     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1559     if(_isIDSeparator(*tmpLocaleID)) {
1560         const char *scriptID;
1561         /* Skip the script if available */
1562         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1563         if(scriptID != tmpLocaleID+1) {
1564             /* Found optional script */
1565             tmpLocaleID = scriptID;
1566         }
1567         /* Skip the Country */
1568         if (_isIDSeparator(*tmpLocaleID)) {
1569             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1570             if(_isIDSeparator(*tmpLocaleID)) {
1571                 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1572             }
1573         }
1574     }
1575
1576     /* keywords are located after '@' */
1577     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1578         i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1579     }
1580
1581     if(i) {
1582         return uloc_openKeywordList(keywords, i, status);
1583     } else {
1584         return NULL;
1585     }
1586 }
1587
1588
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* True when any bit of 'mask' is set in 'options'.  Both arguments are
   parenthesized so that compound expressions (e.g. a | b) are tested as a
   whole rather than being split by operator precedence. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The grandfathered tag "i-default"; deliberately not NUL-terminated, so
   always compare it with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1597
1598 /**
1599  * Canonicalize the given localeID, to level 1 or to level 2,
1600  * depending on the options.  To specify level 1, pass in options=0.
1601  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1602  *
1603  * This is the code underlying uloc_getName and uloc_canonicalize.
1604  */
1605 static int32_t
1606 _canonicalize(const char* localeID,
1607               char* result,
1608               int32_t resultCapacity,
1609               uint32_t options,
1610               UErrorCode* err) {
1611     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1612     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1613     char tempBuffer[ULOC_FULLNAME_CAPACITY];
1614     const char* origLocaleID;
1615     const char* tmpLocaleID;
1616     const char* keywordAssign = NULL;
1617     const char* separatorIndicator = NULL;
1618     const char* addKeyword = NULL;
1619     const char* addValue = NULL;
1620     char* name;
1621     char* variant = NULL; /* pointer into name, or NULL */
1622
1623     if (U_FAILURE(*err)) {
1624         return 0;
1625     }
1626
1627     if (_hasBCP47Extension(localeID)) {
1628         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1629     } else {
1630         if (localeID==NULL) {
1631            localeID=uloc_getDefault();
1632         }
1633         tmpLocaleID=localeID;
1634     }
1635
1636     origLocaleID=tmpLocaleID;
1637
1638     /* if we are doing a full canonicalization, then put results in
1639        localeBuffer, if necessary; otherwise send them to result. */
1640     if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1641         (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1642         name = localeBuffer;
1643         nameCapacity = (int32_t)sizeof(localeBuffer);
1644     } else {
1645         name = result;
1646         nameCapacity = resultCapacity;
1647     }
1648
1649     /* get all pieces, one after another, and separate with '_' */
1650     len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1651
1652     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1653         const char *d = uloc_getDefault();
1654
1655         len = (int32_t)uprv_strlen(d);
1656
1657         if (name != NULL) {
1658             uprv_strncpy(name, d, len);
1659         }
1660     } else if(_isIDSeparator(*tmpLocaleID)) {
1661         const char *scriptID;
1662
1663         ++fieldCount;
1664         if(len<nameCapacity) {
1665             name[len]='_';
1666         }
1667         ++len;
1668
1669         scriptSize=ulocimp_getScript(tmpLocaleID+1,
1670             (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1671         if(scriptSize > 0) {
1672             /* Found optional script */
1673             tmpLocaleID = scriptID;
1674             ++fieldCount;
1675             len+=scriptSize;
1676             if (_isIDSeparator(*tmpLocaleID)) {
1677                 /* If there is something else, then we add the _ */
1678                 if(len<nameCapacity) {
1679                     name[len]='_';
1680                 }
1681                 ++len;
1682             }
1683         }
1684
1685         if (_isIDSeparator(*tmpLocaleID)) {
1686             const char *cntryID;
1687             int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1688                 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1689             if (cntrySize > 0) {
1690                 /* Found optional country */
1691                 tmpLocaleID = cntryID;
1692                 len+=cntrySize;
1693             }
1694             if(_isIDSeparator(*tmpLocaleID)) {
1695                 /* If there is something else, then we add the _  if we found country before. */
1696                 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1697                     ++fieldCount;
1698                     if(len<nameCapacity) {
1699                         name[len]='_';
1700                     }
1701                     ++len;
1702                 }
1703
1704                 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1705                     (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1706                 if (variantSize > 0) {
1707                     variant = len<nameCapacity ? name+len : NULL;
1708                     len += variantSize;
1709                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1710                 }
1711             }
1712         }
1713     }
1714
1715     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1716     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1717         UBool done = FALSE;
1718         do {
1719             char c = *tmpLocaleID;
1720             switch (c) {
1721             case 0:
1722             case '@':
1723                 done = TRUE;
1724                 break;
1725             default:
1726                 if (len<nameCapacity) {
1727                     name[len] = c;
1728                 }
1729                 ++len;
1730                 ++tmpLocaleID;
1731                 break;
1732             }
1733         } while (!done);
1734     }
1735
1736     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1737        After this, tmpLocaleID either points to '@' or is NULL */
1738     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1739         keywordAssign = uprv_strchr(tmpLocaleID, '=');
1740         separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1741     }
1742
1743     /* Copy POSIX-style variant, if any [mr@FOO] */
1744     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1745         tmpLocaleID != NULL && keywordAssign == NULL) {
1746         for (;;) {
1747             char c = *tmpLocaleID;
1748             if (c == 0) {
1749                 break;
1750             }
1751             if (len<nameCapacity) {
1752                 name[len] = c;
1753             }
1754             ++len;
1755             ++tmpLocaleID;
1756         }
1757     }
1758
1759     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1760         /* Handle @FOO variant if @ is present and not followed by = */
1761         if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1762             int32_t posixVariantSize;
1763             /* Add missing '_' if needed */
1764             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1765                 do {
1766                     if(len<nameCapacity) {
1767                         name[len]='_';
1768                     }
1769                     ++len;
1770                     ++fieldCount;
1771                 } while(fieldCount<2);
1772             }
1773             posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1774                                              (UBool)(variantSize > 0));
1775             if (posixVariantSize > 0) {
1776                 if (variant == NULL) {
1777                     variant = name+len;
1778                 }
1779                 len += posixVariantSize;
1780                 variantSize += posixVariantSize;
1781             }
1782         }
1783
1784         /* Handle generic variants first */
1785         if (variant) {
1786             for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1787                 const char* variantToCompare = VARIANT_MAP[j].variant;
1788                 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1789                 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1790                 len -= variantLen;
1791                 if (variantLen > 0) {
1792                     if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1793                         --len;
1794                     }
1795                     addKeyword = VARIANT_MAP[j].keyword;
1796                     addValue = VARIANT_MAP[j].value;
1797                     break;
1798                 }
1799             }
1800             if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1801                 --len;
1802             }
1803         }
1804
1805         /* Look up the ID in the canonicalization map */
1806         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1807             const char* id = CANONICALIZE_MAP[j].id;
1808             int32_t n = (int32_t)uprv_strlen(id);
1809             if (len == n && uprv_strncmp(name, id, n) == 0) {
1810                 if (n == 0 && tmpLocaleID != NULL) {
1811                     break; /* Don't remap "" if keywords present */
1812                 }
1813                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1814                 if (CANONICALIZE_MAP[j].keyword) {
1815                     addKeyword = CANONICALIZE_MAP[j].keyword;
1816                     addValue = CANONICALIZE_MAP[j].value;
1817                 }
1818                 break;
1819             }
1820         }
1821     }
1822
1823     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1824         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1825             (!separatorIndicator || separatorIndicator > keywordAssign)) {
1826             if(len<nameCapacity) {
1827                 name[len]='@';
1828             }
1829             ++len;
1830             ++fieldCount;
1831             len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1832                                 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1833         } else if (addKeyword != NULL) {
1834             U_ASSERT(addValue != NULL && len < nameCapacity);
1835             /* inelegant but works -- later make _getKeywords do this? */
1836             len += _copyCount(name+len, nameCapacity-len, "@");
1837             len += _copyCount(name+len, nameCapacity-len, addKeyword);
1838             len += _copyCount(name+len, nameCapacity-len, "=");
1839             len += _copyCount(name+len, nameCapacity-len, addValue);
1840         }
1841     }
1842
1843     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1844         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1845     }
1846
1847     return u_terminateChars(result, resultCapacity, len, err);
1848 }
1849
1850 /* ### ID parsing API **************************************************/
1851
1852 U_CAPI int32_t  U_EXPORT2
1853 uloc_getParent(const char*    localeID,
1854                char* parent,
1855                int32_t parentCapacity,
1856                UErrorCode* err)
1857 {
1858     const char *lastUnderscore;
1859     int32_t i;
1860
1861     if (U_FAILURE(*err))
1862         return 0;
1863
1864     if (localeID == NULL)
1865         localeID = uloc_getDefault();
1866
1867     lastUnderscore=uprv_strrchr(localeID, '_');
1868     if(lastUnderscore!=NULL) {
1869         i=(int32_t)(lastUnderscore-localeID);
1870     } else {
1871         i=0;
1872     }
1873
1874     if(i>0 && parent != localeID) {
1875         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1876     }
1877     return u_terminateChars(parent, parentCapacity, i, err);
1878 }
1879
1880 U_CAPI int32_t U_EXPORT2
1881 uloc_getLanguage(const char*    localeID,
1882          char* language,
1883          int32_t languageCapacity,
1884          UErrorCode* err)
1885 {
1886     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1887     int32_t i=0;
1888
1889     if (err==NULL || U_FAILURE(*err)) {
1890         return 0;
1891     }
1892
1893     if(localeID==NULL) {
1894         localeID=uloc_getDefault();
1895     }
1896
1897     i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1898     return u_terminateChars(language, languageCapacity, i, err);
1899 }
1900
1901 U_CAPI int32_t U_EXPORT2
1902 uloc_getScript(const char*    localeID,
1903          char* script,
1904          int32_t scriptCapacity,
1905          UErrorCode* err)
1906 {
1907     int32_t i=0;
1908
1909     if(err==NULL || U_FAILURE(*err)) {
1910         return 0;
1911     }
1912
1913     if(localeID==NULL) {
1914         localeID=uloc_getDefault();
1915     }
1916
1917     /* skip the language */
1918     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1919     if(_isIDSeparator(*localeID)) {
1920         i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1921     }
1922     return u_terminateChars(script, scriptCapacity, i, err);
1923 }
1924
1925 U_CAPI int32_t  U_EXPORT2
1926 uloc_getCountry(const char* localeID,
1927             char* country,
1928             int32_t countryCapacity,
1929             UErrorCode* err)
1930 {
1931     int32_t i=0;
1932
1933     if(err==NULL || U_FAILURE(*err)) {
1934         return 0;
1935     }
1936
1937     if(localeID==NULL) {
1938         localeID=uloc_getDefault();
1939     }
1940
1941     /* Skip the language */
1942     ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1943     if(_isIDSeparator(*localeID)) {
1944         const char *scriptID;
1945         /* Skip the script if available */
1946         ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1947         if(scriptID != localeID+1) {
1948             /* Found optional script */
1949             localeID = scriptID;
1950         }
1951         if(_isIDSeparator(*localeID)) {
1952             i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1953         }
1954     }
1955     return u_terminateChars(country, countryCapacity, i, err);
1956 }
1957
1958 U_CAPI int32_t  U_EXPORT2
1959 uloc_getVariant(const char* localeID,
1960                 char* variant,
1961                 int32_t variantCapacity,
1962                 UErrorCode* err)
1963 {
1964     char tempBuffer[ULOC_FULLNAME_CAPACITY];
1965     const char* tmpLocaleID;
1966     int32_t i=0;
1967
1968     if(err==NULL || U_FAILURE(*err)) {
1969         return 0;
1970     }
1971
1972     if (_hasBCP47Extension(localeID)) {
1973         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1974     } else {
1975         if (localeID==NULL) {
1976            localeID=uloc_getDefault();
1977         }
1978         tmpLocaleID=localeID;
1979     }
1980
1981     /* Skip the language */
1982     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1983     if(_isIDSeparator(*tmpLocaleID)) {
1984         const char *scriptID;
1985         /* Skip the script if available */
1986         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1987         if(scriptID != tmpLocaleID+1) {
1988             /* Found optional script */
1989             tmpLocaleID = scriptID;
1990         }
1991         /* Skip the Country */
1992         if (_isIDSeparator(*tmpLocaleID)) {
1993             const char *cntryID;
1994             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
1995             if (cntryID != tmpLocaleID+1) {
1996                 /* Found optional country */
1997                 tmpLocaleID = cntryID;
1998             }
1999             if(_isIDSeparator(*tmpLocaleID)) {
2000                 /* If there was no country ID, skip a possible extra IDSeparator */
2001                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2002                     tmpLocaleID++;
2003                 }
2004                 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2005             }
2006         }
2007     }
2008
2009     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2010     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2011 /*
2012     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2013         i=_getVariant(localeID+1, '@', variant, variantCapacity);
2014     }
2015 */
2016     return u_terminateChars(variant, variantCapacity, i, err);
2017 }
2018
2019 U_CAPI int32_t  U_EXPORT2
2020 uloc_getName(const char* localeID,
2021              char* name,
2022              int32_t nameCapacity,
2023              UErrorCode* err)
2024 {
2025     return _canonicalize(localeID, name, nameCapacity, 0, err);
2026 }
2027
2028 U_CAPI int32_t  U_EXPORT2
2029 uloc_getBaseName(const char* localeID,
2030                  char* name,
2031                  int32_t nameCapacity,
2032                  UErrorCode* err)
2033 {
2034     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2035 }
2036
2037 U_CAPI int32_t  U_EXPORT2
2038 uloc_canonicalize(const char* localeID,
2039                   char* name,
2040                   int32_t nameCapacity,
2041                   UErrorCode* err)
2042 {
2043     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2044 }
2045
2046 U_CAPI const char*  U_EXPORT2
2047 uloc_getISO3Language(const char* localeID)
2048 {
2049     int16_t offset;
2050     char lang[ULOC_LANG_CAPACITY];
2051     UErrorCode err = U_ZERO_ERROR;
2052
2053     if (localeID == NULL)
2054     {
2055         localeID = uloc_getDefault();
2056     }
2057     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2058     if (U_FAILURE(err))
2059         return "";
2060     offset = _findIndex(LANGUAGES, lang);
2061     if (offset < 0)
2062         return "";
2063     return LANGUAGES_3[offset];
2064 }
2065
2066 U_CAPI const char*  U_EXPORT2
2067 uloc_getISO3Country(const char* localeID)
2068 {
2069     int16_t offset;
2070     char cntry[ULOC_LANG_CAPACITY];
2071     UErrorCode err = U_ZERO_ERROR;
2072
2073     if (localeID == NULL)
2074     {
2075         localeID = uloc_getDefault();
2076     }
2077     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2078     if (U_FAILURE(err))
2079         return "";
2080     offset = _findIndex(COUNTRIES, cntry);
2081     if (offset < 0)
2082         return "";
2083
2084     return COUNTRIES_3[offset];
2085 }
2086
2087 U_CAPI uint32_t  U_EXPORT2
2088 uloc_getLCID(const char* localeID)
2089 {
2090     UErrorCode status = U_ZERO_ERROR;
2091     char       langID[ULOC_FULLNAME_CAPACITY];
2092
2093     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2094     if (U_FAILURE(status)) {
2095         return 0;
2096     }
2097
2098     return uprv_convertToLCID(langID, localeID, &status);
2099 }
2100
2101 U_CAPI int32_t U_EXPORT2
2102 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2103                 UErrorCode *status)
2104 {
2105     int32_t length;
2106     const char *posix = uprv_convertToPosix(hostid, status);
2107     if (U_FAILURE(*status) || posix == NULL) {
2108         return 0;
2109     }
2110     length = (int32_t)uprv_strlen(posix);
2111     if (length+1 > localeCapacity) {
2112         *status = U_BUFFER_OVERFLOW_ERROR;
2113     }
2114     else {
2115         uprv_strcpy(locale, posix);
2116     }
2117     return length;
2118 }
2119
2120 /* ### Default locale **************************************************/
2121
2122 U_CAPI const char*  U_EXPORT2
2123 uloc_getDefault()
2124 {
2125     return locale_get_default();
2126 }
2127
2128 U_CAPI void  U_EXPORT2
2129 uloc_setDefault(const char*   newDefaultLocale,
2130              UErrorCode* err)
2131 {
2132     if (U_FAILURE(*err))
2133         return;
2134     /* the error code isn't currently used for anything by this function*/
2135
2136     /* propagate change to C++ */
2137     locale_set_default(newDefaultLocale);
2138 }
2139
/**
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
 * to an array of pointers to arrays of char.  All of these pointers are owned
 * by ICU -- do not delete them, and do not write through them.  The array is
 * terminated with a null pointer.
 *
 * @return this file's static LANGUAGES table
 */
U_CAPI const char* const*  U_EXPORT2
uloc_getISOLanguages()
{
    return LANGUAGES;
}
2151
/**
 * Returns a list of all 2-letter country codes defined in ISO 3166.  This is a
 * pointer to an array of pointers to arrays of char.  All of these pointers are
 * owned by ICU -- do not delete them, and do not write through them.  The array is
 * terminated with a null pointer.
 *
 * @return this file's static COUNTRIES table
 */
U_CAPI const char* const*  U_EXPORT2
uloc_getISOCountries()
{
    return COUNTRIES;
}
2163
2164
2165 /* this function to be moved into cstring.c later */
2166 static char gDecimal = 0;
2167
2168 static /* U_CAPI */
2169 double
2170 /* U_EXPORT2 */
2171 _uloc_strtod(const char *start, char **end) {
2172     char *decimal;
2173     char *myEnd;
2174     char buf[30];
2175     double rv;
2176     if (!gDecimal) {
2177         char rep[5];
2178         /* For machines that decide to change the decimal on you,
2179         and try to be too smart with localization.
2180         This normally should be just a '.'. */
2181         sprintf(rep, "%+1.1f", 1.0);
2182         gDecimal = rep[2];
2183     }
2184
2185     if(gDecimal == '.') {
2186         return uprv_strtod(start, end); /* fall through to OS */
2187     } else {
2188         uprv_strncpy(buf, start, 29);
2189         buf[29]=0;
2190         decimal = uprv_strchr(buf, '.');
2191         if(decimal) {
2192             *decimal = gDecimal;
2193         } else {
2194             return uprv_strtod(start, end); /* no decimal point */
2195         }
2196         rv = uprv_strtod(buf, &myEnd);
2197         if(end) {
2198             *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2199         }
2200         return rv;
2201     }
2202 }
2203
/* One parsed entry of an HTTP Accept-Language list. */
typedef struct {
    float q;        /* quality value; 1.0 when the entry carries no ";q=" parameter */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* heap-allocated locale ID for this entry */
} _acceptLangItem;
2209
2210 static int32_t U_CALLCONV
2211 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2212 {
2213     const _acceptLangItem *aa = (const _acceptLangItem*)a;
2214     const _acceptLangItem *bb = (const _acceptLangItem*)b;
2215
2216     int32_t rc = 0;
2217     if(bb->q < aa->q) {
2218         rc = -1;  /* A > B */
2219     } else if(bb->q > aa->q) {
2220         rc = 1;   /* A < B */
2221     } else {
2222         rc = 0;   /* A = B */
2223     }
2224
2225     if(rc==0) {
2226         rc = uprv_stricmp(aa->locale, bb->locale);
2227     }
2228
2229 #if defined(ULOC_DEBUG)
2230     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2231     aa->locale, aa->q,
2232     bb->locale, bb->q,
2233     rc);*/
2234 #endif
2235
2236     return rc;
2237 }
2238
/*
Example of an HTTP Accept-Language header value (entries are not required to be in q order):
mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
*/
2242
2243 U_CAPI int32_t U_EXPORT2
2244 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2245                             const char *httpAcceptLanguage,
2246                             UEnumeration* availableLocales,
2247                             UErrorCode *status)
2248 {
2249     _acceptLangItem *j;
2250     _acceptLangItem smallBuffer[30];
2251     char **strs;
2252     char tmp[ULOC_FULLNAME_CAPACITY +1];
2253     int32_t n = 0;
2254     const char *itemEnd;
2255     const char *paramEnd;
2256     const char *s;
2257     const char *t;
2258     int32_t res;
2259     int32_t i;
2260     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2261     int32_t jSize;
2262     char *tempstr; /* Use for null pointer check */
2263
2264     j = smallBuffer;
2265     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2266     if(U_FAILURE(*status)) {
2267         return -1;
2268     }
2269
2270     for(s=httpAcceptLanguage;s&&*s;) {
2271         while(isspace(*s)) /* eat space at the beginning */
2272             s++;
2273         itemEnd=uprv_strchr(s,',');
2274         paramEnd=uprv_strchr(s,';');
2275         if(!itemEnd) {
2276             itemEnd = httpAcceptLanguage+l; /* end of string */
2277         }
2278         if(paramEnd && paramEnd<itemEnd) {
2279             /* semicolon (;) is closer than end (,) */
2280             t = paramEnd+1;
2281             if(*t=='q') {
2282                 t++;
2283             }
2284             while(isspace(*t)) {
2285                 t++;
2286             }
2287             if(*t=='=') {
2288                 t++;
2289             }
2290             while(isspace(*t)) {
2291                 t++;
2292             }
2293             j[n].q = (float)_uloc_strtod(t,NULL);
2294         } else {
2295             /* no semicolon - it's 1.0 */
2296             j[n].q = 1.0f;
2297             paramEnd = itemEnd;
2298         }
2299         j[n].dummy=0;
2300         /* eat spaces prior to semi */
2301         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2302             ;
2303         /* Check for null pointer from uprv_strndup */
2304         tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2305         if (tempstr == NULL) {
2306             *status = U_MEMORY_ALLOCATION_ERROR;
2307             return -1;
2308         }
2309         j[n].locale = tempstr;
2310         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2311         if(strcmp(j[n].locale,tmp)) {
2312             uprv_free(j[n].locale);
2313             j[n].locale=uprv_strdup(tmp);
2314         }
2315 #if defined(ULOC_DEBUG)
2316         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2317 #endif
2318         n++;
2319         s = itemEnd;
2320         while(*s==',') { /* eat duplicate commas */
2321             s++;
2322         }
2323         if(n>=jSize) {
2324             if(j==smallBuffer) {  /* overflowed the small buffer. */
2325                 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
2326                 if(j!=NULL) {
2327                     uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2328                 }
2329 #if defined(ULOC_DEBUG)
2330                 fprintf(stderr,"malloced at size %d\n", jSize);
2331 #endif
2332             } else {
2333                 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
2334 #if defined(ULOC_DEBUG)
2335                 fprintf(stderr,"re-alloced at size %d\n", jSize);
2336 #endif
2337             }
2338             jSize *= 2;
2339             if(j==NULL) {
2340                 *status = U_MEMORY_ALLOCATION_ERROR;
2341                 return -1;
2342             }
2343         }
2344     }
2345     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2346     if(U_FAILURE(*status)) {
2347         if(j != smallBuffer) {
2348 #if defined(ULOC_DEBUG)
2349             fprintf(stderr,"freeing j %p\n", j);
2350 #endif
2351             uprv_free(j);
2352         }
2353         return -1;
2354     }
2355     strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
2356     /* Check for null pointer */
2357     if (strs == NULL) {
2358         uprv_free(j); /* Free to avoid memory leak */
2359         *status = U_MEMORY_ALLOCATION_ERROR;
2360         return -1;
2361     }
2362     for(i=0;i<n;i++) {
2363 #if defined(ULOC_DEBUG)
2364         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2365 #endif
2366         strs[i]=j[i].locale;
2367     }
2368     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
2369         (const char**)strs, n, availableLocales, status);
2370     for(i=0;i<n;i++) {
2371         uprv_free(strs[i]);
2372     }
2373     uprv_free(strs);
2374     if(j != smallBuffer) {
2375 #if defined(ULOC_DEBUG)
2376         fprintf(stderr,"freeing j %p\n", j);
2377 #endif
2378         uprv_free(j);
2379     }
2380     return res;
2381 }
2382
2383
2384 U_CAPI int32_t U_EXPORT2
2385 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2386                     UAcceptResult *outResult, const char **acceptList,
2387                     int32_t acceptListCount,
2388                     UEnumeration* availableLocales,
2389                     UErrorCode *status)
2390 {
2391     int32_t i,j;
2392     int32_t len;
2393     int32_t maxLen=0;
2394     char tmp[ULOC_FULLNAME_CAPACITY+1];
2395     const char *l;
2396     char **fallbackList;
2397     if(U_FAILURE(*status)) {
2398         return -1;
2399     }
2400     fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2401     if(fallbackList==NULL) {
2402         *status = U_MEMORY_ALLOCATION_ERROR;
2403         return -1;
2404     }
2405     for(i=0;i<acceptListCount;i++) {
2406 #if defined(ULOC_DEBUG)
2407         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2408 #endif
2409         while((l=uenum_next(availableLocales, NULL, status))) {
2410 #if defined(ULOC_DEBUG)
2411             fprintf(stderr,"  %s\n", l);
2412 #endif
2413             len = (int32_t)uprv_strlen(l);
2414             if(!uprv_strcmp(acceptList[i], l)) {
2415                 if(outResult) {
2416                     *outResult = ULOC_ACCEPT_VALID;
2417                 }
2418 #if defined(ULOC_DEBUG)
2419                 fprintf(stderr, "MATCH! %s\n", l);
2420 #endif
2421                 if(len>0) {
2422                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2423                 }
2424                 for(j=0;j<i;j++) {
2425                     uprv_free(fallbackList[j]);
2426                 }
2427                 uprv_free(fallbackList);
2428                 return u_terminateChars(result, resultAvailable, len, status);
2429             }
2430             if(len>maxLen) {
2431                 maxLen = len;
2432             }
2433         }
2434         uenum_reset(availableLocales, status);
2435         /* save off parent info */
2436         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2437             fallbackList[i] = uprv_strdup(tmp);
2438         } else {
2439             fallbackList[i]=0;
2440         }
2441     }
2442
2443     for(maxLen--;maxLen>0;maxLen--) {
2444         for(i=0;i<acceptListCount;i++) {
2445             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2446 #if defined(ULOC_DEBUG)
2447                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2448 #endif
2449                 while((l=uenum_next(availableLocales, NULL, status))) {
2450 #if defined(ULOC_DEBUG)
2451                     fprintf(stderr,"  %s\n", l);
2452 #endif
2453                     len = (int32_t)uprv_strlen(l);
2454                     if(!uprv_strcmp(fallbackList[i], l)) {
2455                         if(outResult) {
2456                             *outResult = ULOC_ACCEPT_FALLBACK;
2457                         }
2458 #if defined(ULOC_DEBUG)
2459                         fprintf(stderr, "fallback MATCH! %s\n", l);
2460 #endif
2461                         if(len>0) {
2462                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2463                         }
2464                         for(j=0;j<acceptListCount;j++) {
2465                             uprv_free(fallbackList[j]);
2466                         }
2467                         uprv_free(fallbackList);
2468                         return u_terminateChars(result, resultAvailable, len, status);
2469                     }
2470                 }
2471                 uenum_reset(availableLocales, status);
2472
2473                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2474                     uprv_free(fallbackList[i]);
2475                     fallbackList[i] = uprv_strdup(tmp);
2476                 } else {
2477                     uprv_free(fallbackList[i]);
2478                     fallbackList[i]=0;
2479                 }
2480             }
2481         }
2482         if(outResult) {
2483             *outResult = ULOC_ACCEPT_FAILED;
2484         }
2485     }
2486     for(i=0;i<acceptListCount;i++) {
2487         uprv_free(fallbackList[i]);
2488     }
2489     uprv_free(fallbackList);
2490     return -1;
2491 }
2492
2493 /*eof*/