icuSources/common/uloc.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1997-2004, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *
   7 * File ULOC.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   04/01/97    aliu        Creation.
  13 *   08/21/98    stephen     JDK 1.2 sync
  14 *   12/08/98    rtg         New Locale implementation and C API
  15 *   03/15/99    damiba      overhaul.
  16 *   04/06/99    stephen     changed setDefault() to realloc and copy
  17 *   06/14/99    stephen     Changed calls to ures_open for new params
  18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
  19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
  20 *                           brought canonicalization code into line with spec
  21 *****************************************************************************/
  22
  23 /*
  24    POSIX's locale format, from putil.c: [no spaces]
  25
  26      ll [ _CC ] [ . MM ] [ @ VV]
  27
  28      l = lang, C = ctry, M = charmap, V = variant
  29 */
  30
  31 #include "unicode/utypes.h"
  32 #include "unicode/ustring.h"
  33 #include "unicode/uloc.h"
  34
  35 #include "putilimp.h"
  36 #include "ustr_imp.h"
  37 #include "ulocimp.h"
  38 #include "uresimp.h"
  39 #include "umutex.h"
  40 #include "cstring.h"
  41 #include "cmemory.h"
  42 #include "ucln_cmn.h"
  43 #include "locmap.h"
  44 #include "uarrsort.h"
  45 #include "uenumimp.h"
  46 #include "uassert.h"
  47
  48 #include <stdio.h> /* for sprintf */
  49
  50 /* ### Declarations **************************************************/
  51
  52 /* Locale stuff from locid.cpp */
  53 U_CFUNC void locale_set_default(const char *id);
  54 U_CFUNC const char *locale_get_default(void);
  55 U_CFUNC int32_t
  56 locale_getKeywords(const char *localeID,
  57             char prev,
  58             char *keywords, int32_t keywordCapacity,
  59             char *values, int32_t valuesCapacity, int32_t *valLen,
  60             UBool valuesToo,
  61             UErrorCode *status);
  62
  63 /* ### Constants **************************************************/
  64
  65 /* These strings describe the resources we attempt to load from
  66  the locale ResourceBundle data file.*/
  67 static const char _kLanguages[]       = "Languages";
  68 static const char _kScripts[]         = "Scripts";
  69 static const char _kCountries[]       = "Countries";
  70 static const char _kVariants[]        = "Variants";
  71 static const char _kKeys[]            = "Keys";
  72 static const char _kTypes[]           = "Types";
  73 static const char _kIndexLocaleName[] = "res_index";
  74 static const char _kRootName[]        = "root";
  75 static const char _kIndexTag[]        = "InstalledLocales";
  76 static const char _kCurrency[]        = "currency";
  77 static const char _kCurrencies[]      = "Currencies";
  78 static char** _installedLocales = NULL;
  79 static int32_t _installedLocalesCount = 0;
  80
  81 /* ### Data tables **************************************************/
  82
  83 /**
  84  * Table of language codes, both 2- and 3-letter, with preference
  85  * given to 2-letter codes where possible.  Includes 3-letter codes
  86  * that lack a 2-letter equivalent.
  87  *
  88  * This list must be in sorted order.  This list is returned directly
  89  * to the user by some API.
  90  *
  91  * This list must be kept in sync with LANGUAGES_3, with corresponding
  92  * entries matched.
  93  *
  94  * This table should be terminated with a NULL entry, followed by a
  95  * second list, and another NULL entry.  The first list is visible to
  96  * user code when this array is returned by API.  The second list
  97  * contains codes we support, but do not expose through user API.
  98  *
  99  * Notes
 100  *
 101  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 102  * include the revisions up to 2001/7/27 *CWB*
 103  *
 104  * The 3 character codes are the terminology codes like RFC 3066.  This
 105  * is compatible with prior ICU codes
 106  *
 107  * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 108  * table but now at the end of the table because 3 character codes are
 109  * duplicates.  This avoids bad searches going from 3 to 2 character
 110  * codes.
 111  *
 112  * The range qaa-qtz is reserved for local use
 113  */
 114 static const char * const LANGUAGES[] = {
 115     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
 116     "afh", "ak",  "akk", "ale", "alg", "am",  "an",  "ang", "apa",
 117     "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
 118     "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
 119     "bai", "bal", "ban", "bas", "bat", "be",  "bej",
 120     "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
 121     "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
 122     "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
 123     "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
 124     "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
 125     "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
 126     "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
 127     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
 128     "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
 129     "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
 130     "fan", "fat", "ff",  "fi",  "fiu", "fj",  "fo",  "fon",
 131     "fr",  "frm", "fro", "fur", "fy",  "ga",  "gaa", "gay",
 132     "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",
 133     "goh", "gon", "gor", "got", "grb", "grc", "gu",  "gv",
 134     "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
 135     "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
 136     "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
 137     "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
 138     "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
 139     "kac", "kam", "kar", "kaw", "kbd", "kg",  "kha", "khi",
 140     "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
 141     "ko",  "kok", "kos", "kpe", "kr",  "krc", "kro", "kru", "ks",
 142     "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
 143     "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
 144     "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
 145     "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
 146     "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",
 147     "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
 148     "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
 149     "mus", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
 150     "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
 151     "niu", "nl",  "nn",  "no",  "nog", "non", "nr",  "nso", "nub",
 152     "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
 153     "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
 154     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
 155     "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
 156     "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
 157     "ru",  "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
 158     "sas", "sat", "sc",  "sco", "sd",  "se",  "sel", "sem",
 159     "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
 160     "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
 161     "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
 162     "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
 163     "sv",  "sw",  "syr", "ta",  "tai", "te",  "tem", "ter",
 164     "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
 165     "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",
 166     "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
 167     "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
 168     "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
 169     "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
 170     "yi",  "yo",  "ypk", "za",  "zap", "zen", "zh",  "znd",
 171     "zu",  "zun",
 172 NULL,
 173     "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
 174 NULL
 175 };
 176
 177 /**
 178  * Table of 3-letter language codes.
 179  *
 180  * This is a lookup table used to convert 3-letter language codes to
 181  * their 2-letter equivalent, where possible.  It must be kept in sync
 182  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 183  * same language as LANGUAGES_3[i].  The commented-out lines are
 184  * copied from LANGUAGES to make eyeballing this baby easier.
 185  *
 186  * Where a 3-letter language code has no 2-letter equivalent, the
 187  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 188  *
 189  * This table should be terminated with a NULL entry, followed by a
 190  * second list, and another NULL entry.  The two lists correspond to
 191  * the two lists in LANGUAGES.
 192  */
 193 static const char * const LANGUAGES_3[] = {
 194 /*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
 195     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
 196 /*  "afh", "ak",  "akk", "ale", "alg", "am",  "an",  "ang", "apa",    */
 197     "afh", "aka", "akk", "ale", "alg", "amh", "arg", "ang", "apa",
 198 /*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
 199     "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
 200 /*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
 201     "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
 202 /*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
 203     "bai", "bal", "ban", "bas", "bat", "bel", "bej",
 204 /*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
 205     "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
 206 /*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
 207     "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
 208 /*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
 209     "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
 210 /*  "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
 211     "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
 212 /*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
 213     "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
 214 /*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
 215     "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
 216 /*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
 217     "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
 218 /*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
 219     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
 220 /*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
 221     "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
 222 /*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
 223     "enm", "epo", "spa", "est", "eus", "ewo", "fas",
 224 /*  "fan", "fat", "ff",  "fi",  "fiu", "fj",  "fo",  "fon",    */
 225     "fan", "fat", "ful", "fin", "fiu", "fij", "fao", "fon",
 226 /*  "fr",  "frm", "fro", "fur", "fy",  "ga",  "gaa", "gay",    */
 227     "fra", "frm", "fro", "fur", "fry", "gle", "gaa", "gay",
 228 /*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
 229     "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
 230 /*  "goh", "gon", "gor", "got", "grb", "grc", "gu",  "gv",     */
 231     "goh", "gon", "gor", "got", "grb", "grc", "guj", "glv",
 232 /*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
 233     "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
 234 /*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
 235     "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
 236 /*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
 237     "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
 238 /*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
 239     "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
 240 /*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
 241     "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
 242 /*  "kac", "kam", "kar", "kaw", "kbd", "kg",  "kha", "khi",    */
 243     "kac", "kam", "kar", "kaw", "kbd", "kon", "kha", "khi",
 244 /*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
 245     "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
 246 /*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "kro", "kru", "ks",     */
 247     "kor", "kok", "kos", "kpe", "kau", "krc", "kro", "kru", "kas",
 248 /*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
 249     "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
 250 /*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
 251     "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
 252 /*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
 253     "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
 254 /*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
 255     "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
 256 /*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
 257     "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
 258 /*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
 259     "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
 260 /*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
 261     "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
 262 /*  "mus", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
 263     "mus", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
 264 /*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
 265     "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
 266 /*  "niu", "nl",  "nn",  "no",  "nog", "non", "nr",  "nso", "nub",    */
 267     "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
 268 /*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
 269     "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
 270 /*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
 271     "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
 272 /*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
 273     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
 274 /*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
 275     "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
 276 /*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
 277     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
 278 /*  "ru",  "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
 279     "rus", "kin", "san", "sad", "sah", "sai", "sal", "sam",
 280 /*  "sas", "sat", "sc",  "sco", "sd",  "se",  "sel", "sem",    */
 281     "sas", "sat", "srd", "sco", "snd", "sme", "sel", "sem",
 282 /*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
 283     "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
 284 /*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
 285     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
 286 /*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
 287     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
 288 /*  "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
 289     "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
 290 /*  "sv",  "sw",  "syr", "ta",  "tai", "te",  "tem", "ter",    */
 291     "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
 292 /*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
 293     "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
 294 /*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
 295     "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
 296 /*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
 297     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
 298 /*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
 299     "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
 300 /*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
 301     "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
 302 /*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
 303     "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
 304 /*  "yi",  "yo",  "ypk", "za",  "zap", "zen", "zh",  "znd",    */
 305     "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
 306 /*  "zu",  "zun",                                              */
 307     "zul", "zun",
 308 NULL,
 309 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
 310     "ind", "heb", "yid", "jaw", "srp",
 311 NULL
 312 };
 313
 314 /**
 315  * Table of 2-letter country codes.
 316  *
 317  * This list must be in sorted order.  This list is returned directly
 318  * to the user by some API.
 319  *
 320  * This list must be kept in sync with COUNTRIES_3, with corresponding
 321  * entries matched.
 322  *
 323  * This table should be terminated with a NULL entry, followed by a
 324  * second list, and another NULL entry.  The first list is visible to
 325  * user code when this array is returned by API.  The second list
 326  * contains codes we support, but do not expose through user API.
 327  *
 328  * Notes:
 329  *
 330  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 331  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 332  * new codes keeping the old ones for compatibility updated to include
 333  * 1999/12/03 revisions *CWB*
 334  *
 335  * RO(ROM) is now RO(ROU) according to
 336  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 337  */
 338 static const char * const COUNTRIES[] = {
 339     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
 340     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AZ",
 341     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
 342     "BJ",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
 343     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
 344     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
 345     "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
 346     "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
 347     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
 348     "GA",  "GB",  "GD",  "GE",  "GF",  "GH",  "GI",  "GL",
 349     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
 350     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
 351     "ID",  "IE",  "IL",  "IN",  "IO",  "IQ",  "IR",  "IS",
 352     "IT",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
 353     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
 354     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
 355     "LV",  "LY",  "MA",  "MC",  "MD",  "MG",  "MH",  "MK",
 356     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
 357     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
 358     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
 359     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
 360     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
 361     "PW",  "PY",  "QA",  "RE",  "RO",  "RU",  "RW",  "SA",
 362     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
 363     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
 364     "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
 365     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
 366     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
 367     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
 368     "WS",  "YE",  "YT",  "YU",  "ZA",  "ZM",  "ZW",
 369 NULL,
 370     "FX",  "RO",  "TP",  "ZR",   /* obsolete country codes */
 371 NULL
 372 };
 373
 374 /**
 375  * Table of 3-letter country codes.
 376  *
 377  * This is a lookup table used to convert 3-letter country codes to
 378  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 379  * For all valid i, COUNTRIES[i] must refer to the same country as
 380  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 381  * to make eyeballing this baby easier.
 382  *
 383  * This table should be terminated with a NULL entry, followed by a
 384  * second list, and another NULL entry.  The two lists correspond to
 385  * the two lists in COUNTRIES.
 386  */
 387 static const char * const COUNTRIES_3[] = {
 388 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
 389     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
 390 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AZ",     */
 391     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "AZE",
 392 /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
 393     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
 394 /*  "BJ",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
 395     "BEN", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
 396 /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
 397     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
 398 /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
 399     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
 400 /*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
 401     "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
 402 /*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
 403     "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
 404 /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
 405     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
 406 /*  "GA",  "GB",  "GD",  "GE",  "GF",  "GH",  "GI",  "GL",     */
 407     "GAB", "GBR", "GRD", "GEO", "GUF", "GHA", "GIB", "GRL",
 408 /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
 409     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
 410 /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
 411     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
 412 /*  "ID",  "IE",  "IL",  "IN",  "IO",  "IQ",  "IR",  "IS",     */
 413     "IDN", "IRL", "ISR", "IND", "IOT", "IRQ", "IRN", "ISL",
 414 /*  "IT",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
 415     "ITA", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
 416 /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
 417     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
 418 /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
 419     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
 420 /*  "LV",  "LY",  "MA",  "MC",  "MD",  "MG",  "MH",  "MK",     */
 421     "LVA", "LBY", "MAR", "MCO", "MDA", "MDG", "MHL", "MKD",
 422 /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
 423     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
 424 /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
 425     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
 426 /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
 427     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
 428 /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
 429     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
 430 /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
 431     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
 432 /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RU",  "RW",  "SA",     */
 433     "PLW", "PRY", "QAT", "REU", "ROU", "RUS", "RWA", "SAU",
 434 /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
 435     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
 436 /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
 437     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
 438 /*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
 439     "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
 440 /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
 441     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
 442 /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
 443     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
 444 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
 445     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
 446 /*  "WS",  "YE",  "YT",  "YU",  "ZA",  "ZM",  "ZW",            */
 447     "WSM", "YEM", "MYT", "YUG", "ZAF", "ZMB", "ZWE",
 448 NULL,
 449 /*  "FX",  "RO",  "TP",  "ZR",   */
 450     "FXX", "ROM", "TMP", "ZAR",
 451 NULL
 452 };
 453
 454 typedef struct CanonicalizationMap {
 455     const char *id;          /* input ID */
 456     const char *canonicalID; /* canonicalized output ID */
 457     const char *keyword;     /* keyword, or NULL if none */
 458     const char *value;       /* keyword value, or NULL if kw==NULL */
 459 } CanonicalizationMap;
 460
 461 /**
 462  * A map to canonicalize locale IDs.  This handles a variety of
 463  * different semantic kinds of transformations.
 464  */
 465 static const CanonicalizationMap CANONICALIZE_MAP[] = {
 466     { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
 467     { "C",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
 468     { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
 469     { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
 470     { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
 471     { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
 472     { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
 473     { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
 474     { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
 475     { "de__PHONEBOOK",  "de", "collation", "phonebook" },
 476     { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
 477     { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
 478     { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
 479     { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
 480     { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
 481     { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
 482     { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
 483     { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
 484     { "es__TRADITIONAL", "es", "collation", "traditional" },
 485     { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
 486     { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
 487     { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
 488     { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
 489     { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
 490     { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
 491     { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
 492     { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
 493     { "hi__DIRECT",     "hi", "collation", "direct" },
 494     { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
 495     { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
 496     { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
 497     { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
 498     { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
 499     { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
 500     { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
 501     { "sr_SP_CYRL",     "sr_Cyrl_SP", NULL, NULL }, /* .NET name */
 502     { "sr_SP_LATN",     "sr_Latn_SP", NULL, NULL }, /* .NET name */
 503     { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
 504     { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
 505     { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
 506     { "zh_CHT",         "zh_TW", NULL, NULL }, /* .NET name TODO: This should be zh_Hant once the locale structure is fixed. */
 507     { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
 508     { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
 509     { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
 510     { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
 511     { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
 512     { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
 513     { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
 514     { "zh_YUE",         "zh__YUE", NULL, NULL }, /* registered name */
 515     { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" },
 516     { "zh_TW_STROKE",   "zh_TW", "collation", "stroke" },
 517     { "zh__PINYIN",     "zh", "collation", "pinyin" }
 518 };
 519
 520 /* ### Keywords **************************************************/
 521
 522 #define ULOC_KEYWORD_BUFFER_LEN 25
 523 #define ULOC_MAX_NO_KEYWORDS 25
 524
 525 static const char *
 526 locale_getKeywordsStart(const char *localeID) {
 527     /* TODO This seems odd. No matter what charset we're on, won't '@'
 528        be '@'? Or are we building on one EBCDIC machine and moving the
 529        library to another? */
 530     const char *result = NULL;
 531     static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
 532     if((result = uprv_strchr(localeID, '@')) != NULL) {
 533         return result;
 534     } else if(U_CHARSET_FAMILY == U_EBCDIC_FAMILY) {
 535         const uint8_t *charToFind = ebcdicSigns;
 536         while(*charToFind) {
 537             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
 538                 return result;
 539             }
 540             charToFind++;
 541         }
 542     }
 543     return NULL;
 544 }
 545
 546 /**
 547  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
 548  * @param keywordName incoming name to be canonicalized
 549  * @param status return status (keyword too long)
 550  * @return length of the keyword name
 551  */
 552 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
 553 {
 554   int32_t i;
 555   int32_t keywordNameLen = uprv_strlen(keywordName);
 556
 557   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
 558     /* keyword name too long for internal buffer */
 559     *status = U_INTERNAL_PROGRAM_ERROR;
 560           return 0;
 561   }
 562
 563   /* normalize the keyword name */
 564   for(i = 0; i < keywordNameLen; i++) {
 565     buf[i] = uprv_tolower(keywordName[i]);
 566   }
 567   buf[i] = 0;
 568
 569   return keywordNameLen;
 570 }
 571
 572 typedef struct {
 573     char keyword[ULOC_KEYWORD_BUFFER_LEN];
 574     int32_t keywordLen;
 575     const char *valueStart;
 576     int32_t valueLen;
 577 } KeywordStruct;
 578
 579 static int32_t U_CALLCONV
 580 compareKeywordStructs(const void *context, const void *left, const void *right) {
 581     const char* leftString = ((const KeywordStruct *)left)->keyword;
 582     const char* rightString = ((const KeywordStruct *)right)->keyword;
 583     return uprv_strcmp(leftString, rightString);
 584 }
 585
 586 /**
 587  * Both addKeyword and addValue must already be in canonical form.
 588  * Either both addKeyword and addValue are NULL, or neither is NULL.
 589  * If they are not NULL they must be zero terminated.
 590  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
 591  */
 592 static int32_t
 593 _getKeywords(const char *localeID,
 594              char prev,
 595              char *keywords, int32_t keywordCapacity,
 596              char *values, int32_t valuesCapacity, int32_t *valLen,
 597              UBool valuesToo,
 598              const char* addKeyword,
 599              const char* addValue,
 600              UErrorCode *status)
 601 {
 602     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
 603
 604     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
 605     int32_t numKeywords = 0;
 606     const char* pos = localeID;
 607     const char* equalSign = NULL;
 608     const char* semicolon = NULL;
 609     int32_t i = 0, j, n;
 610     int32_t keywordsLen = 0;
 611     int32_t valuesLen = 0;
 612
 613     if(prev == '@') { /* start of keyword definition */
 614         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
 615         do {
 616             UBool duplicate = FALSE;
 617             /* skip leading spaces */
 618             while(*pos == ' ') {
 619                 pos++;
 620             }
 621             if (!*pos) { /* handle trailing "; " */
 622                 break;
 623             }
 624             if(numKeywords == maxKeywords) {
 625                 *status = U_INTERNAL_PROGRAM_ERROR;
 626                 return 0;
 627             }
 628             equalSign = uprv_strchr(pos, '=');
 629             semicolon = uprv_strchr(pos, ';');
 630             /* lack of '=' [foo@currency] is illegal */
 631             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
 632             if(!equalSign || (semicolon && semicolon<equalSign)) {
 633                 *status = U_INVALID_FORMAT_ERROR;
 634                 return 0;
 635             }
 636             /* need to normalize both keyword and keyword name */
 637             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
 638                 /* keyword name too long for internal buffer */
 639                 *status = U_INTERNAL_PROGRAM_ERROR;
 640                 return 0;
 641             }
 642             for(i = 0, n = 0; i < equalSign - pos; ++i) {
 643                 if (pos[i] != ' ') {
 644                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
 645                 }
 646             }
 647             keywordList[numKeywords].keyword[n] = 0;
 648             keywordList[numKeywords].keywordLen = n;
 649             /* now grab the value part. First we skip the '=' */
 650             equalSign++;
 651             /* then we leading spaces */
 652             while(*equalSign == ' ') {
 653                 equalSign++;
 654             }
 655             keywordList[numKeywords].valueStart = equalSign;
 656
 657             pos = semicolon;
 658             i = 0;
 659             if(pos) {
 660                 while(*(pos - i - 1) == ' ') {
 661                     i++;
 662                 }
 663                 keywordList[numKeywords].valueLen = pos - equalSign - i;
 664                 pos++;
 665             } else {
 666                 i = uprv_strlen(equalSign);
 667                 while(equalSign[i-1] == ' ') {
 668                     i--;
 669                 }
 670                 keywordList[numKeywords].valueLen = i;
 671             }
 672             /* If this is a duplicate keyword, then ignore it */
 673             for (j=0; j<numKeywords; ++j) {
 674                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
 675                     duplicate = TRUE;
 676                     break;
 677                 }
 678             }
 679             if (!duplicate) {
 680                 ++numKeywords;
 681             }
 682         } while(pos);
 683
 684         /* Handle addKeyword/addValue. */
 685         if (addKeyword != NULL) {
 686             UBool duplicate = FALSE;
 687             U_ASSERT(addValue != NULL);
 688             /* Search for duplicate; if found, do nothing. Explicit keyword
 689                overrides addKeyword. */
 690             for (j=0; j<numKeywords; ++j) {
 691                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
 692                     duplicate = TRUE;
 693                     break;
 694                 }
 695             }
 696             if (!duplicate) {
 697                 if (numKeywords == maxKeywords) {
 698                     *status = U_INTERNAL_PROGRAM_ERROR;
 699                     return 0;
 700                 }
 701                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
 702                 keywordList[numKeywords].keywordLen = uprv_strlen(addKeyword);
 703                 keywordList[numKeywords].valueStart = addValue;
 704                 keywordList[numKeywords].valueLen = uprv_strlen(addValue);
 705                 ++numKeywords;
 706             }
 707         } else {
 708             U_ASSERT(addValue == NULL);
 709         }
 710
 711         /* now we have a list of keywords */
 712         /* we need to sort it */
 713         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
 714
 715         /* Now construct the keyword part */
 716         for(i = 0; i < numKeywords; i++) {
 717             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
 718                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
 719                 if(valuesToo) {
 720                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
 721                 } else {
 722                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
 723                 }
 724             }
 725             keywordsLen += keywordList[i].keywordLen + 1;
 726             if(valuesToo) {
 727                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
 728                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
 729                 }
 730                 keywordsLen += keywordList[i].valueLen;
 731
 732                 if(i < numKeywords - 1) {
 733                     if(keywordsLen < keywordCapacity) {
 734                         keywords[keywordsLen] = ';';
 735                     }
 736                     keywordsLen++;
 737                 }
 738             }
 739             if(values) {
 740                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
 741                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
 742                     values[valuesLen + keywordList[i].valueLen] = 0;
 743                 }
 744                 valuesLen += keywordList[i].valueLen + 1;
 745             }
 746         }
 747         if(values) {
 748             values[valuesLen] = 0;
 749             if(valLen) {
 750                 *valLen = valuesLen;
 751             }
 752         }
 753         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
 754     } else {
 755         return 0;
 756     }
 757 }
 758
 759 U_CFUNC int32_t
 760 locale_getKeywords(const char *localeID,
 761                    char prev,
 762                    char *keywords, int32_t keywordCapacity,
 763                    char *values, int32_t valuesCapacity, int32_t *valLen,
 764                    UBool valuesToo,
 765                    UErrorCode *status) {
 766     return _getKeywords(localeID, prev, keywords, keywordCapacity,
 767                         values, valuesCapacity, valLen, valuesToo,
 768                         NULL, NULL, status);
 769 }
 770
 771 U_CAPI int32_t U_EXPORT2
 772 uloc_getKeywordValue(const char* localeID,
 773                      const char* keywordName,
 774                      char* buffer, int32_t bufferCapacity,
 775                      UErrorCode* status)
 776 {
 777     const char* nextSeparator = NULL;
 778     int32_t keywordNameLen;
 779     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 780     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 781     int32_t i = 0;
 782     int32_t result = 0;
 783
 784     if(status && U_SUCCESS(*status) && localeID) {
 785
 786       const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
 787       if(startSearchHere == NULL) {
 788           /* no keywords, return at once */
 789           return 0;
 790       }
 791
 792       keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 793       if(U_FAILURE(*status)) {
 794         return 0;
 795       }
 796
 797       /* find the first keyword */
 798       while(startSearchHere) {
 799           startSearchHere++;
 800           /* skip leading spaces (allowed?) */
 801           while(*startSearchHere == ' ') {
 802               startSearchHere++;
 803           }
 804           nextSeparator = uprv_strchr(startSearchHere, '=');
 805           /* need to normalize both keyword and keyword name */
 806           if(!nextSeparator) {
 807               break;
 808           }
 809           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
 810               /* keyword name too long for internal buffer */
 811               *status = U_INTERNAL_PROGRAM_ERROR;
 812               return 0;
 813           }
 814           for(i = 0; i < nextSeparator - startSearchHere; i++) {
 815               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
 816           }
 817           /* trim trailing spaces */
 818           while(startSearchHere[i-1] == ' ') {
 819               i--;
 820           }
 821           localeKeywordNameBuffer[i] = 0;
 822
 823           startSearchHere = uprv_strchr(nextSeparator, ';');
 824
 825           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
 826               nextSeparator++;
 827               while(*nextSeparator == ' ') {
 828                   nextSeparator++;
 829               }
 830               /* we actually found the keyword. Copy the value */
 831               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
 832                   while(*(startSearchHere-1) == ' ') {
 833                       startSearchHere--;
 834                   }
 835                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
 836                   result = u_terminateChars(buffer, bufferCapacity, startSearchHere - nextSeparator, status);
 837               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
 838                   i = uprv_strlen(nextSeparator);
 839                   while(nextSeparator[i - 1] == ' ') {
 840                       i--;
 841                   }
 842                   uprv_strncpy(buffer, nextSeparator, i);
 843                   result = u_terminateChars(buffer, bufferCapacity, i, status);
 844               } else {
 845                   /* give a bigger buffer, please */
 846                   *status = U_BUFFER_OVERFLOW_ERROR;
 847                   if(startSearchHere) {
 848                       result = startSearchHere - nextSeparator;
 849                   } else {
 850                       result = uprv_strlen(nextSeparator);
 851                   }
 852               }
 853               return result;
 854           }
 855       }
 856     }
 857     return 0;
 858 }
 859
 860 U_CAPI int32_t U_EXPORT2
 861 uloc_setKeywordValue(const char* keywordName,
 862                      const char* keywordValue,
 863                      char* buffer, int32_t bufferCapacity,
 864                      UErrorCode* status)
 865 {
 866     /* TODO: sorting. removal. */
 867     int32_t keywordNameLen;
 868     int32_t keywordValueLen;
 869     int32_t bufLen;
 870     int32_t needLen = 0;
 871     int32_t foundValueLen;
 872     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
 873     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 874     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 875     int32_t i = 0;
 876     int32_t rc;
 877     char* nextSeparator = NULL;
 878     char* nextEqualsign = NULL;
 879     char* startSearchHere = NULL;
 880     char* keywordStart = NULL;
 881     char *insertHere = NULL;
 882     if(U_FAILURE(*status)) {
 883         return -1;
 884     }
 885     if(keywordValue && !*keywordValue) {
 886         keywordValue = NULL;
 887     }
 888     if(keywordValue) {
 889         keywordValueLen = uprv_strlen(keywordValue);
 890     } else {
 891         keywordValueLen = 0;
 892     }
 893     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 894     if(U_FAILURE(*status)) {
 895         return 0;
 896     }
 897     startSearchHere = (char*)locale_getKeywordsStart(buffer);
 898     if(bufferCapacity>1) {
 899         bufLen = uprv_strlen(buffer);
 900     } else {
 901         *status = U_ILLEGAL_ARGUMENT_ERROR;
 902         return 0;
 903     }
 904     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
 905         if(!keywordValue) { /* no keywords = nothing to remove */
 906             return bufLen;
 907         }
 908
 909         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
 910         if(startSearchHere) { /* had a single @ */
 911             needLen--; /* already had the @ */
 912             /* startSearchHere points at the @ */
 913         } else {
 914             startSearchHere=buffer+bufLen;
 915         }
 916         if(needLen >= bufferCapacity) {
 917             *status = U_BUFFER_OVERFLOW_ERROR;
 918             return needLen; /* no change */
 919         }
 920         *startSearchHere = '@';
 921         startSearchHere++;
 922         uprv_strcpy(startSearchHere, keywordNameBuffer);
 923         startSearchHere += keywordNameLen;
 924         *startSearchHere = '=';
 925         startSearchHere++;
 926         uprv_strcpy(startSearchHere, keywordValue);
 927         startSearchHere+=keywordValueLen;
 928         return needLen;
 929     } /* end shortcut - no @ */
 930
 931     keywordStart = startSearchHere;
 932     /* search for keyword */
 933     while(keywordStart) {
 934         keywordStart++;
 935         /* skip leading spaces (allowed?) */
 936         while(*keywordStart == ' ') {
 937             keywordStart++;
 938         }
 939         nextEqualsign = uprv_strchr(keywordStart, '=');
 940         /* need to normalize both keyword and keyword name */
 941         if(!nextEqualsign) {
 942             break;
 943         }
 944         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
 945             /* keyword name too long for internal buffer */
 946             *status = U_INTERNAL_PROGRAM_ERROR;
 947             return 0;
 948         }
 949         for(i = 0; i < nextEqualsign - keywordStart; i++) {
 950             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
 951         }
 952         /* trim trailing spaces */
 953         while(keywordStart[i-1] == ' ') {
 954             i--;
 955         }
 956         localeKeywordNameBuffer[i] = 0;
 957
 958         nextSeparator = uprv_strchr(nextEqualsign, ';');
 959         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
 960         if(rc == 0) {
 961             nextEqualsign++;
 962             while(*nextEqualsign == ' ') {
 963                 nextEqualsign++;
 964             }
 965             /* we actually found the keyword. Change the value */
 966             if (nextSeparator) {
 967                 keywordAtEnd = 0;
 968                 foundValueLen = nextSeparator - nextEqualsign;
 969             } else {
 970                 keywordAtEnd = 1;
 971                 foundValueLen = uprv_strlen(nextEqualsign);
 972             }
 973             if(keywordValue) { /* adding a value - not removing */
 974               if(foundValueLen == keywordValueLen) {
 975                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
 976                 return bufLen; /* no change in size */
 977               } else if(foundValueLen > keywordValueLen) {
 978                 int32_t delta = foundValueLen - keywordValueLen;
 979                 if(nextSeparator) { /* RH side */
 980                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
 981                 }
 982                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
 983                 bufLen -= delta;
 984                 buffer[bufLen]=0;
 985                 return bufLen;
 986               } else { /* FVL < KVL */
 987                 int32_t delta = keywordValueLen - foundValueLen;
 988                 if((bufLen+delta) >= bufferCapacity) {
 989                   *status = U_BUFFER_OVERFLOW_ERROR;
 990                   return bufLen+delta;
 991                 }
 992                 if(nextSeparator) { /* RH side */
 993                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
 994                 }
 995                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
 996                 bufLen += delta;
 997                 buffer[bufLen]=0;
 998                 return bufLen;
 999               }
1000             } else { /* removing a keyword */
1001               if(keywordAtEnd) {
1002                 /* zero out the ';' or '@' just before startSearchhere */
1003                 keywordStart[-1] = 0;
1004                 return (keywordStart-buffer)-1; /* (string length without keyword) minus separator */
1005               } else {
1006                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1007                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1008                 return bufLen-((nextSeparator+1)-keywordStart);
1009               }
1010             }
1011         } else if(rc<0){ /* end match keyword */
1012           /* could insert at this location. */
1013           insertHere = keywordStart;
1014         }
1015         keywordStart = nextSeparator;
1016     } /* end loop searching */
1017
1018     if(!keywordValue) {
1019       return bufLen; /* removal of non-extant keyword - no change */
1020     }
1021
1022     /* we know there is at least one keyword. */
1023     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1024     if(needLen >= bufferCapacity) {
1025         *status = U_BUFFER_OVERFLOW_ERROR;
1026         return needLen; /* no change */
1027     }
1028
1029     if(insertHere) {
1030       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1031       keywordStart = insertHere;
1032     } else {
1033       keywordStart = buffer+bufLen;
1034       *keywordStart = ';';
1035       keywordStart++;
1036     }
1037     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1038     keywordStart += keywordNameLen;
1039     *keywordStart = '=';
1040     keywordStart++;
1041     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1042     keywordStart+=keywordValueLen;
1043     if(insertHere) {
1044       *keywordStart = ';';
1045       keywordStart++;
1046     }
1047     buffer[needLen]=0;
1048     return needLen;
1049 }
1050
1051 /* ### ID parsing implementation **************************************************/
1052
/* The arguments of the macros below are fully parenthesized so that any
   expression (e.g. a conditional or pointer arithmetic) can be passed.
   Note that each macro evaluates its argument more than once. */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/*returns TRUE if a is one of the letters that can start a special prefix*/
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1066
1067 static char* _strnchr(const char* str, int32_t len, char c) {
1068     U_ASSERT(str != 0 && len >= 0);
1069     while (len-- != 0) {
1070         char d = *str;
1071         if (d == c) {
1072             return (char*) str;
1073         } else if (d == 0) {
1074             break;
1075         }
1076         ++str;
1077     }
1078     return NULL;
1079 }
1080
1081 /**
1082  * Lookup 'key' in the array 'list'.  The array 'list' should contain
1083  * a NULL entry, followed by more entries, and a second NULL entry.
1084  *
1085  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1086  * COUNTRIES_3.
1087  */
1088 static int16_t _findIndex(const char* const* list, const char* key)
1089 {
1090     const char* const* anchor = list;
1091     int32_t pass = 0;
1092
1093     /* Make two passes through two NULL-terminated arrays at 'list' */
1094     while (pass++ < 2) {
1095         while (*list) {
1096             if (uprv_strcmp(key, *list) == 0) {
1097                 return (int16_t)(list - anchor);
1098             }
1099             list++;
1100         }
1101         ++list;     /* skip final NULL *CWB*/
1102     }
1103     return -1;
1104 }
1105
1106 /* count the length of src while copying it to dest; return strlen(src) */
1107 static U_INLINE int32_t
1108 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1109     const char *anchor;
1110     char c;
1111
1112     anchor=src;
1113     for(;;) {
1114         if((c=*src)==0) {
1115             return (int32_t)(src-anchor);
1116         }
1117         if(destCapacity<=0) {
1118             return (int32_t)((src-anchor)+uprv_strlen(src));
1119         }
1120         ++src;
1121         *dest++=c;
1122         --destCapacity;
1123     }
1124 }
1125
1126 /*
1127  * the internal functions _getLanguage(), _getCountry(), _getVariant()
1128  * avoid duplicating code to handle the earlier locale ID pieces
1129  * in the functions for the later ones by
1130  * setting the *pEnd pointer to where they stopped parsing
1131  *
1132  * TODO try to use this in Locale
1133  */
1134 static int32_t
1135 _getLanguage(const char *localeID,
1136              char *language, int32_t languageCapacity,
1137              const char **pEnd) {
1138     int32_t i=0;
1139     int32_t offset;
1140     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1141
1142     /* if it starts with i- or x- then copy that prefix */
1143     if(_isIDPrefix(localeID)) {
1144         if(i<languageCapacity) {
1145             language[i]=(char)uprv_tolower(*localeID);
1146         }
1147         if(i<languageCapacity) {
1148             language[i+1]='-';
1149         }
1150         i+=2;
1151         localeID+=2;
1152     }
1153
1154     /* copy the language as far as possible and count its length */
1155     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1156         if(i<languageCapacity) {
1157             language[i]=(char)uprv_tolower(*localeID);
1158         }
1159         if(i<3) {
1160             lang[i]=(char)uprv_tolower(*localeID);
1161         }
1162         i++;
1163         localeID++;
1164     }
1165
1166     if(i==3) {
1167         /* convert 3 character code to 2 character code if possible *CWB*/
1168         offset=_findIndex(LANGUAGES_3, lang);
1169         if(offset>=0) {
1170             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1171         }
1172     }
1173
1174     if(pEnd!=NULL) {
1175         *pEnd=localeID;
1176     }
1177     return i;
1178 }
1179
1180 static int32_t
1181 _getScript(const char *localeID,
1182             char *script, int32_t scriptCapacity,
1183             const char **pEnd)
1184 {
1185     int32_t idLen = 0;
1186
1187     if (pEnd != NULL) {
1188         *pEnd = localeID;
1189     }
1190
1191     /* copy the second item as far as possible and count its length */
1192     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1193         idLen++;
1194     }
1195
1196     /* If it's exactly 4 characters long, then it's a script and not a country. */
1197     if (idLen == 4) {
1198         int32_t i;
1199         if (pEnd != NULL) {
1200             *pEnd = localeID+idLen;
1201         }
1202         if(idLen > scriptCapacity) {
1203             idLen = scriptCapacity;
1204         }
1205         if (idLen >= 1) {
1206             script[0]=(char)uprv_toupper(*(localeID++));
1207         }
1208         for (i = 1; i < idLen; i++) {
1209             script[i]=(char)uprv_tolower(*(localeID++));
1210         }
1211     }
1212     else {
1213         idLen = 0;
1214     }
1215     return idLen;
1216 }
1217
1218 static int32_t
1219 _getCountry(const char *localeID,
1220             char *country, int32_t countryCapacity,
1221             const char **pEnd)
1222 {
1223     int32_t i=0;
1224     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1225     int32_t offset;
1226
1227     /* copy the country as far as possible and count its length */
1228     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1229         if(i<countryCapacity) {
1230             country[i]=(char)uprv_toupper(*localeID);
1231         }
1232         if(i<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1233             cnty[i]=(char)uprv_toupper(*localeID);
1234         }
1235         i++;
1236         localeID++;
1237     }
1238
1239     /* convert 3 character code to 2 character code if possible *CWB*/
1240     if(i==3) {
1241         offset=_findIndex(COUNTRIES_3, cnty);
1242         if(offset>=0) {
1243             i=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1244         }
1245     }
1246
1247     if(pEnd!=NULL) {
1248         *pEnd=localeID;
1249     }
1250     return i;
1251 }
1252
1253 /**
1254  * @param needSeparator if true, then add leading '_' if any variants
1255  * are added to 'variant'
1256  */
1257 static int32_t
1258 _getVariantEx(const char *localeID,
1259               char prev,
1260               char *variant, int32_t variantCapacity,
1261               UBool needSeparator) {
1262     int32_t i=0;
1263
1264     /* get one or more variant tags and separate them with '_' */
1265     if(_isIDSeparator(prev)) {
1266         /* get a variant string after a '-' or '_' */
1267         while(!_isTerminator(*localeID)) {
1268             if (needSeparator) {
1269                 if (i<variantCapacity) {
1270                     variant[i] = '_';
1271                 }
1272                 ++i;
1273                 needSeparator = FALSE;
1274             }
1275             if(i<variantCapacity) {
1276                 variant[i]=(char)uprv_toupper(*localeID);
1277                 if(variant[i]=='-') {
1278                     variant[i]='_';
1279                 }
1280             }
1281             i++;
1282             localeID++;
1283         }
1284     }
1285
1286     /* if there is no variant tag after a '-' or '_' then look for '@' */
1287     if(i==0) {
1288         if(prev=='@') {
1289             /* keep localeID */
1290         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1291             ++localeID; /* point after the '@' */
1292         } else {
1293             return 0;
1294         }
1295         while(!_isTerminator(*localeID)) {
1296             if (needSeparator) {
1297                 if (i<variantCapacity) {
1298                     variant[i] = '_';
1299                 }
1300                 ++i;
1301                 needSeparator = FALSE;
1302             }
1303             if(i<variantCapacity) {
1304                 variant[i]=(char)uprv_toupper(*localeID);
1305                 if(variant[i]=='-' || variant[i]==',') {
1306                     variant[i]='_';
1307                 }
1308             }
1309             i++;
1310             localeID++;
1311         }
1312     }
1313
1314     return i;
1315 }
1316
1317 static int32_t
1318 _getVariant(const char *localeID,
1319             char prev,
1320             char *variant, int32_t variantCapacity) {
1321     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1322 }
1323
1324 /**
1325  * Delete ALL instances of a variant from the given list of one or
1326  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1327  * @param variants the source string of one or more variants,
1328  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1329  * terminated; if it is, trailing zero will NOT be maintained.
1330  * @param variantsLen length of variants
1331  * @param toDelete variant to delete, without separators, e.g.  "EURO"
1332  * or "PREEURO"; not zero terminated
1333  * @param toDeleteLen length of toDelete
1334  * @return number of characters deleted from variants
1335  */
1336 static int32_t
1337 _deleteVariant(char* variants, int32_t variantsLen,
1338                const char* toDelete, int32_t toDeleteLen) {
1339     int32_t delta = 0; /* number of chars deleted */
1340     for (;;) {
1341         UBool flag = FALSE;
1342         if (variantsLen < toDeleteLen) {
1343             return delta;
1344         }
1345         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1346             (variantsLen == toDeleteLen ||
1347              (flag=(variants[toDeleteLen] == '_')))) {
1348             int32_t d = toDeleteLen + (flag?1:0);
1349             variantsLen -= d;
1350             delta += d;
1351             uprv_memmove(variants, variants+d, variantsLen);
1352         } else {
1353             char* p = _strnchr(variants, variantsLen, '_');
1354             if (p == NULL) {
1355                 return delta;
1356             }
1357             ++p;
1358             variantsLen -= p - variants;
1359             variants = p;
1360         }
1361     }
1362 }
1363
1364 /* Keyword enumeration */
1365
/* State of one keyword enumeration; stored as the context of the UEnumeration. */
typedef struct UKeywordsContext {
    char* keywords; /* heap copy of the keyword list: zero-terminated names back to back, ended by an empty string */
    char* current;  /* the name inside keywords that the next call of uloc_kw_nextKeyword() returns */
} UKeywordsContext;
1370
1371 static void U_CALLCONV
1372 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1373     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1374     uprv_free(enumerator->context);
1375     uprv_free(enumerator);
1376 }
1377
1378 static int32_t U_CALLCONV
1379 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1380     char *kw = ((UKeywordsContext *)en->context)->keywords;
1381     int32_t result = 0;
1382     while(*kw) {
1383         result++;
1384         kw += uprv_strlen(kw)+1;
1385     }
1386     return result;
1387 }
1388
1389 static const char* U_CALLCONV
1390 uloc_kw_nextKeyword(UEnumeration* en,
1391                     int32_t* resultLength,
1392                     UErrorCode* status) {
1393     const char* result = ((UKeywordsContext *)en->context)->current;
1394     int32_t len = 0;
1395     if(*result) {
1396         len = uprv_strlen(((UKeywordsContext *)en->context)->current);
1397         ((UKeywordsContext *)en->context)->current += len+1;
1398     } else {
1399         result = NULL;
1400     }
1401     if (resultLength) {
1402         *resultLength = len;
1403     }
1404     return result;
1405 }
1406
1407 static void U_CALLCONV
1408 uloc_kw_resetKeywords(UEnumeration* en,
1409                       UErrorCode* status) {
1410     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1411 }
1412
/*
 * Template for keyword enumerations: uloc_openKeywordList() copies it into
 * every UEnumeration it creates and then stores that enumeration's context.
 * NOTE(review): the initializers are positional; which UEnumeration member
 * each one lands in depends on the declaration in uenumimp.h -- the two NULLs
 * are presumably the context pointers, followed by the close, count, UChar
 * next, char next and reset callbacks. Confirm against that header.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL,
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1422
1423 U_CAPI UEnumeration* U_EXPORT2
1424 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1425 {
1426   UKeywordsContext *myContext = NULL;
1427   UEnumeration *result = NULL;
1428
1429   if(U_FAILURE(*status)) {
1430     return NULL;
1431   }
1432   result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1433   uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1434   myContext = uprv_malloc(sizeof(UKeywordsContext));
1435   if (myContext == NULL) {
1436     *status = U_MEMORY_ALLOCATION_ERROR;
1437     uprv_free(result);
1438     return NULL;
1439   }
1440   myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1441   uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1442   myContext->keywords[keywordListSize] = 0;
1443   myContext->current = myContext->keywords;
1444   result->context = myContext;
1445   return result;
1446 }
1447
1448 U_CAPI UEnumeration* U_EXPORT2
1449 uloc_openKeywords(const char* localeID,
1450                         UErrorCode* status)
1451 {
1452     int32_t i=0;
1453     char keywords[256];
1454     int32_t keywordsCapacity = 256;
1455     if(status==NULL || U_FAILURE(*status)) {
1456         return 0;
1457     }
1458
1459     if(localeID==NULL) {
1460         localeID=uloc_getDefault();
1461     }
1462
1463     /* Skip the language */
1464     _getLanguage(localeID, NULL, 0, &localeID);
1465     if(_isIDSeparator(*localeID)) {
1466         const char *scriptID;
1467         /* Skip the script if available */
1468         _getScript(localeID+1, NULL, 0, &scriptID);
1469         if(scriptID != localeID+1) {
1470             /* Found optional script */
1471             localeID = scriptID;
1472         }
1473         /* Skip the Country */
1474         if (_isIDSeparator(*localeID)) {
1475             _getCountry(localeID+1, NULL, 0, &localeID);
1476             if(_isIDSeparator(*localeID)) {
1477                 _getVariant(localeID+1, *localeID, NULL, 0);
1478             }
1479         }
1480     }
1481
1482     /* keywords are located after '@' */
1483     if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
1484         i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1485     }
1486
1487     if(i) {
1488         return uloc_openKeywordList(keywords, i, status);
1489     } else {
1490         return NULL;
1491     }
1492 }
1493
1494
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * Non-zero if at least one bit of 'mask' is set in 'options'.
 * Both arguments are parenthesized so that expression arguments
 * (for example "a | b") are combined before the bitwise AND.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)
1500
1501 /**
1502  * Canonicalize the given localeID, to level 1 or to level 2,
1503  * depending on the options.  To specify level 1, pass in options=0.
1504  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1505  *
1506  * This is the code underlying uloc_getName and uloc_canonicalize.
1507  */
1508 static int32_t
1509 _canonicalize(const char* localeID,
1510               char* result,
1511               int32_t resultCapacity,
1512               uint32_t options,
1513               UErrorCode* err) {
1514     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1515     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1516     const char* keywordAssign = NULL;
1517     const char* separatorIndicator = NULL;
1518     const char* addKeyword = NULL;
1519     const char* addValue = NULL;
1520     char* name;
1521     char* variant = NULL; /* pointer into name, or NULL */
1522     int32_t sawEuro = 0;
1523
1524     if (U_FAILURE(*err)) {
1525         return 0;
1526     }
1527
1528     if (localeID==NULL) {
1529         localeID=uloc_getDefault();
1530     }
1531
1532     /* if we are doing a full canonicalization, then put results in
1533        localeBuffer, if necessary; otherwise send them to result. */
1534     if (OPTION_SET(options, _ULOC_CANONICALIZE) &&
1535         (result == NULL || resultCapacity <  sizeof(localeBuffer))) {
1536         name = localeBuffer;
1537         nameCapacity = sizeof(localeBuffer);
1538     } else {
1539         name = result;
1540         nameCapacity = resultCapacity;
1541     }
1542
1543     /* get all pieces, one after another, and separate with '_' */
1544     len=_getLanguage(localeID, name, nameCapacity, &localeID);
1545     if(_isIDSeparator(*localeID)) {
1546         const char *scriptID;
1547
1548         ++fieldCount;
1549         if(len<nameCapacity) {
1550             name[len]='_';
1551         }
1552         ++len;
1553
1554         scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1555         if(scriptSize > 0) {
1556             /* Found optional script */
1557             localeID = scriptID;
1558             ++fieldCount;
1559             len+=scriptSize;
1560             if (_isIDSeparator(*localeID)) {
1561                 /* If there is something else, then we add the _ */
1562                 if(len<nameCapacity) {
1563                     name[len]='_';
1564                 }
1565                 ++len;
1566             }
1567         }
1568
1569         if (_isIDSeparator(*localeID)) {
1570             len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID);
1571             if(_isIDSeparator(*localeID)) {
1572                 ++fieldCount;
1573                 if(len<nameCapacity) {
1574                     name[len]='_';
1575                 }
1576                 ++len;
1577                 variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1578                 if (variantSize > 0) {
1579                     variant = name+len;
1580                     len += variantSize;
1581                     localeID += variantSize + 1; /* skip '_' and variant */
1582                 }
1583             }
1584         }
1585     }
1586
1587     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1588     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1589         UBool done = FALSE;
1590         do {
1591             char c = *localeID;
1592             switch (c) {
1593             case 0:
1594             case '@':
1595                 done = TRUE;
1596                 break;
1597             default:
1598                 if (len<nameCapacity) {
1599                     name[len] = c;
1600                 }
1601                 ++len;
1602                 ++localeID;
1603                 break;
1604             }
1605         } while (!done);
1606     }
1607
1608     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1609        After this, localeID either points to '@' or is NULL */
1610     if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1611         keywordAssign = uprv_strchr(localeID, '=');
1612         separatorIndicator = uprv_strchr(localeID, ';');
1613     }
1614
1615     /* Copy POSIX-style variant, if any [mr@FOO] */
1616     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1617         localeID != NULL && keywordAssign == NULL) {
1618         for (;;) {
1619             char c = *localeID;
1620             if (c == 0) {
1621                 break;
1622             }
1623             if (len<nameCapacity) {
1624                 name[len] = c;
1625             }
1626             ++len;
1627             ++localeID;
1628         }
1629     }
1630
1631     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1632         /* Handle @FOO variant if @ is present and not followed by = */
1633         if (localeID!=NULL && keywordAssign==NULL) {
1634             int32_t posixVariantSize;
1635             /* Add missing '_' if needed */
1636             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1637                 do {
1638                     if(len<nameCapacity) {
1639                         name[len]='_';
1640                     }
1641                     ++len;
1642                     ++fieldCount;
1643                 } while(fieldCount<2);
1644             }
1645             posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1646                                              (UBool)(variantSize > 0));
1647             if (posixVariantSize > 0) {
1648                 if (variant == NULL) {
1649                     variant = name+len;
1650                 }
1651                 len += posixVariantSize;
1652                 variantSize += posixVariantSize;
1653             }
1654         }
1655
1656         /* Check for EURO variants. */
1657         sawEuro = _deleteVariant(variant, variantSize, "EURO", 4);
1658         len -= sawEuro;
1659         if (sawEuro > 0 && name[len-1] == '_') { /* delete trailing '_' */
1660             --len;
1661         }
1662
1663         /* Look up the ID in the canonicalization map */
1664         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1665             const char* id = CANONICALIZE_MAP[j].id;
1666             int32_t n = uprv_strlen(id);
1667             if (len == n && uprv_strncmp(name, id, n) == 0) {
1668                 if (n == 0 && localeID != NULL) {
1669                     break; /* Don't remap "" if keywords present */
1670                 }
1671                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1672                 addKeyword = CANONICALIZE_MAP[j].keyword;
1673                 addValue = CANONICALIZE_MAP[j].value;
1674                 break;
1675             }
1676         }
1677
1678         /* Explicit EURO variant overrides keyword in CANONICALIZE_MAP */
1679         if (sawEuro > 0) {
1680             addKeyword = "currency";
1681             addValue = "EUR";
1682         }
1683     }
1684
1685     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1686         if (localeID!=NULL && keywordAssign!=NULL &&
1687             (!separatorIndicator || separatorIndicator > keywordAssign)) {
1688             if(len<nameCapacity) {
1689                 name[len]='@';
1690             }
1691             ++len;
1692             ++fieldCount;
1693             len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1694                                 addKeyword, addValue, err);
1695         } else if (addKeyword != NULL) {
1696             U_ASSERT(addValue != NULL);
1697             /* inelegant but works -- later make _getKeywords do this? */
1698             len += _copyCount(name+len, nameCapacity-len, "@");
1699             len += _copyCount(name+len, nameCapacity-len, addKeyword);
1700             len += _copyCount(name+len, nameCapacity-len, "=");
1701             len += _copyCount(name+len, nameCapacity-len, addValue);
1702         }
1703     }
1704
1705     if (U_SUCCESS(*err) && name == localeBuffer) {
1706         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1707     }
1708
1709     return u_terminateChars(result, resultCapacity, len, err);
1710 }
1711
1712 /* ### ID parsing API **************************************************/
1713
1714 U_CAPI int32_t  U_EXPORT2
1715 uloc_getParent(const char*    localeID,
1716                char* parent,
1717                int32_t parentCapacity,
1718                UErrorCode* err)
1719 {
1720     const char *lastUnderscore;
1721     int32_t i;
1722
1723     if (U_FAILURE(*err))
1724         return 0;
1725
1726     if (localeID == NULL)
1727         localeID = uloc_getDefault();
1728
1729     lastUnderscore=uprv_strrchr(localeID, '_');
1730     if(lastUnderscore!=NULL) {
1731         i=(int32_t)(lastUnderscore-localeID);
1732     } else {
1733         i=0;
1734     }
1735
1736     if(i>0) {
1737         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1738     }
1739     return u_terminateChars(parent, parentCapacity, i, err);
1740 }
1741
1742 U_CAPI int32_t U_EXPORT2
1743 uloc_getLanguage(const char*    localeID,
1744          char* language,
1745          int32_t languageCapacity,
1746          UErrorCode* err)
1747 {
1748     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1749     int32_t i=0;
1750
1751     if (err==NULL || U_FAILURE(*err)) {
1752         return 0;
1753     }
1754
1755     if(localeID==NULL) {
1756         localeID=uloc_getDefault();
1757     }
1758
1759     i=_getLanguage(localeID, language, languageCapacity, NULL);
1760     return u_terminateChars(language, languageCapacity, i, err);
1761 }
1762
1763 U_CAPI int32_t U_EXPORT2
1764 uloc_getScript(const char*    localeID,
1765          char* script,
1766          int32_t scriptCapacity,
1767          UErrorCode* err)
1768 {
1769     int32_t i=0;
1770
1771     if(err==NULL || U_FAILURE(*err)) {
1772         return 0;
1773     }
1774
1775     if(localeID==NULL) {
1776         localeID=uloc_getDefault();
1777     }
1778
1779     /* skip the language */
1780     _getLanguage(localeID, NULL, 0, &localeID);
1781     if(_isIDSeparator(*localeID)) {
1782         i=_getScript(localeID+1, script, scriptCapacity, NULL);
1783     }
1784     return u_terminateChars(script, scriptCapacity, i, err);
1785 }
1786
1787 U_CAPI int32_t  U_EXPORT2
1788 uloc_getCountry(const char* localeID,
1789             char* country,
1790             int32_t countryCapacity,
1791             UErrorCode* err)
1792 {
1793     int32_t i=0;
1794
1795     if(err==NULL || U_FAILURE(*err)) {
1796         return 0;
1797     }
1798
1799     if(localeID==NULL) {
1800         localeID=uloc_getDefault();
1801     }
1802
1803     /* Skip the language */
1804     _getLanguage(localeID, NULL, 0, &localeID);
1805     if(_isIDSeparator(*localeID)) {
1806         const char *scriptID;
1807         /* Skip the script if available */
1808         _getScript(localeID+1, NULL, 0, &scriptID);
1809         if(scriptID != localeID+1) {
1810             /* Found optional script */
1811             localeID = scriptID;
1812         }
1813         if(_isIDSeparator(*localeID)) {
1814             i=_getCountry(localeID+1, country, countryCapacity, NULL);
1815         }
1816     }
1817     return u_terminateChars(country, countryCapacity, i, err);
1818 }
1819
1820 U_CAPI int32_t  U_EXPORT2
1821 uloc_getVariant(const char* localeID,
1822                 char* variant,
1823                 int32_t variantCapacity,
1824                 UErrorCode* err)
1825 {
1826     int32_t i=0;
1827     UBool haveVariant=FALSE;
1828
1829     if(err==NULL || U_FAILURE(*err)) {
1830         return 0;
1831     }
1832
1833     if(localeID==NULL) {
1834         localeID=uloc_getDefault();
1835     }
1836
1837     /* Skip the language */
1838     _getLanguage(localeID, NULL, 0, &localeID);
1839     if(_isIDSeparator(*localeID)) {
1840         const char *scriptID;
1841         /* Skip the script if available */
1842         _getScript(localeID+1, NULL, 0, &scriptID);
1843         if(scriptID != localeID+1) {
1844             /* Found optional script */
1845             localeID = scriptID;
1846         }
1847         /* Skip the Country */
1848         if (_isIDSeparator(*localeID)) {
1849             _getCountry(localeID+1, NULL, 0, &localeID);
1850             if(_isIDSeparator(*localeID)) {
1851                 haveVariant=TRUE;
1852                 i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1853             }
1854         }
1855     }
1856
1857     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1858     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1859 /*
1860     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1861         i=_getVariant(localeID+1, '@', variant, variantCapacity);
1862     }
1863 */
1864     return u_terminateChars(variant, variantCapacity, i, err);
1865 }
1866
1867 U_CAPI int32_t  U_EXPORT2
1868 uloc_getName(const char* localeID,
1869              char* name,
1870              int32_t nameCapacity,
1871              UErrorCode* err)
1872 {
1873     return _canonicalize(localeID, name, nameCapacity, 0, err);
1874 }
1875
1876 U_CAPI int32_t  U_EXPORT2
1877 uloc_getBaseName(const char* localeID,
1878                  char* name,
1879                  int32_t nameCapacity,
1880                  UErrorCode* err)
1881 {
1882     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1883 }
1884
1885 U_CAPI int32_t  U_EXPORT2
1886 uloc_canonicalize(const char* localeID,
1887                   char* name,
1888                   int32_t nameCapacity,
1889                   UErrorCode* err)
1890 {
1891     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1892 }
1893
1894 U_CAPI const char*  U_EXPORT2
1895 uloc_getISO3Language(const char* localeID)
1896 {
1897     int16_t offset;
1898     char lang[ULOC_LANG_CAPACITY];
1899     UErrorCode err = U_ZERO_ERROR;
1900
1901     if (localeID == NULL)
1902     {
1903         localeID = uloc_getDefault();
1904     }
1905     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1906     if (U_FAILURE(err))
1907         return "";
1908     offset = _findIndex(LANGUAGES, lang);
1909     if (offset < 0)
1910         return "";
1911     return LANGUAGES_3[offset];
1912 }
1913
1914 U_CAPI const char*  U_EXPORT2
1915 uloc_getISO3Country(const char* localeID)
1916 {
1917     int16_t offset;
1918     char cntry[ULOC_LANG_CAPACITY];
1919     UErrorCode err = U_ZERO_ERROR;
1920
1921     if (localeID == NULL)
1922     {
1923         localeID = uloc_getDefault();
1924     }
1925     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1926     if (U_FAILURE(err))
1927         return "";
1928     offset = _findIndex(COUNTRIES, cntry);
1929     if (offset < 0)
1930         return "";
1931
1932     return COUNTRIES_3[offset];
1933 }
1934
1935 U_CAPI uint32_t  U_EXPORT2
1936 uloc_getLCID(const char* localeID)
1937 {
1938     UErrorCode status = U_ZERO_ERROR;
1939     char       langID[ULOC_FULLNAME_CAPACITY];
1940
1941     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
1942     if (U_FAILURE(status)) {
1943         return 0;
1944     }
1945
1946     return uprv_convertToLCID(langID, localeID, &status);
1947 }
1948
1949 /* ### Default locale **************************************************/
1950
1951 U_CAPI const char*  U_EXPORT2
1952 uloc_getDefault()
1953 {
1954     return locale_get_default();
1955 }
1956
1957 U_CAPI void  U_EXPORT2
1958 uloc_setDefault(const char*   newDefaultLocale,
1959              UErrorCode* err)
1960 {
1961     if (U_FAILURE(*err))
1962         return;
1963     /* the error code isn't currently used for anything by this function*/
1964
1965     /* propagate change to C++ */
1966     locale_set_default(newDefaultLocale);
1967 }
1968
1969 /* ### Display name **************************************************/
1970
1971 /*
1972  * Lookup a resource bundle table item with fallback on the table level.
1973  * Regular resource bundle lookups perform fallback to parent locale bundles
1974  * and eventually the root bundle, but only for top-level items.
1975  * This function takes the name of a top-level table and of an item in that table
1976  * and performs a lookup of both, falling back until a bundle contains a table
1977  * with this item.
1978  *
1979  * Note: Only the opening of entire bundles falls back through the default locale
1980  * before root. Once a bundle is open, item lookups do not go through the
1981  * default locale because that would result in a mix of languages that is
1982  * unpredictable to the programmer and most likely useless.
1983  */
1984 static const UChar *
1985 _res_getTableStringWithFallback(const char *path, const char *locale,
1986                               const char *tableKey, const char *subTableKey,
1987                               const char *itemKey,
1988                               int32_t *pLength,
1989                               UErrorCode *pErrorCode)
1990 {
1991     char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
1992     UResourceBundle *rb, table;
1993     const UChar *item;
1994     UErrorCode errorCode;
1995     char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
1996     int32_t efnLen =0;
1997     const UChar* ef = NULL;
1998     UBool overrideExplicitFallback = FALSE;
1999     for(;;) {
2000         /*
2001          * open the bundle for the current locale
2002          * this falls back through the locale's chain to root
2003          */
2004         errorCode=U_ZERO_ERROR;
2005         rb=ures_open(path, locale, &errorCode);
2006         if(U_FAILURE(errorCode)) {
2007             /* total failure, not even root could be opened */
2008             *pErrorCode=errorCode;
2009             return NULL;
2010         } else if(errorCode==U_USING_DEFAULT_WARNING ||
2011                   (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2012         ) {
2013             /* set the "strongest" error code (success->fallback->default->failure) */
2014             *pErrorCode=errorCode;
2015         }
2016
2017         /*
2018          * try to open the requested table
2019          * this falls back through the locale's chain to root, but not through the default locale
2020          */
2021         errorCode=U_ZERO_ERROR;
2022         ures_initStackObject(&table);
2023         ures_getByKey(rb, tableKey, &table, &errorCode);
2024         if(U_FAILURE(errorCode)) {
2025             /* no such table anywhere in this fallback chain */
2026             ures_close(rb);
2027             *pErrorCode=errorCode;
2028             return NULL;
2029         } else if(errorCode==U_USING_DEFAULT_WARNING ||
2030                   (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2031         ) {
2032             /* set the "strongest" error code (success->fallback->default->failure) */
2033             *pErrorCode=errorCode;
2034         }
2035
2036         /* check if the fallback token is set */
2037         ef = ures_getStringByKey(&table, "Fallback", &efnLen, &errorCode);
2038         if(U_SUCCESS(errorCode)){
2039             /* set the fallback chain */
2040             u_UCharsToChars(ef, explicitFallbackName, efnLen);
2041             /* null terminate the buffer */
2042             explicitFallbackName[efnLen]=0;
2043         }else if(errorCode==U_USING_DEFAULT_WARNING ||
2044               (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2045         ) {
2046             /* set the "strongest" error code (success->fallback->default->failure) */
2047             *pErrorCode=errorCode;
2048         }
2049
2050         /* try to open the requested item in the table */
2051         errorCode=U_ZERO_ERROR;
2052         if(subTableKey == NULL){
2053             item=ures_getStringByKey(&table, itemKey, pLength, &errorCode);
2054         }else{
2055             UResourceBundle subTable;
2056             ures_initStackObject(&subTable);
2057             ures_getByKey(&table, subTableKey, &subTable, &errorCode);
2058             item = ures_getStringByKey(&subTable, itemKey, pLength, &errorCode);
2059             ures_close(&subTable);
2060         }
2061         if(U_SUCCESS(errorCode)) {
2062             /* if the item for the key is empty ... override the explicit fall back set */
2063             if(item[0]==0 && efnLen > 0){
2064                 overrideExplicitFallback = TRUE;
2065             }else{
2066                 /* we got the requested item! */
2067                 ures_close(&table);
2068                 ures_close(rb);
2069
2070                 if(errorCode==U_USING_DEFAULT_WARNING ||
2071                    (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2072                 ) {
2073                     /* set the "strongest" error code (success->fallback->default->failure) */
2074                     *pErrorCode=errorCode;
2075                 }
2076
2077                 /*
2078                  * It is safe to close the bundle and still return the
2079                  * string pointer because resource bundles are
2080                  * cached until u_cleanup().
2081                  */
2082                 return item;
2083             }
2084         }
2085
2086         /*
2087          * We get here if the item was not found.
2088          * We will follow the chain to the parent locale bundle and look in
2089          * the table there.
2090          */
2091
2092         /* get the real locale ID for this table */
2093         errorCode=U_ZERO_ERROR;
2094         locale=ures_getLocale(&table, &errorCode);
2095         /* keep table and rb open until we are done using the locale string owned by the table bundle */
2096         if(U_FAILURE(errorCode)) {
2097             /* error getting the locale ID for an open RB - should never happen */
2098             ures_close(&table);
2099             ures_close(rb);
2100             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
2101             return NULL;
2102         }
2103
2104         if(*locale==0 || 0==uprv_strcmp(locale, _kRootName) || 0==uprv_strcmp(locale,explicitFallbackName)) {
2105             /* end of fallback; even root does not have the requested item either */
2106             ures_close(&table);
2107             ures_close(rb);
2108             *pErrorCode=U_MISSING_RESOURCE_ERROR;
2109             return NULL;
2110         }
2111
2112         /* could not find the table, or its item, try to fall back to a different RB and table */
2113         errorCode=U_ZERO_ERROR;
2114         if(efnLen > 0 && overrideExplicitFallback == FALSE){
2115             /* continue the fallback lookup with the explicit fallback that is requested */
2116             locale = explicitFallbackName;
2117         }else{
2118             uloc_getParent(locale, localeBuffer, sizeof(localeBuffer), &errorCode);
2119             if(U_FAILURE(errorCode) || errorCode==U_STRING_NOT_TERMINATED_WARNING) {
2120                 /* error getting the parent locale ID - should never happen */
2121                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
2122                 return NULL;
2123             }
2124
2125             /* continue the fallback lookup with the parent locale ID */
2126             locale=localeBuffer;
2127
2128             /* adjust error code as we fall back */
2129             if (uprv_strlen(locale) == 0)   /* Falling back to root locale? */
2130                   *pErrorCode = U_USING_DEFAULT_WARNING;
2131             else if (*pErrorCode != U_USING_DEFAULT_WARNING)
2132                   *pErrorCode = U_USING_FALLBACK_WARNING;
2133         }
2134         /* done with the locale string - ready to close table and rb */
2135         ures_close(&table);
2136         ures_close(rb);
2137     }
2138 }
2139
2140 static int32_t
2141 _getStringOrCopyKey(const char *path, const char *locale,
2142                     const char *tableKey,
2143                     const char* subTableKey,
2144                     const char *itemKey,
2145                     const char *substitute,
2146                     UChar *dest, int32_t destCapacity,
2147                     UErrorCode *pErrorCode) {
2148     const UChar *s = NULL;
2149     int32_t length;
2150
2151     if(itemKey==NULL) {
2152         /* top-level item: normal resource bundle access */
2153         UResourceBundle *rb;
2154
2155         rb=ures_open(path, locale, pErrorCode);
2156         if(U_SUCCESS(*pErrorCode)) {
2157             s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
2158             /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2159             ures_close(rb);
2160         }
2161     } else {
2162         /* second-level item, use special fallback */
2163         s=_res_getTableStringWithFallback(path, locale,
2164                                            tableKey,
2165                                            subTableKey,
2166                                            itemKey,
2167                                            &length,
2168                                            pErrorCode);
2169     }
2170     if(U_SUCCESS(*pErrorCode)) {
2171         int32_t copyLength=uprv_min(length, destCapacity);
2172         if(copyLength>0 && s != NULL) {
2173             u_memcpy(dest, s, copyLength);
2174         }
2175     } else {
2176         /* no string from a resource bundle: convert the substitute */
2177         length=(int32_t)uprv_strlen(substitute);
2178         u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
2179         *pErrorCode=U_USING_DEFAULT_WARNING;
2180     }
2181
2182     return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2183 }
2184
2185 static int32_t
2186 _getDisplayNameForComponent(const char *locale,
2187                             const char *displayLocale,
2188                             UChar *dest, int32_t destCapacity,
2189                             int32_t (*getter)(const char *, char *, int32_t, UErrorCode *),
2190                             const char *tag,
2191                             UErrorCode *pErrorCode) {
2192     char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
2193     int32_t length;
2194     UErrorCode localStatus;
2195
2196     /* argument checking */
2197     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2198         return 0;
2199     }
2200
2201     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2202         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2203         return 0;
2204     }
2205
2206     localStatus = U_ZERO_ERROR;
2207     length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
2208     if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
2209         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2210         return 0;
2211     }
2212     if(length==0) {
2213         return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
2214     }
2215
2216     return _getStringOrCopyKey(NULL, displayLocale,
2217                                tag, NULL, localeBuffer,
2218                                localeBuffer,
2219                                dest, destCapacity,
2220                                pErrorCode);
2221 }
2222
2223 U_CAPI int32_t U_EXPORT2
2224 uloc_getDisplayLanguage(const char *locale,
2225                         const char *displayLocale,
2226                         UChar *dest, int32_t destCapacity,
2227                         UErrorCode *pErrorCode) {
2228     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2229                 uloc_getLanguage, _kLanguages, pErrorCode);
2230 }
2231
2232 U_CAPI int32_t U_EXPORT2
2233 uloc_getDisplayScript(const char* locale,
2234                       const char* displayLocale,
2235                       UChar *dest, int32_t destCapacity,
2236                       UErrorCode *pErrorCode)
2237 {
2238     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2239                 uloc_getScript, _kScripts, pErrorCode);
2240 }
2241
2242 U_CAPI int32_t U_EXPORT2
2243 uloc_getDisplayCountry(const char *locale,
2244                        const char *displayLocale,
2245                        UChar *dest, int32_t destCapacity,
2246                        UErrorCode *pErrorCode) {
2247     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2248                 uloc_getCountry, _kCountries, pErrorCode);
2249 }
2250
2251 /*
2252  * TODO separate variant1_variant2_variant3...
2253  * by getting each tag's display string and concatenating them with ", "
2254  * in between - similar to uloc_getDisplayName()
2255  */
2256 U_CAPI int32_t U_EXPORT2
2257 uloc_getDisplayVariant(const char *locale,
2258                        const char *displayLocale,
2259                        UChar *dest, int32_t destCapacity,
2260                        UErrorCode *pErrorCode) {
2261     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2262                 uloc_getVariant, _kVariants, pErrorCode);
2263 }
2264
2265 U_CAPI int32_t U_EXPORT2
2266 uloc_getDisplayName(const char *locale,
2267                     const char *displayLocale,
2268                     UChar *dest, int32_t destCapacity,
2269                     UErrorCode *pErrorCode)
2270 {
2271     int32_t length, length2, length3 = 0;
2272     UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
2273     UEnumeration* keywordEnum = NULL;
2274     int32_t keywordCount = 0;
2275     const char *keyword = NULL;
2276     int32_t keywordLen = 0;
2277     char keywordValue[256];
2278     int32_t keywordValueLen = 0;
2279
2280     /* argument checking */
2281     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2282         return 0;
2283     }
2284
2285     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2286         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2287         return 0;
2288     }
2289
2290     /*
2291      * if there is a language, then write "language (country, variant)"
2292      * otherwise write "country, variant"
2293      */
2294
2295     /* write the language */
2296     length=uloc_getDisplayLanguage(locale, displayLocale,
2297                                    dest, destCapacity,
2298                                    pErrorCode);
2299     hasLanguage= length>0;
2300
2301     if(hasLanguage) {
2302         /* append " (" */
2303         if(length<destCapacity) {
2304             dest[length]=0x20;
2305         }
2306         ++length;
2307         if(length<destCapacity) {
2308             dest[length]=0x28;
2309         }
2310         ++length;
2311     }
2312
2313     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2314         /* keep preflighting */
2315         *pErrorCode=U_ZERO_ERROR;
2316     }
2317
2318     /* append the script */
2319     if(length<destCapacity) {
2320         length2=uloc_getDisplayScript(locale, displayLocale,
2321                                        dest+length, destCapacity-length,
2322                                        pErrorCode);
2323     } else {
2324         length2=uloc_getDisplayScript(locale, displayLocale,
2325                                        NULL, 0,
2326                                        pErrorCode);
2327     }
2328     hasScript= length2>0;
2329     length+=length2;
2330
2331     if(hasScript) {
2332         /* append ", " */
2333         if(length<destCapacity) {
2334             dest[length]=0x2c;
2335         }
2336         ++length;
2337         if(length<destCapacity) {
2338             dest[length]=0x20;
2339         }
2340         ++length;
2341     }
2342
2343     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2344         /* keep preflighting */
2345         *pErrorCode=U_ZERO_ERROR;
2346     }
2347
2348     /* append the country */
2349     if(length<destCapacity) {
2350         length2=uloc_getDisplayCountry(locale, displayLocale,
2351                                        dest+length, destCapacity-length,
2352                                        pErrorCode);
2353     } else {
2354         length2=uloc_getDisplayCountry(locale, displayLocale,
2355                                        NULL, 0,
2356                                        pErrorCode);
2357     }
2358     hasCountry= length2>0;
2359     length+=length2;
2360
2361     if(hasCountry) {
2362         /* append ", " */
2363         if(length<destCapacity) {
2364             dest[length]=0x2c;
2365         }
2366         ++length;
2367         if(length<destCapacity) {
2368             dest[length]=0x20;
2369         }
2370         ++length;
2371     }
2372
2373     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2374         /* keep preflighting */
2375         *pErrorCode=U_ZERO_ERROR;
2376     }
2377
2378     /* append the variant */
2379     if(length<destCapacity) {
2380         length2=uloc_getDisplayVariant(locale, displayLocale,
2381                                        dest+length, destCapacity-length,
2382                                        pErrorCode);
2383     } else {
2384         length2=uloc_getDisplayVariant(locale, displayLocale,
2385                                        NULL, 0,
2386                                        pErrorCode);
2387     }
2388     hasVariant= length2>0;
2389     length+=length2;
2390
2391     if(hasVariant) {
2392         /* append ", " */
2393         if(length<destCapacity) {
2394             dest[length]=0x2c;
2395         }
2396         ++length;
2397         if(length<destCapacity) {
2398             dest[length]=0x20;
2399         }
2400         ++length;
2401     }
2402
2403     keywordEnum = uloc_openKeywords(locale, pErrorCode);
2404
2405     for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){
2406           if(U_FAILURE(*pErrorCode)){
2407               break;
2408           }
2409           /* the uenum_next returns NUL terminated string */
2410           keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
2411           if(length + length3 < destCapacity) {
2412             length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2413           } else {
2414             length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
2415           }
2416           if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2417               /* keep preflighting */
2418               *pErrorCode=U_ZERO_ERROR;
2419           }
2420           keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
2421           if(keywordValueLen) {
2422             if(length + length3 < destCapacity) {
2423               dest[length + length3] = 0x3D;
2424             }
2425             length3++;
2426             if(length + length3 < destCapacity) {
2427               length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2428             } else {
2429               length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
2430             }
2431             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2432                 /* keep preflighting */
2433                 *pErrorCode=U_ZERO_ERROR;
2434             }
2435           }
2436           if(keywordCount > 1) {
2437             if(length + length3 + 1 < destCapacity && keywordCount) {
2438               dest[length + length3]=0x2c;
2439               dest[length + length3+1]=0x20;
2440             }
2441             length3++; /* ',' */
2442             length3++; /* ' ' */
2443           }
2444     }
2445     uenum_close(keywordEnum);
2446
2447     hasKeywords = length3 > 0;
2448     length += length3;
2449
2450
2451
2452     if ((hasScript && !hasCountry)
2453         || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
2454         || ((hasScript || hasCountry || hasVariant) && !hasKeywords)
2455         || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords))
2456     {
2457         /* remove ", " or " (" */
2458         length-=2;
2459     }
2460
2461     if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) {
2462         /* append ")" */
2463         if(length<destCapacity) {
2464             dest[length]=0x29;
2465         }
2466         ++length;
2467     }
2468
2469     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2470         /* keep preflighting */
2471         *pErrorCode=U_ZERO_ERROR;
2472     }
2473
2474     return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2475 }
2476
2477 U_CAPI int32_t U_EXPORT2
2478 uloc_getDisplayKeyword(const char* keyword,
2479                        const char* displayLocale,
2480                        UChar* dest,
2481                        int32_t destCapacity,
2482                        UErrorCode* status){
2483
2484     /* argument checking */
2485     if(status==NULL || U_FAILURE(*status)) {
2486         return 0;
2487     }
2488
2489     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2490         *status=U_ILLEGAL_ARGUMENT_ERROR;
2491         return 0;
2492     }
2493
2494
2495     /* pass itemKey=NULL to look for a top-level item */
2496     return _getStringOrCopyKey(NULL, displayLocale,
2497                                _kKeys, NULL,
2498                                keyword,
2499                                keyword,
2500                                dest, destCapacity,
2501                                status);
2502
2503 }
2504
2505
2506 #define UCURRENCY_DISPLAY_NAME_INDEX 1
2507
2508 U_CAPI int32_t U_EXPORT2
2509 uloc_getDisplayKeywordValue(   const char* locale,
2510                                const char* keyword,
2511                                const char* displayLocale,
2512                                UChar* dest,
2513                                int32_t destCapacity,
2514                                UErrorCode* status){
2515
2516
2517     char keywordValue[ULOC_FULLNAME_CAPACITY*4];
2518     int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
2519     int32_t keywordValueLen =0;
2520
2521     /* argument checking */
2522     if(status==NULL || U_FAILURE(*status)) {
2523         return 0;
2524     }
2525
2526     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2527         *status=U_ILLEGAL_ARGUMENT_ERROR;
2528         return 0;
2529     }
2530
2531     /* get the keyword value */
2532     keywordValue[0]=0;
2533     keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
2534
2535     /*
2536      * if the keyword is equal to currency .. then to get the display name
2537      * we need to do the fallback ourselves
2538      */
2539     if(uprv_stricmp(keyword, _kCurrency)==0){
2540
2541         int32_t dispNameLen = 0;
2542         const UChar *dispName = NULL;
2543
2544         UResourceBundle *bundle     = ures_open(NULL, displayLocale, status);
2545         UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
2546         UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
2547
2548         dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
2549
2550         /*close the bundles */
2551         ures_close(currency);
2552         ures_close(currencies);
2553         ures_close(bundle);
2554
2555         if(U_FAILURE(*status)){
2556             if(*status == U_MISSING_RESOURCE_ERROR){
2557                 /* we just want to write the value over if nothing is available */
2558                 *status = U_USING_DEFAULT_WARNING;
2559             }else{
2560                 return 0;
2561             }
2562         }
2563
2564         /* now copy the dispName over if not NULL */
2565         if(dispName != NULL){
2566             if(dispNameLen <= destCapacity){
2567                 uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
2568                 return u_terminateUChars(dest, destCapacity, dispNameLen, status);
2569             }else{
2570                 *status = U_BUFFER_OVERFLOW_ERROR;
2571                 return dispNameLen;
2572             }
2573         }else{
2574             /* we have not found the display name for the value .. just copy over */
2575             if(keywordValueLen <= destCapacity){
2576                 u_charsToUChars(keywordValue, dest, keywordValueLen);
2577                 return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
2578             }else{
2579                  *status = U_BUFFER_OVERFLOW_ERROR;
2580                 return keywordValueLen;
2581             }
2582         }
2583
2584
2585     }else{
2586
2587         return _getStringOrCopyKey(NULL, displayLocale,
2588                                    _kTypes, keyword,
2589                                    keywordValue,
2590                                    keywordValue,
2591                                    dest, destCapacity,
2592                                    status);
2593     }
2594 }
2595
2596 /* ### Get available **************************************************/
2597
2598 static UBool U_CALLCONV uloc_cleanup(void) {
2599     char ** temp;
2600
2601     if (_installedLocales) {
2602         temp = _installedLocales;
2603         _installedLocales = NULL;
2604
2605         _installedLocalesCount = 0;
2606
2607         uprv_free(temp);
2608     }
2609     return TRUE;
2610 }
2611
2612 static void _load_installedLocales()
2613 {
2614     UBool   localesLoaded;
2615
2616     umtx_lock(NULL);
2617     localesLoaded = _installedLocales != NULL;
2618     umtx_unlock(NULL);
2619
2620     if (localesLoaded == FALSE) {
2621         UResourceBundle *index = NULL;
2622         UResourceBundle installed;
2623         UErrorCode status = U_ZERO_ERROR;
2624         char ** temp;
2625         int32_t i = 0;
2626         int32_t localeCount;
2627
2628         ures_initStackObject(&installed);
2629         index = ures_openDirect(NULL, _kIndexLocaleName, &status);
2630         ures_getByKey(index, _kIndexTag, &installed, &status);
2631
2632         if(U_SUCCESS(status)) {
2633             localeCount = ures_getSize(&installed);
2634             temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
2635
2636             ures_resetIterator(&installed);
2637             while(ures_hasNext(&installed)) {
2638                 ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
2639             }
2640             temp[i] = NULL;
2641
2642             umtx_lock(NULL);
2643             if (_installedLocales == NULL)
2644             {
2645                 _installedLocales = temp;
2646                 _installedLocalesCount = localeCount;
2647                 temp = NULL;
2648                 ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
2649             }
2650             umtx_unlock(NULL);
2651
2652             uprv_free(temp);
2653             ures_close(&installed);
2654         }
2655         ures_close(index);
2656     }
2657 }
2658
2659 U_CAPI const char* U_EXPORT2
2660 uloc_getAvailable(int32_t offset)
2661 {
2662
2663     _load_installedLocales();
2664
2665     if (offset > _installedLocalesCount)
2666         return NULL;
2667     return _installedLocales[offset];
2668 }
2669
2670 U_CAPI int32_t  U_EXPORT2
2671 uloc_countAvailable()
2672 {
2673     _load_installedLocales();
2674     return _installedLocalesCount;
2675 }
2676
2677 /**
2678  * Returns a list of all language codes defined in ISO 639.  This is a pointer
2679  * to an array of pointers to arrays of char.  All of these pointers are owned
2680  * by ICU-- do not delete them, and do not write through them.  The array is
2681  * terminated with a null pointer.
2682  */
2683 U_CAPI const char* const*  U_EXPORT2
2684 uloc_getISOLanguages()
2685 {
2686     return LANGUAGES;
2687 }
2688
2689 /**
2690  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2691  * pointer to an array of pointers to arrays of char.  All of these pointers are
2692  * owned by ICU-- do not delete them, and do not write through them.  The array is
2693  * terminated with a null pointer.
2694  */
2695 U_CAPI const char* const*  U_EXPORT2
2696 uloc_getISOCountries()
2697 {
2698     return COUNTRIES;
2699 }
2700
2701
/* this function to be moved into cstring.c later */
static char gDecimal = 0;

/*
 * strtod() replacement for input that always uses '.' as the decimal point.
 *
 * On the first call the decimal character actually used by the C runtime is
 * detected by formatting 1.0 and is cached in gDecimal.  If it is '.', or if
 * the first 29 characters of `start` contain no '.', the call is passed
 * straight to uprv_strtod.  Otherwise those characters are copied to a local
 * buffer, the first '.' is replaced by the runtime's decimal character, and
 * the copy is parsed; *end (if end is not NULL) is set to the matching
 * position inside the original `start` string.
 */
static double
_uloc_strtod(const char *start, char **end) {
  char localized[30];
  char *point;
  char *stop;
  double value;

  if (gDecimal == 0) {
    char sample[5];
    /* For machines that decide to change the decimal on you,
       and try to be too smart with localization.
       This normally should be just a '.'. */
    sprintf(sample, "%+1.1f", 1.0);
    gDecimal = sample[2];
  }

  if (gDecimal == '.') {
    /* fall through to OS */
    return uprv_strtod(start, end);
  }

  uprv_strncpy(localized, start, 29);
  localized[29] = 0;

  point = uprv_strchr(localized, '.');
  if (point == NULL) {
    /* no decimal point */
    return uprv_strtod(start, end);
  }
  *point = gDecimal;

  value = uprv_strtod(localized, &stop);
  if (end != NULL) {
    /* cast away const (to follow uprv_strtod API.) */
    *end = (char*)(start + (stop - localized));
  }
  return value;
}
2740
/*
 * One parsed entry of an HTTP Accept-Language list, as built and sorted by
 * uloc_acceptLanguageFromHTTP.
 */
typedef struct {
    double q;       /* quality value of the entry; 1.0 when none was given */
    char *locale;   /* heap-allocated locale ID of the entry */
#ifdef ULOC_DEBUG_PURIFY
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
#endif
} _acceptLangItem;
2748
2749 static int32_t U_CALLCONV
2750 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2751 {
2752     const _acceptLangItem *aa = (const _acceptLangItem*)a;
2753     const _acceptLangItem *bb = (const _acceptLangItem*)b;
2754
2755     int32_t rc = 0;
2756     if(bb->q < aa->q) {
2757         rc = -1;  /* A > B */
2758     } else if(bb->q > aa->q) {
2759         rc = 1;   /* A < B */
2760     } else {
2761         rc = 0;   /* A = B */
2762     }
2763
2764     if(rc==0) {
2765         rc = uprv_stricmp(aa->locale, bb->locale);
2766     }
2767
2768 #if defined(ULOC_DEBUG)
2769     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2770     aa->locale, aa->q,
2771     bb->locale, bb->q,
2772     rc);*/
2773 #endif
2774
2775     return rc;
2776 }
2777
2778 /*
2779 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2780 */
2781
2782 U_CAPI int32_t U_EXPORT2
2783 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2784                             const char *httpAcceptLanguage,
2785                             UEnumeration* availableLocales,
2786                             UErrorCode *status)
2787 {
2788     _acceptLangItem *j;
2789     _acceptLangItem smallBuffer[30];
2790     char **strs;
2791     char tmp[ULOC_FULLNAME_CAPACITY +1];
2792     int32_t n = 0;
2793     const char *itemEnd;
2794     const char *paramEnd;
2795     const char *s;
2796     const char *t;
2797     int32_t res;
2798     int32_t i;
2799     int32_t l = uprv_strlen(httpAcceptLanguage);
2800     int32_t jSize;
2801
2802     j = smallBuffer;
2803     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2804     if(U_FAILURE(*status)) {
2805         return -1;
2806     }
2807
2808     for(s=httpAcceptLanguage;s&&*s;) {
2809         while(isspace(*s)) /* eat space at the beginning */
2810             s++;
2811         itemEnd=uprv_strchr(s,',');
2812         paramEnd=uprv_strchr(s,';');
2813         if(!itemEnd) {
2814             itemEnd = httpAcceptLanguage+l; /* end of string */
2815         }
2816         if(paramEnd && paramEnd<itemEnd) {
2817             /* semicolon (;) is closer than end (,) */
2818             t = paramEnd+1;
2819             if(*t=='q') {
2820                 t++;
2821             }
2822             while(isspace(*t)) {
2823                 t++;
2824             }
2825             if(*t=='=') {
2826                 t++;
2827             }
2828             while(isspace(*t)) {
2829                 t++;
2830             }
2831             j[n].q = _uloc_strtod(t,NULL);
2832         } else {
2833             /* no semicolon - it's 1.0 */
2834             j[n].q = 1.0;
2835             paramEnd = itemEnd;
2836         }
2837 #if defined(ULOC_DEBUG_PURIFY)
2838         j[n].dummy=0xDECAFBAD;
2839 #endif
2840         /* eat spaces prior to semi */
2841         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2842             ;
2843         j[n].locale = uprv_strndup(s,(t+1)-s);
2844         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2845         if(strcmp(j[n].locale,tmp)) {
2846             uprv_free(j[n].locale);
2847             j[n].locale=uprv_strdup(tmp);
2848         }
2849 #if defined(ULOC_DEBUG)
2850         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2851 #endif
2852         n++;
2853         s = itemEnd;
2854         while(*s==',') { /* eat duplicate commas */
2855             s++;
2856         }
2857         if(n>=jSize) {
2858           if(j==smallBuffer) {  /* overflowed the small buffer. */
2859             j = uprv_malloc(sizeof(j[0])*(jSize*2));
2860             if(j!=NULL) {
2861               uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2862             }
2863 #if defined(ULOC_DEBUG)
2864             fprintf(stderr,"malloced at size %d\n", jSize);
2865 #endif
2866           } else {
2867             j = uprv_realloc(j, sizeof(j[0])*jSize*2);
2868 #if defined(ULOC_DEBUG)
2869             fprintf(stderr,"re-alloced at size %d\n", jSize);
2870 #endif
2871           }
2872           jSize *= 2;
2873           if(j==NULL) {
2874             *status = U_MEMORY_ALLOCATION_ERROR;
2875             return -1;
2876           }
2877         }
2878     }
2879     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2880     if(U_FAILURE(*status)) {
2881       if(j != smallBuffer) {
2882 #if defined(ULOC_DEBUG)
2883         fprintf(stderr,"freeing j %p\n", j);
2884 #endif
2885         uprv_free(j);
2886       }
2887       return -1;
2888     }
2889     strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
2890     for(i=0;i<n;i++) {
2891 #if defined(ULOC_DEBUG)
2892         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2893 #endif
2894         strs[i]=j[i].locale;
2895     }
2896     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
2897         (const char**)strs, n, availableLocales, status);
2898     for(i=0;i<n;i++) {
2899         uprv_free(strs[i]);
2900     }
2901     uprv_free(strs);
2902     if(j != smallBuffer) {
2903 #if defined(ULOC_DEBUG)
2904       fprintf(stderr,"freeing j %p\n", j);
2905 #endif
2906       uprv_free(j);
2907     }
2908     return res;
2909 }
2910
2911
2912 U_CAPI int32_t U_EXPORT2
2913 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2914                     UAcceptResult *outResult, const char **acceptList,
2915                     int32_t acceptListCount,
2916                     UEnumeration* availableLocales,
2917                     UErrorCode *status)
2918 {
2919     int32_t i,j;
2920     int32_t len;
2921     int32_t maxLen=0;
2922     char tmp[ULOC_FULLNAME_CAPACITY+1];
2923     const char *l;
2924     char **fallbackList;
2925     if(U_FAILURE(*status)) {
2926         return -1;
2927     }
2928     fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
2929     if(fallbackList==NULL) {
2930       *status = U_MEMORY_ALLOCATION_ERROR;
2931       return -1;
2932     }
2933     for(i=0;i<acceptListCount;i++) {
2934 #if defined(ULOC_DEBUG)
2935         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2936 #endif
2937         while((l=uenum_next(availableLocales, NULL, status))) {
2938 #if defined(ULOC_DEBUG)
2939             fprintf(stderr,"  %s\n", l);
2940 #endif
2941             len = uprv_strlen(l);
2942             if(!uprv_strcmp(acceptList[i], l)) {
2943                 if(outResult) {
2944                     *outResult = ULOC_ACCEPT_VALID;
2945                 }
2946 #if defined(ULOC_DEBUG)
2947                 fprintf(stderr, "MATCH! %s\n", l);
2948 #endif
2949                 if(len>0) {
2950                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2951                 }
2952                 for(j=0;j<i;j++) {
2953                     uprv_free(fallbackList[j]);
2954                 }
2955                 uprv_free(fallbackList);
2956                 return u_terminateChars(result, resultAvailable, len, status);
2957             }
2958             if(len>maxLen) {
2959                 maxLen = len;
2960             }
2961         }
2962         uenum_reset(availableLocales, status);
2963         /* save off parent info */
2964         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2965             fallbackList[i] = uprv_strdup(tmp);
2966         } else {
2967             fallbackList[i]=0;
2968         }
2969     }
2970
2971     for(maxLen--;maxLen>0;maxLen--) {
2972         for(i=0;i<acceptListCount;i++) {
2973             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2974 #if defined(ULOC_DEBUG)
2975                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2976 #endif
2977                 while((l=uenum_next(availableLocales, NULL, status))) {
2978 #if defined(ULOC_DEBUG)
2979                     fprintf(stderr,"  %s\n", l);
2980 #endif
2981                     len = uprv_strlen(l);
2982                     if(!uprv_strcmp(fallbackList[i], l)) {
2983                         if(outResult) {
2984                             *outResult = ULOC_ACCEPT_FALLBACK;
2985                         }
2986 #if defined(ULOC_DEBUG)
2987                         fprintf(stderr, "fallback MATCH! %s\n", l);
2988 #endif
2989                         if(len>0) {
2990                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2991                         }
2992                         for(i=0;i<acceptListCount;i++) {
2993                             uprv_free(fallbackList[i]);
2994                         }
2995                         uprv_free(fallbackList);
2996                         return u_terminateChars(result, resultAvailable, len, status);
2997                     }
2998                 }
2999                 uenum_reset(availableLocales, status);
3000
3001                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3002                     uprv_free(fallbackList[i]);
3003                     fallbackList[i] = uprv_strdup(tmp);
3004                 } else {
3005                     uprv_free(fallbackList[i]);
3006                     fallbackList[i]=0;
3007                 }
3008             }
3009         }
3010         if(outResult) {
3011             *outResult = ULOC_ACCEPT_FAILED;
3012         }
3013     }
3014     for(i=0;i<acceptListCount;i++) {
3015         uprv_free(fallbackList[i]);
3016     }
3017     uprv_free(fallbackList);
3018     return -1;
3019 }
3020
3021 /*eof*/