icuSources/common/uloc.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1997-2007, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *
   7 * File ULOC.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   04/01/97    aliu        Creation.
  13 *   08/21/98    stephen     JDK 1.2 sync
  14 *   12/08/98    rtg         New Locale implementation and C API
  15 *   03/15/99    damiba      overhaul.
  16 *   04/06/99    stephen     changed setDefault() to realloc and copy
  17 *   06/14/99    stephen     Changed calls to ures_open for new params
  18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
  19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
  20 *                           brought canonicalization code into line with spec
  21 *****************************************************************************/
  22
  23 /*
  24    POSIX's locale format, from putil.c: [no spaces]
  25
  26      ll [ _CC ] [ . MM ] [ @ VV]
  27
  28      l = lang, C = ctry, M = charmap, V = variant
  29 */
  30
  31 #include "unicode/utypes.h"
  32 #include "unicode/ustring.h"
  33 #include "unicode/uloc.h"
  34 #include "unicode/ures.h"
  35
  36 #include "putilimp.h"
  37 #include "ustr_imp.h"
  38 #include "ulocimp.h"
  39 #include "uresimp.h"
  40 #include "umutex.h"
  41 #include "cstring.h"
  42 #include "cmemory.h"
  43 #include "ucln_cmn.h"
  44 #include "locmap.h"
  45 #include "uarrsort.h"
  46 #include "uenumimp.h"
  47 #include "uassert.h"
  48
  49 #include <stdio.h> /* for sprintf */
  50
  51 /* ### Declarations **************************************************/
  52
  53 /* Locale stuff from locid.cpp */
  54 U_CFUNC void locale_set_default(const char *id);
  55 U_CFUNC const char *locale_get_default(void);
  56 U_CFUNC int32_t
  57 locale_getKeywords(const char *localeID,
  58             char prev,
  59             char *keywords, int32_t keywordCapacity,
  60             char *values, int32_t valuesCapacity, int32_t *valLen,
  61             UBool valuesToo,
  62             UErrorCode *status);
  63
  64 /* ### Constants **************************************************/
  65
  66 /* These strings describe the resources we attempt to load from
  67  the locale ResourceBundle data file.*/
  68 static const char _kLanguages[]       = "Languages";
  69 static const char _kScripts[]         = "Scripts";
  70 static const char _kCountries[]       = "Countries";
  71 static const char _kVariants[]        = "Variants";
  72 static const char _kKeys[]            = "Keys";
  73 static const char _kTypes[]           = "Types";
  74 static const char _kIndexLocaleName[] = "res_index";
  75 static const char _kRootName[]        = "root";
  76 static const char _kIndexTag[]        = "InstalledLocales";
  77 static const char _kCurrency[]        = "currency";
  78 static const char _kCurrencies[]      = "Currencies";
  79 static char** _installedLocales = NULL;
  80 static int32_t _installedLocalesCount = 0;
  81
  82 /* ### Data tables **************************************************/
  83
  84 /**
  85  * Table of language codes, both 2- and 3-letter, with preference
  86  * given to 2-letter codes where possible.  Includes 3-letter codes
  87  * that lack a 2-letter equivalent.
  88  *
  89  * This list must be in sorted order.  This list is returned directly
  90  * to the user by some API.
  91  *
  92  * This list must be kept in sync with LANGUAGES_3, with corresponding
  93  * entries matched.
  94  *
  95  * This table should be terminated with a NULL entry, followed by a
  96  * second list, and another NULL entry.  The first list is visible to
  97  * user code when this array is returned by API.  The second list
  98  * contains codes we support, but do not expose through user API.
  99  *
 100  * Notes
 101  *
 102  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 103  * include the revisions up to 2001/7/27 *CWB*
 104  *
 105  * The 3 character codes are the terminology codes like RFC 3066.  This
 106  * is compatible with prior ICU codes
 107  *
 108  * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 109  * table but now at the end of the table because 3 character codes are
 110  * duplicates.  This avoids bad searches going from 3 to 2 character
 111  * codes.
 112  *
 113  * The range qaa-qtz is reserved for local use
 114  */
 115 static const char * const LANGUAGES[] = {
 116     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
 117     "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
 118     "ang", "anp", "apa",
 119     "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
 120     "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
 121     "bai", "bal", "ban", "bas", "bat", "be",  "bej",
 122     "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
 123     "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
 124     "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
 125     "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
 126     "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
 127     "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
 128     "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
 129     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
 130     "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
 131     "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
 132     "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
 133     "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
 134     "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
 135     "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
 136     "grc", "gsw", "gu",  "gv", "gwi",
 137     "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
 138     "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
 139     "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
 140     "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
 141     "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
 142     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
 143     "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
 144     "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
 145     "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
 146     "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
 147     "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
 148     "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
 149     "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",
 150     "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
 151     "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
 152     "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
 153     "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
 154     "niu", "nl",  "nn",  "no",  "nog", "non", "nr",  "nso", "nub",
 155     "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
 156     "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
 157     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
 158     "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
 159     "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
 160     "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
 161     "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
 162     "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
 163     "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
 164     "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
 165     "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
 166     "sv",  "sw",  "syr", "ta",  "tai", "te",  "tem", "ter",
 167     "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
 168     "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",
 169     "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
 170     "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
 171     "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
 172     "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
 173     "yi",  "yo",  "ypk", "za",  "zap", "zen", "zh",  "znd",
 174     "zu",  "zun", "zxx",
 175 NULL,
 176     "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
 177 NULL
 178 };
 179 static const char* const DEPRECATED_LANGUAGES[]={
 180     "in", "iw", "ji", "jw", NULL, NULL
 181 };
 182 static const char* const REPLACEMENT_LANGUAGES[]={
 183     "id", "he", "yi", "jv", NULL, NULL
 184 };
 185
 186 /**
 187  * Table of 3-letter language codes.
 188  *
 189  * This is a lookup table used to convert 3-letter language codes to
 190  * their 2-letter equivalent, where possible.  It must be kept in sync
 191  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 192  * same language as LANGUAGES_3[i].  The commented-out lines are
 193  * copied from LANGUAGES to make eyeballing this baby easier.
 194  *
 195  * Where a 3-letter language code has no 2-letter equivalent, the
 196  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 197  *
 198  * This table should be terminated with a NULL entry, followed by a
 199  * second list, and another NULL entry.  The two lists correspond to
 200  * the two lists in LANGUAGES.
 201  */
 202 static const char * const LANGUAGES_3[] = {
 203 /*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
 204     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
 205 /*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
 206     "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
 207 /*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
 208     "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
 209 /*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
 210     "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
 211 /*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
 212     "bai", "bal", "ban", "bas", "bat", "bel", "bej",
 213 /*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
 214     "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
 215 /*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
 216     "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
 217 /*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
 218     "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
 219 /*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
 220     "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
 221 /*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
 222     "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
 223 /*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
 224     "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
 225 /*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
 226     "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
 227 /*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
 228     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
 229 /*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
 230     "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
 231 /*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
 232     "enm", "epo", "spa", "est", "eus", "ewo", "fas",
 233 /*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
 234     "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
 235 /*  "fr",  "frm", "fro", "fur", "frr", "frs", "fy",  "ga",  "gaa", "gay",    */
 236     "fra", "frm", "fro", "fur", "frr", "frs", "fry", "gle", "gaa", "gay",
 237 /*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
 238     "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
 239 /*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
 240     "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
 241 /*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
 242     "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
 243 /*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
 244     "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
 245 /*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
 246     "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
 247 /*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
 248     "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
 249 /*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
 250     "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
 251 /*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
 252     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
 253 /*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
 254     "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
 255 /*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
 256     "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
 257 /*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
 258     "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
 259 /*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
 260     "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
 261 /*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
 262     "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
 263 /*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
 264     "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
 265 /*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
 266     "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
 267 /*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
 268     "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
 269 /*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
 270     "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
 271 /*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
 272     "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
 273 /*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
 274     "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
 275 /*  "niu", "nl",  "nn",  "no",  "nog", "non", "nr",  "nso", "nub",    */
 276     "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
 277 /*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
 278     "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
 279 /*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
 280     "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
 281 /*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
 282     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
 283 /*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
 284     "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
 285 /*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
 286     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
 287 /*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
 288     "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
 289 /*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
 290     "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
 291 /*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
 292     "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
 293 /*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
 294     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
 295 /*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
 296     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
 297 /*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
 298     "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
 299 /*  "sv",  "sw",  "syr", "ta",  "tai", "te",  "tem", "ter",    */
 300     "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
 301 /*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
 302     "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
 303 /*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
 304     "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
 305 /*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
 306     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
 307 /*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
 308     "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
 309 /*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
 310     "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
 311 /*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
 312     "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
 313 /*  "yi",  "yo",  "ypk", "za",  "zap", "zen", "zh",  "znd",    */
 314     "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
 315 /*  "zu",  "zun",                                              */
 316     "zul", "zun", "zxx",
 317 NULL,
 318 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
 319     "ind", "heb", "yid", "jaw", "srp",
 320 NULL
 321 };
 322
 323 /**
 324  * Table of 2-letter country codes.
 325  *
 326  * This list must be in sorted order.  This list is returned directly
 327  * to the user by some API.
 328  *
 329  * This list must be kept in sync with COUNTRIES_3, with corresponding
 330  * entries matched.
 331  *
 332  * This table should be terminated with a NULL entry, followed by a
 333  * second list, and another NULL entry.  The first list is visible to
 334  * user code when this array is returned by API.  The second list
 335  * contains codes we support, but do not expose through user API.
 336  *
 337  * Notes:
 338  *
 339  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 340  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 341  * new codes keeping the old ones for compatibility updated to include
 342  * 1999/12/03 revisions *CWB*
 343  *
 344  * RO(ROM) is now RO(ROU) according to
 345  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 346  */
 347 static const char * const COUNTRIES[] = {
 348     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
 349     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
 350     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
 351     "BJ",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
 352     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
 353     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
 354     "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
 355     "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
 356     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
 357     "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
 358     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
 359     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
 360     "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
 361     "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
 362     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
 363     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
 364     "LV",  "LY",  "MA",  "MC",  "MD",  "MG",  "MH",  "MK",
 365     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
 366     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
 367     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
 368     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
 369     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
 370     "PW",  "PY",  "QA",  "RE",  "RO",  "RU",  "RW",  "SA",
 371     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
 372     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
 373     "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
 374     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
 375     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
 376     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
 377     "WS",  "YE",  "YT",  "YU",  "ZA",  "ZM",  "ZW",  "ZZ",
 378 NULL,
 379     "FX",  "RO",  "TP",  "ZR",   /* obsolete country codes */
 380 NULL
 381 };
 382
 383 static const char* const DEPRECATED_COUNTRIES[] ={
 384     "BU", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
 385 };
 386 static const char* const REPLACEMENT_COUNTRIES[] = {
 387 /*  "BU", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
 388     "MM", "BJ", "FR", "BF", "VU", "ZW", "TL", "CS", "CD", NULL, NULL  /* replacement country codes */
 389 };
 390
 391 /**
 392  * Table of 3-letter country codes.
 393  *
 394  * This is a lookup table used to convert 3-letter country codes to
 395  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 396  * For all valid i, COUNTRIES[i] must refer to the same country as
 397  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 398  * to make eyeballing this baby easier.
 399  *
 400  * This table should be terminated with a NULL entry, followed by a
 401  * second list, and another NULL entry.  The two lists correspond to
 402  * the two lists in COUNTRIES.
 403  */
 404 static const char * const COUNTRIES_3[] = {
 405 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
 406     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
 407 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
 408     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
 409 /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
 410     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
 411 /*  "BJ",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
 412     "BEN", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
 413 /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
 414     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
 415 /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
 416     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
 417 /*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
 418     "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
 419 /*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
 420     "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
 421 /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
 422     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
 423 /*  "GA",   "GB",   "GD",   "GE",    "GF",   "GG",   "GH",  "GI",  "GL",     */
 424     "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
 425 /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
 426     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
 427 /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
 428     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
 429 /*  ID",    "IE",   "IL",    "IM",   "IN",   "IO",   "IQ",   "IR",   "IS" */
 430     "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
 431 /*  "IT",   "JE",   "JM",   "JO",    "JP",   "KE",   "KG",  "KH",  "KI",     */
 432     "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
 433 /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
 434     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
 435 /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
 436     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
 437 /*  "LV",  "LY",  "MA",  "MC",  "MD",  "MG",  "MH",  "MK",     */
 438     "LVA", "LBY", "MAR", "MCO", "MDA", "MDG", "MHL", "MKD",
 439 /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
 440     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
 441 /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
 442     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
 443 /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
 444     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
 445 /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
 446     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
 447 /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
 448     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
 449 /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RU",  "RW",  "SA",     */
 450     "PLW", "PRY", "QAT", "REU", "ROU", "RUS", "RWA", "SAU",
 451 /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
 452     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
 453 /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
 454     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
 455 /*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
 456     "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
 457 /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
 458     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
 459 /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
 460     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
 461 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
 462     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
 463 /*  "WS",  "YE",  "YT",  "YU",  "ZA",  "ZM",  "ZW", "ZZZ"          */
 464     "WSM", "YEM", "MYT", "YUG", "ZAF", "ZMB", "ZWE",  "ZZZ",
 465 NULL,
 466 /*  "FX",  "RO",  "TP",  "ZR",   */
 467     "FXX", "ROM", "TMP", "ZAR",
 468 NULL
 469 };
 470
 471 typedef struct CanonicalizationMap {
 472     const char *id;          /* input ID */
 473     const char *canonicalID; /* canonicalized output ID */
 474     const char *keyword;     /* keyword, or NULL if none */
 475     const char *value;       /* keyword value, or NULL if kw==NULL */
 476 } CanonicalizationMap;
 477
 478 /**
 479  * A map to canonicalize locale IDs.  This handles a variety of
 480  * different semantic kinds of transformations.
 481  */
 482 static const CanonicalizationMap CANONICALIZE_MAP[] = {
 483     { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
 484     { "C",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
 485     { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
 486     { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
 487     { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
 488     { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
 489     { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
 490     { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
 491     { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
 492     { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
 493     { "de__PHONEBOOK",  "de", "collation", "phonebook" },
 494     { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
 495     { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
 496     { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
 497     { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
 498     { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
 499     { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
 500     { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
 501     { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
 502     { "es__TRADITIONAL", "es", "collation", "traditional" },
 503     { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
 504     { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
 505     { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
 506     { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
 507     { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
 508     { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
 509     { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
 510     { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
 511     { "hi__DIRECT",     "hi", "collation", "direct" },
 512     { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
 513     { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
 514     { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
 515     { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
 516     { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
 517     { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
 518     { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
 519     { "sr_SP_CYRL",     "sr_Cyrl_CS", NULL, NULL }, /* .NET name */
 520     { "sr_SP_LATN",     "sr_Latn_CS", NULL, NULL }, /* .NET name */
 521     { "sr_YU_CYRILLIC", "sr_Cyrl_CS", NULL, NULL }, /* Linux name */
 522     { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
 523     { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
 524     { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
 525     { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
 526     { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name TODO: This should be zh_Hant once the locale structure is fixed. */
 527     { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
 528     { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
 529     { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
 530     { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
 531     { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
 532     { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
 533     { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
 534     { "zh_YUE",         "zh__YUE", NULL, NULL }, /* registered name */
 535     { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" },
 536     { "zh_TW_STROKE",   "zh_Hant_TW", "collation", "stroke" },
 537     { "zh__PINYIN",     "zh", "collation", "pinyin" }
 538 };
 539
 540 /* ### Keywords **************************************************/
 541
 542 #define ULOC_KEYWORD_BUFFER_LEN 25
 543 #define ULOC_MAX_NO_KEYWORDS 25
 544
 545 static const char *
 546 locale_getKeywordsStart(const char *localeID) {
 547     const char *result = NULL;
 548     if((result = uprv_strchr(localeID, '@')) != NULL) {
 549         return result;
 550     }
 551 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
 552     else {
 553         /* We do this because the @ sign is variant, and the @ sign used on one
 554         EBCDIC machine won't be compiled the same way on other EBCDIC based
 555         machines. */
 556         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
 557         const uint8_t *charToFind = ebcdicSigns;
 558         while(*charToFind) {
 559             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
 560                 return result;
 561             }
 562             charToFind++;
 563         }
 564     }
 565 #endif
 566     return NULL;
 567 }
 568
 569 /**
 570  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
 571  * @param keywordName incoming name to be canonicalized
 572  * @param status return status (keyword too long)
 573  * @return length of the keyword name
 574  */
 575 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
 576 {
 577   int32_t i;
 578   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
 579
 580   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
 581     /* keyword name too long for internal buffer */
 582     *status = U_INTERNAL_PROGRAM_ERROR;
 583           return 0;
 584   }
 585
 586   /* normalize the keyword name */
 587   for(i = 0; i < keywordNameLen; i++) {
 588     buf[i] = uprv_tolower(keywordName[i]);
 589   }
 590   buf[i] = 0;
 591
 592   return keywordNameLen;
 593 }
 594
 /* One keyword/value pair as collected by _getKeywords() before sorting. */
 595 typedef struct {
 596     char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lower-cased keyword name, zero terminated */
 597     int32_t keywordLen;                    /* length of keyword, terminator excluded */
 598     const char *valueStart;                /* first character of the value; points into the parsed
                                                  locale ID (or at addValue) and is NOT terminated
                                                  at valueLen */
 599     int32_t valueLen;                      /* number of characters of valueStart that form the value */
 600 } KeywordStruct;
 601
 602 static int32_t U_CALLCONV
 603 compareKeywordStructs(const void *context, const void *left, const void *right) {
 604     const char* leftString = ((const KeywordStruct *)left)->keyword;
 605     const char* rightString = ((const KeywordStruct *)right)->keyword;
 606     return uprv_strcmp(leftString, rightString);
 607 }
 608
 609 /**
 610  * Both addKeyword and addValue must already be in canonical form.
 611  * Either both addKeyword and addValue are NULL, or neither is NULL.
 612  * If they are not NULL they must be zero terminated.
 613  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
 614  */
 615 static int32_t
 616 _getKeywords(const char *localeID,
 617              char prev,
 618              char *keywords, int32_t keywordCapacity,
 619              char *values, int32_t valuesCapacity, int32_t *valLen,
 620              UBool valuesToo,
 621              const char* addKeyword,
 622              const char* addValue,
 623              UErrorCode *status)
 624 {
 625     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
 626
 627     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
 628     int32_t numKeywords = 0;
 629     const char* pos = localeID;
 630     const char* equalSign = NULL;
 631     const char* semicolon = NULL;
 632     int32_t i = 0, j, n;
 633     int32_t keywordsLen = 0;
 634     int32_t valuesLen = 0;
 635
 636     if(prev == '@') { /* start of keyword definition */
 637         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
 638         do {
 639             UBool duplicate = FALSE;
 640             /* skip leading spaces */
 641             while(*pos == ' ') {
 642                 pos++;
 643             }
 644             if (!*pos) { /* handle trailing "; " */
 645                 break;
 646             }
 647             if(numKeywords == maxKeywords) {
 648                 *status = U_INTERNAL_PROGRAM_ERROR;
 649                 return 0;
 650             }
 651             equalSign = uprv_strchr(pos, '=');
 652             semicolon = uprv_strchr(pos, ';');
 653             /* lack of '=' [foo@currency] is illegal */
 654             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
 655             if(!equalSign || (semicolon && semicolon<equalSign)) {
 656                 *status = U_INVALID_FORMAT_ERROR;
 657                 return 0;
 658             }
 659             /* need to normalize both keyword and keyword name */
 660             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
 661                 /* keyword name too long for internal buffer */
 662                 *status = U_INTERNAL_PROGRAM_ERROR;
 663                 return 0;
 664             }
 665             for(i = 0, n = 0; i < equalSign - pos; ++i) {
 666                 if (pos[i] != ' ') {
 667                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
 668                 }
 669             }
 670             keywordList[numKeywords].keyword[n] = 0;
 671             keywordList[numKeywords].keywordLen = n;
 672             /* now grab the value part. First we skip the '=' */
 673             equalSign++;
 674             /* then we leading spaces */
 675             while(*equalSign == ' ') {
 676                 equalSign++;
 677             }
 678             keywordList[numKeywords].valueStart = equalSign;
 679
 680             pos = semicolon;
 681             i = 0;
 682             if(pos) {
 683                 while(*(pos - i - 1) == ' ') {
 684                     i++;
 685                 }
 686                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
 687                 pos++;
 688             } else {
 689                 i = (int32_t)uprv_strlen(equalSign);
 690                 while(equalSign[i-1] == ' ') {
 691                     i--;
 692                 }
 693                 keywordList[numKeywords].valueLen = i;
 694             }
 695             /* If this is a duplicate keyword, then ignore it */
 696             for (j=0; j<numKeywords; ++j) {
 697                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
 698                     duplicate = TRUE;
 699                     break;
 700                 }
 701             }
 702             if (!duplicate) {
 703                 ++numKeywords;
 704             }
 705         } while(pos);
 706
 707         /* Handle addKeyword/addValue. */
 708         if (addKeyword != NULL) {
 709             UBool duplicate = FALSE;
 710             U_ASSERT(addValue != NULL);
 711             /* Search for duplicate; if found, do nothing. Explicit keyword
 712                overrides addKeyword. */
 713             for (j=0; j<numKeywords; ++j) {
 714                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
 715                     duplicate = TRUE;
 716                     break;
 717                 }
 718             }
 719             if (!duplicate) {
 720                 if (numKeywords == maxKeywords) {
 721                     *status = U_INTERNAL_PROGRAM_ERROR;
 722                     return 0;
 723                 }
 724                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
 725                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
 726                 keywordList[numKeywords].valueStart = addValue;
 727                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
 728                 ++numKeywords;
 729             }
 730         } else {
 731             U_ASSERT(addValue == NULL);
 732         }
 733
 734         /* now we have a list of keywords */
 735         /* we need to sort it */
 736         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
 737
 738         /* Now construct the keyword part */
 739         for(i = 0; i < numKeywords; i++) {
 740             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
 741                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
 742                 if(valuesToo) {
 743                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
 744                 } else {
 745                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
 746                 }
 747             }
 748             keywordsLen += keywordList[i].keywordLen + 1;
 749             if(valuesToo) {
 750                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
 751                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
 752                 }
 753                 keywordsLen += keywordList[i].valueLen;
 754
 755                 if(i < numKeywords - 1) {
 756                     if(keywordsLen < keywordCapacity) {
 757                         keywords[keywordsLen] = ';';
 758                     }
 759                     keywordsLen++;
 760                 }
 761             }
 762             if(values) {
 763                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
 764                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
 765                     values[valuesLen + keywordList[i].valueLen] = 0;
 766                 }
 767                 valuesLen += keywordList[i].valueLen + 1;
 768             }
 769         }
 770         if(values) {
 771             values[valuesLen] = 0;
 772             if(valLen) {
 773                 *valLen = valuesLen;
 774             }
 775         }
 776         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
 777     } else {
 778         return 0;
 779     }
 780 }
 781
 782 U_CFUNC int32_t
 783 locale_getKeywords(const char *localeID,
 784                    char prev,
 785                    char *keywords, int32_t keywordCapacity,
 786                    char *values, int32_t valuesCapacity, int32_t *valLen,
 787                    UBool valuesToo,
 788                    UErrorCode *status) {
 789     return _getKeywords(localeID, prev, keywords, keywordCapacity,
 790                         values, valuesCapacity, valLen, valuesToo,
 791                         NULL, NULL, status);
 792 }
 793
 794 U_CAPI int32_t U_EXPORT2
 795 uloc_getKeywordValue(const char* localeID,
 796                      const char* keywordName,
 797                      char* buffer, int32_t bufferCapacity,
 798                      UErrorCode* status)
 799 {
 800     const char* nextSeparator = NULL;
 801     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 802     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 803     int32_t i = 0;
 804     int32_t result = 0;
 805
 806     if(status && U_SUCCESS(*status) && localeID) {
 807
 808       const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
 809       if(startSearchHere == NULL) {
 810           /* no keywords, return at once */
 811           return 0;
 812       }
 813
 814       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 815       if(U_FAILURE(*status)) {
 816         return 0;
 817       }
 818
 819       /* find the first keyword */
 820       while(startSearchHere) {
 821           startSearchHere++;
 822           /* skip leading spaces (allowed?) */
 823           while(*startSearchHere == ' ') {
 824               startSearchHere++;
 825           }
 826           nextSeparator = uprv_strchr(startSearchHere, '=');
 827           /* need to normalize both keyword and keyword name */
 828           if(!nextSeparator) {
 829               break;
 830           }
 831           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
 832               /* keyword name too long for internal buffer */
 833               *status = U_INTERNAL_PROGRAM_ERROR;
 834               return 0;
 835           }
 836           for(i = 0; i < nextSeparator - startSearchHere; i++) {
 837               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
 838           }
 839           /* trim trailing spaces */
 840           while(startSearchHere[i-1] == ' ') {
 841               i--;
 842           }
 843           localeKeywordNameBuffer[i] = 0;
 844
 845           startSearchHere = uprv_strchr(nextSeparator, ';');
 846
 847           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
 848               nextSeparator++;
 849               while(*nextSeparator == ' ') {
 850                   nextSeparator++;
 851               }
 852               /* we actually found the keyword. Copy the value */
 853               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
 854                   while(*(startSearchHere-1) == ' ') {
 855                       startSearchHere--;
 856                   }
 857                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
 858                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
 859               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
 860                   i = (int32_t)uprv_strlen(nextSeparator);
 861                   while(nextSeparator[i - 1] == ' ') {
 862                       i--;
 863                   }
 864                   uprv_strncpy(buffer, nextSeparator, i);
 865                   result = u_terminateChars(buffer, bufferCapacity, i, status);
 866               } else {
 867                   /* give a bigger buffer, please */
 868                   *status = U_BUFFER_OVERFLOW_ERROR;
 869                   if(startSearchHere) {
 870                       result = (int32_t)(startSearchHere - nextSeparator);
 871                   } else {
 872                       result = (int32_t)uprv_strlen(nextSeparator);
 873                   }
 874               }
 875               return result;
 876           }
 877       }
 878     }
 879     return 0;
 880 }
 881
 882 U_CAPI int32_t U_EXPORT2
 883 uloc_setKeywordValue(const char* keywordName,
 884                      const char* keywordValue,
 885                      char* buffer, int32_t bufferCapacity,
 886                      UErrorCode* status)
 887 {
 888     /* TODO: sorting. removal. */
 889     int32_t keywordNameLen;
 890     int32_t keywordValueLen;
 891     int32_t bufLen;
 892     int32_t needLen = 0;
 893     int32_t foundValueLen;
 894     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
 895     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 896     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 897     int32_t i = 0;
 898     int32_t rc;
 899     char* nextSeparator = NULL;
 900     char* nextEqualsign = NULL;
 901     char* startSearchHere = NULL;
 902     char* keywordStart = NULL;
 903     char *insertHere = NULL;
 904     if(U_FAILURE(*status)) {
 905         return -1;
 906     }
 907     if(bufferCapacity>1) {
 908         bufLen = (int32_t)uprv_strlen(buffer);
 909     } else {
 910         *status = U_ILLEGAL_ARGUMENT_ERROR;
 911         return 0;
 912     }
 913     if(bufferCapacity<bufLen) {
 914         /* The capacity is less than the length?! Is this NULL terminated? */
 915         *status = U_ILLEGAL_ARGUMENT_ERROR;
 916         return 0;
 917     }
 918     if(keywordValue && !*keywordValue) {
 919         keywordValue = NULL;
 920     }
 921     if(keywordValue) {
 922         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
 923     } else {
 924         keywordValueLen = 0;
 925     }
 926     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 927     if(U_FAILURE(*status)) {
 928         return 0;
 929     }
 930     startSearchHere = (char*)locale_getKeywordsStart(buffer);
 931     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
 932         if(!keywordValue) { /* no keywords = nothing to remove */
 933             return bufLen;
 934         }
 935
 936         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
 937         if(startSearchHere) { /* had a single @ */
 938             needLen--; /* already had the @ */
 939             /* startSearchHere points at the @ */
 940         } else {
 941             startSearchHere=buffer+bufLen;
 942         }
 943         if(needLen >= bufferCapacity) {
 944             *status = U_BUFFER_OVERFLOW_ERROR;
 945             return needLen; /* no change */
 946         }
 947         *startSearchHere = '@';
 948         startSearchHere++;
 949         uprv_strcpy(startSearchHere, keywordNameBuffer);
 950         startSearchHere += keywordNameLen;
 951         *startSearchHere = '=';
 952         startSearchHere++;
 953         uprv_strcpy(startSearchHere, keywordValue);
 954         startSearchHere+=keywordValueLen;
 955         return needLen;
 956     } /* end shortcut - no @ */
 957
 958     keywordStart = startSearchHere;
 959     /* search for keyword */
 960     while(keywordStart) {
 961         keywordStart++;
 962         /* skip leading spaces (allowed?) */
 963         while(*keywordStart == ' ') {
 964             keywordStart++;
 965         }
 966         nextEqualsign = uprv_strchr(keywordStart, '=');
 967         /* need to normalize both keyword and keyword name */
 968         if(!nextEqualsign) {
 969             break;
 970         }
 971         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
 972             /* keyword name too long for internal buffer */
 973             *status = U_INTERNAL_PROGRAM_ERROR;
 974             return 0;
 975         }
 976         for(i = 0; i < nextEqualsign - keywordStart; i++) {
 977             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
 978         }
 979         /* trim trailing spaces */
 980         while(keywordStart[i-1] == ' ') {
 981             i--;
 982         }
 983         localeKeywordNameBuffer[i] = 0;
 984
 985         nextSeparator = uprv_strchr(nextEqualsign, ';');
 986         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
 987         if(rc == 0) {
 988             nextEqualsign++;
 989             while(*nextEqualsign == ' ') {
 990                 nextEqualsign++;
 991             }
 992             /* we actually found the keyword. Change the value */
 993             if (nextSeparator) {
 994                 keywordAtEnd = 0;
 995                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
 996             } else {
 997                 keywordAtEnd = 1;
 998                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
 999             }
1000             if(keywordValue) { /* adding a value - not removing */
1001               if(foundValueLen == keywordValueLen) {
1002                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1003                 return bufLen; /* no change in size */
1004               } else if(foundValueLen > keywordValueLen) {
1005                 int32_t delta = foundValueLen - keywordValueLen;
1006                 if(nextSeparator) { /* RH side */
1007                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1008                 }
1009                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1010                 bufLen -= delta;
1011                 buffer[bufLen]=0;
1012                 return bufLen;
1013               } else { /* FVL < KVL */
1014                 int32_t delta = keywordValueLen - foundValueLen;
1015                 if((bufLen+delta) >= bufferCapacity) {
1016                   *status = U_BUFFER_OVERFLOW_ERROR;
1017                   return bufLen+delta;
1018                 }
1019                 if(nextSeparator) { /* RH side */
1020                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1021                 }
1022                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1023                 bufLen += delta;
1024                 buffer[bufLen]=0;
1025                 return bufLen;
1026               }
1027             } else { /* removing a keyword */
1028               if(keywordAtEnd) {
1029                 /* zero out the ';' or '@' just before startSearchhere */
1030                 keywordStart[-1] = 0;
1031                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1032               } else {
1033                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1034                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1035                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1036               }
1037             }
1038         } else if(rc<0){ /* end match keyword */
1039           /* could insert at this location. */
1040           insertHere = keywordStart;
1041         }
1042         keywordStart = nextSeparator;
1043     } /* end loop searching */
1044
1045     if(!keywordValue) {
1046       return bufLen; /* removal of non-extant keyword - no change */
1047     }
1048
1049     /* we know there is at least one keyword. */
1050     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1051     if(needLen >= bufferCapacity) {
1052         *status = U_BUFFER_OVERFLOW_ERROR;
1053         return needLen; /* no change */
1054     }
1055
1056     if(insertHere) {
1057       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1058       keywordStart = insertHere;
1059     } else {
1060       keywordStart = buffer+bufLen;
1061       *keywordStart = ';';
1062       keywordStart++;
1063     }
1064     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1065     keywordStart += keywordNameLen;
1066     *keywordStart = '=';
1067     keywordStart++;
1068     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1069     keywordStart+=keywordValueLen;
1070     if(insertHere) {
1071       *keywordStart = ';';
1072       keywordStart++;
1073     }
1074     buffer[needLen]=0;
1075     return needLen;
1076 }
1077
1078 /* ### ID parsing implementation **************************************************/
1079
/*
 * Character-classification helpers for locale ID parsing.
 * Every macro parenthesizes its argument so that compound expressions
 * (conditional expressions, pointer arithmetic, ...) expand correctly.
 * Arguments are still evaluated more than once: do not pass expressions
 * with side effects.
 */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/*returns TRUE if a is one of the letters that can start a special prefix*/
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1093
/**
 * Bounded character search: examine at most len characters of str and
 * return a pointer to the first one equal to c. The scan also ends at a
 * NUL; a NUL is only reported as a match when c itself is 0.
 * Returns NULL when c is not found.
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    const char* cursor;

    U_ASSERT(str != 0 && len >= 0);
    for (cursor = str; len != 0; ++cursor, --len) {
        if (*cursor == c) {
            return (char*) cursor;
        }
        if (*cursor == 0) {
            return NULL;
        }
    }
    return NULL;
}
1107
/**
 * Search for 'key' in the array 'list' and return its position.
 *
 * 'list' must consist of two consecutive runs of strings, each closed by
 * a NULL entry; both runs are searched, and the returned index counts the
 * NULL that separates them.
 *
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
 * COUNTRIES_3.
 *
 * @return index of the first entry equal to key, or -1 if there is none
 */
static int16_t _findIndex(const char* const* list, const char* key)
{
    int32_t idx = 0;
    int32_t section;

    /* two NULL-terminated sections sit back to back at 'list' */
    for (section = 0; section < 2; ++section) {
        for (; list[idx] != NULL; ++idx) {
            if (uprv_strcmp(key, list[idx]) == 0) {
                return (int16_t)idx;
            }
        }
        ++idx;     /* step over the NULL that ends this section *CWB*/
    }
    return -1;
}
1132
1133 /* count the length of src while copying it to dest; return strlen(src) */
1134 static U_INLINE int32_t
1135 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1136     const char *anchor;
1137     char c;
1138
1139     anchor=src;
1140     for(;;) {
1141         if((c=*src)==0) {
1142             return (int32_t)(src-anchor);
1143         }
1144         if(destCapacity<=0) {
1145             return (int32_t)((src-anchor)+uprv_strlen(src));
1146         }
1147         ++src;
1148         *dest++=c;
1149         --destCapacity;
1150     }
1151 }
1152
1153 static const char*
1154 uloc_getCurrentCountryID(const char* oldID){
1155     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1156     if (offset >= 0) {
1157         return REPLACEMENT_COUNTRIES[offset];
1158     }
1159     return oldID;
1160 }
1161 static const char*
1162 uloc_getCurrentLanguageID(const char* oldID){
1163     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1164     if (offset >= 0) {
1165         return REPLACEMENT_LANGUAGES[offset];
1166     }
1167     return oldID;
1168 }
1169 /*
1170  * the internal functions _getLanguage(), _getCountry(), _getVariant()
1171  * avoid duplicating code to handle the earlier locale ID pieces
1172  * in the functions for the later ones by
1173  * setting the *pEnd pointer to where they stopped parsing
1174  *
1175  * TODO try to use this in Locale
1176  */
1177 static int32_t
1178 _getLanguage(const char *localeID,
1179              char *language, int32_t languageCapacity,
1180              const char **pEnd) {
1181     int32_t i=0;
1182     int32_t offset;
1183     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1184
1185     /* if it starts with i- or x- then copy that prefix */
1186     if(_isIDPrefix(localeID)) {
1187         if(i<languageCapacity) {
1188             language[i]=(char)uprv_tolower(*localeID);
1189         }
1190         if(i<languageCapacity) {
1191             language[i+1]='-';
1192         }
1193         i+=2;
1194         localeID+=2;
1195     }
1196
1197     /* copy the language as far as possible and count its length */
1198     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1199         if(i<languageCapacity) {
1200             language[i]=(char)uprv_tolower(*localeID);
1201         }
1202         if(i<3) {
1203             lang[i]=(char)uprv_tolower(*localeID);
1204         }
1205         i++;
1206         localeID++;
1207     }
1208
1209     if(i==3) {
1210         /* convert 3 character code to 2 character code if possible *CWB*/
1211         offset=_findIndex(LANGUAGES_3, lang);
1212         if(offset>=0) {
1213             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1214         }
1215     }
1216
1217     if(pEnd!=NULL) {
1218         *pEnd=localeID;
1219     }
1220     return i;
1221 }
1222
/**
 * Extract a script subtag from the current position of a locale ID.
 *
 * The subtag up to the next separator or terminator counts as a script
 * only when it is exactly 4 characters long. It is then written as one
 * upper-case letter followed by lower-case letters, without a terminator
 * and truncated to scriptCapacity.
 *
 * @param pEnd if not NULL, receives the position after the script, or
 *             localeID itself when no script is present
 * @return number of characters written: the smaller of 4 and
 *         scriptCapacity for a script, otherwise 0
 */
static int32_t
_getScript(const char *localeID,
            char *script, int32_t scriptCapacity,
            const char **pEnd)
{
    int32_t segmentLen = 0;
    int32_t copyLen;
    int32_t k;

    /* measure the subtag without consuming it */
    while(!_isTerminator(localeID[segmentLen]) && !_isIDSeparator(localeID[segmentLen])) {
        segmentLen++;
    }

    /* Anything that is not exactly 4 characters long is not a script
       (it may be a country); leave the position untouched. */
    if (segmentLen != 4) {
        if (pEnd != NULL) {
            *pEnd = localeID;
        }
        return 0;
    }

    if (pEnd != NULL) {
        *pEnd = localeID + segmentLen;
    }
    copyLen = (segmentLen > scriptCapacity) ? scriptCapacity : segmentLen;
    for (k = 0; k < copyLen; k++) {
        if (k == 0) {
            script[k]=(char)uprv_toupper(localeID[k]);
        } else {
            script[k]=(char)uprv_tolower(localeID[k]);
        }
    }
    return copyLen;
}
1260
1261 static int32_t
1262 _getCountry(const char *localeID,
1263             char *country, int32_t countryCapacity,
1264             const char **pEnd)
1265 {
1266     int32_t i=0;
1267     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1268     int32_t offset;
1269
1270     /* copy the country as far as possible and count its length */
1271     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1272         if(i<countryCapacity) {
1273             country[i]=(char)uprv_toupper(*localeID);
1274         }
1275         if(i<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1276             cnty[i]=(char)uprv_toupper(*localeID);
1277         }
1278         i++;
1279         localeID++;
1280     }
1281
1282     /* convert 3 character code to 2 character code if possible *CWB*/
1283     if(i==3) {
1284         offset=_findIndex(COUNTRIES_3, cnty);
1285         if(offset>=0) {
1286             i=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1287         }
1288     }
1289
1290     if(pEnd!=NULL) {
1291         *pEnd=localeID;
1292     }
1293     return i;
1294 }
1295
1296 /**
1297  * @param needSeparator if true, then add leading '_' if any variants
1298  * are added to 'variant'
1299  */
1300 static int32_t
1301 _getVariantEx(const char *localeID,
1302               char prev,
1303               char *variant, int32_t variantCapacity,
1304               UBool needSeparator) {
1305     int32_t i=0;
1306
1307     /* get one or more variant tags and separate them with '_' */
1308     if(_isIDSeparator(prev)) {
1309         /* get a variant string after a '-' or '_' */
1310         while(!_isTerminator(*localeID)) {
1311             if (needSeparator) {
1312                 if (i<variantCapacity) {
1313                     variant[i] = '_';
1314                 }
1315                 ++i;
1316                 needSeparator = FALSE;
1317             }
1318             if(i<variantCapacity) {
1319                 variant[i]=(char)uprv_toupper(*localeID);
1320                 if(variant[i]=='-') {
1321                     variant[i]='_';
1322                 }
1323             }
1324             i++;
1325             localeID++;
1326         }
1327     }
1328
1329     /* if there is no variant tag after a '-' or '_' then look for '@' */
1330     if(i==0) {
1331         if(prev=='@') {
1332             /* keep localeID */
1333         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1334             ++localeID; /* point after the '@' */
1335         } else {
1336             return 0;
1337         }
1338         while(!_isTerminator(*localeID)) {
1339             if (needSeparator) {
1340                 if (i<variantCapacity) {
1341                     variant[i] = '_';
1342                 }
1343                 ++i;
1344                 needSeparator = FALSE;
1345             }
1346             if(i<variantCapacity) {
1347                 variant[i]=(char)uprv_toupper(*localeID);
1348                 if(variant[i]=='-' || variant[i]==',') {
1349                     variant[i]='_';
1350                 }
1351             }
1352             i++;
1353             localeID++;
1354         }
1355     }
1356
1357     return i;
1358 }
1359
1360 static int32_t
1361 _getVariant(const char *localeID,
1362             char prev,
1363             char *variant, int32_t variantCapacity) {
1364     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1365 }
1366
1367 /**
1368  * Delete ALL instances of a variant from the given list of one or
1369  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1370  * @param variants the source string of one or more variants,
1371  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1372  * terminated; if it is, trailing zero will NOT be maintained.
1373  * @param variantsLen length of variants
1374  * @param toDelete variant to delete, without separators, e.g.  "EURO"
1375  * or "PREEURO"; not zero terminated
1376  * @param toDeleteLen length of toDelete
1377  * @return number of characters deleted from variants
1378  */
1379 static int32_t
1380 _deleteVariant(char* variants, int32_t variantsLen,
1381                const char* toDelete, int32_t toDeleteLen) {
1382     int32_t delta = 0; /* number of chars deleted */
1383     for (;;) {
1384         UBool flag = FALSE;
1385         if (variantsLen < toDeleteLen) {
1386             return delta;
1387         }
1388         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1389             (variantsLen == toDeleteLen ||
1390              (flag=(variants[toDeleteLen] == '_')))) {
1391             int32_t d = toDeleteLen + (flag?1:0);
1392             variantsLen -= d;
1393             delta += d;
1394             uprv_memmove(variants, variants+d, variantsLen);
1395         } else {
1396             char* p = _strnchr(variants, variantsLen, '_');
1397             if (p == NULL) {
1398                 return delta;
1399             }
1400             ++p;
1401             variantsLen -= (int32_t)(p - variants);
1402             variants = p;
1403         }
1404     }
1405 }
1406
1407 /* Keyword enumeration */
1408
/*
 * State carried by a keyword enumeration.  The keyword list is stored as a
 * sequence of zero-terminated strings followed by one extra zero byte.
 */
typedef struct UKeywordsContext {
    char *keywords;  /* start of the heap-allocated keyword list */
    char *current;   /* next keyword to be returned; points into keywords */
} UKeywordsContext;
1413
1414 static void U_CALLCONV
1415 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1416     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1417     uprv_free(enumerator->context);
1418     uprv_free(enumerator);
1419 }
1420
1421 static int32_t U_CALLCONV
1422 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1423     char *kw = ((UKeywordsContext *)en->context)->keywords;
1424     int32_t result = 0;
1425     while(*kw) {
1426         result++;
1427         kw += uprv_strlen(kw)+1;
1428     }
1429     return result;
1430 }
1431
1432 static const char* U_CALLCONV
1433 uloc_kw_nextKeyword(UEnumeration* en,
1434                     int32_t* resultLength,
1435                     UErrorCode* status) {
1436     const char* result = ((UKeywordsContext *)en->context)->current;
1437     int32_t len = 0;
1438     if(*result) {
1439         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1440         ((UKeywordsContext *)en->context)->current += len+1;
1441     } else {
1442         result = NULL;
1443     }
1444     if (resultLength) {
1445         *resultLength = len;
1446     }
1447     return result;
1448 }
1449
1450 static void U_CALLCONV
1451 uloc_kw_resetKeywords(UEnumeration* en,
1452                       UErrorCode* status) {
1453     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1454 }
1455
1456 static const UEnumeration gKeywordsEnum = {
1457     NULL,
1458     NULL,
1459     uloc_kw_closeKeywords,
1460     uloc_kw_countKeywords,
1461     uenum_unextDefault,
1462     uloc_kw_nextKeyword,
1463     uloc_kw_resetKeywords
1464 };
1465
1466 U_CAPI UEnumeration* U_EXPORT2
1467 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1468 {
1469   UKeywordsContext *myContext = NULL;
1470   UEnumeration *result = NULL;
1471
1472   if(U_FAILURE(*status)) {
1473     return NULL;
1474   }
1475   result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1476   uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1477   myContext = uprv_malloc(sizeof(UKeywordsContext));
1478   if (myContext == NULL) {
1479     *status = U_MEMORY_ALLOCATION_ERROR;
1480     uprv_free(result);
1481     return NULL;
1482   }
1483   myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1484   uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1485   myContext->keywords[keywordListSize] = 0;
1486   myContext->current = myContext->keywords;
1487   result->context = myContext;
1488   return result;
1489 }
1490
1491 U_CAPI UEnumeration* U_EXPORT2
1492 uloc_openKeywords(const char* localeID,
1493                         UErrorCode* status)
1494 {
1495     int32_t i=0;
1496     char keywords[256];
1497     int32_t keywordsCapacity = 256;
1498     if(status==NULL || U_FAILURE(*status)) {
1499         return 0;
1500     }
1501
1502     if(localeID==NULL) {
1503         localeID=uloc_getDefault();
1504     }
1505
1506     /* Skip the language */
1507     _getLanguage(localeID, NULL, 0, &localeID);
1508     if(_isIDSeparator(*localeID)) {
1509         const char *scriptID;
1510         /* Skip the script if available */
1511         _getScript(localeID+1, NULL, 0, &scriptID);
1512         if(scriptID != localeID+1) {
1513             /* Found optional script */
1514             localeID = scriptID;
1515         }
1516         /* Skip the Country */
1517         if (_isIDSeparator(*localeID)) {
1518             _getCountry(localeID+1, NULL, 0, &localeID);
1519             if(_isIDSeparator(*localeID)) {
1520                 _getVariant(localeID+1, *localeID, NULL, 0);
1521             }
1522         }
1523     }
1524
1525     /* keywords are located after '@' */
1526     if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
1527         i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1528     }
1529
1530     if(i) {
1531         return uloc_openKeywordList(keywords, i, status);
1532     } else {
1533         return NULL;
1534     }
1535 }
1536
1537
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * Test whether any bit of 'mask' is set in 'options'.  Both arguments are
 * parenthesized so that compound expressions such as (a | b) are evaluated
 * as a whole before the '&'.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special ID "i-default", stored without a terminating zero. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Length of i_default as a signed value, for comparison with int32_t lengths. */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
1546
1547 /**
1548  * Canonicalize the given localeID, to level 1 or to level 2,
1549  * depending on the options.  To specify level 1, pass in options=0.
1550  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1551  *
1552  * This is the code underlying uloc_getName and uloc_canonicalize.
1553  */
1554 static int32_t
1555 _canonicalize(const char* localeID,
1556               char* result,
1557               int32_t resultCapacity,
1558               uint32_t options,
1559               UErrorCode* err) {
1560     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1561     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1562     const char* origLocaleID = localeID;
1563     const char* keywordAssign = NULL;
1564     const char* separatorIndicator = NULL;
1565     const char* addKeyword = NULL;
1566     const char* addValue = NULL;
1567     char* name;
1568     char* variant = NULL; /* pointer into name, or NULL */
1569     int32_t sawEuro = 0;
1570
1571     if (U_FAILURE(*err)) {
1572         return 0;
1573     }
1574
1575     if (localeID==NULL) {
1576         localeID=uloc_getDefault();
1577     }
1578
1579     /* if we are doing a full canonicalization, then put results in
1580        localeBuffer, if necessary; otherwise send them to result. */
1581     if (OPTION_SET(options, _ULOC_CANONICALIZE) &&
1582         (result == NULL || resultCapacity <  sizeof(localeBuffer))) {
1583         name = localeBuffer;
1584         nameCapacity = sizeof(localeBuffer);
1585     } else {
1586         name = result;
1587         nameCapacity = resultCapacity;
1588     }
1589
1590     /* get all pieces, one after another, and separate with '_' */
1591     len=_getLanguage(localeID, name, nameCapacity, &localeID);
1592
1593     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1594         const char *d = uloc_getDefault();
1595
1596         len = uprv_strlen(d);
1597
1598         if (name != NULL) {
1599             uprv_strncpy(name, d, len);
1600         }
1601     } else if(_isIDSeparator(*localeID)) {
1602         const char *scriptID;
1603
1604         ++fieldCount;
1605         if(len<nameCapacity) {
1606             name[len]='_';
1607         }
1608         ++len;
1609
1610         scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1611         if(scriptSize > 0) {
1612             /* Found optional script */
1613             localeID = scriptID;
1614             ++fieldCount;
1615             len+=scriptSize;
1616             if (_isIDSeparator(*localeID)) {
1617                 /* If there is something else, then we add the _ */
1618                 if(len<nameCapacity) {
1619                     name[len]='_';
1620                 }
1621                 ++len;
1622             }
1623         }
1624
1625         if (_isIDSeparator(*localeID)) {
1626             len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID);
1627             if(_isIDSeparator(*localeID)) {
1628                 ++fieldCount;
1629                 if(len<nameCapacity) {
1630                     name[len]='_';
1631                 }
1632                 ++len;
1633                 variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1634                 if (variantSize > 0) {
1635                     variant = name+len;
1636                     len += variantSize;
1637                     localeID += variantSize + 1; /* skip '_' and variant */
1638                 }
1639             }
1640         }
1641     }
1642
1643     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1644     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1645         UBool done = FALSE;
1646         do {
1647             char c = *localeID;
1648             switch (c) {
1649             case 0:
1650             case '@':
1651                 done = TRUE;
1652                 break;
1653             default:
1654                 if (len<nameCapacity) {
1655                     name[len] = c;
1656                 }
1657                 ++len;
1658                 ++localeID;
1659                 break;
1660             }
1661         } while (!done);
1662     }
1663
1664     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1665        After this, localeID either points to '@' or is NULL */
1666     if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1667         keywordAssign = uprv_strchr(localeID, '=');
1668         separatorIndicator = uprv_strchr(localeID, ';');
1669     }
1670
1671     /* Copy POSIX-style variant, if any [mr@FOO] */
1672     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1673         localeID != NULL && keywordAssign == NULL) {
1674         for (;;) {
1675             char c = *localeID;
1676             if (c == 0) {
1677                 break;
1678             }
1679             if (len<nameCapacity) {
1680                 name[len] = c;
1681             }
1682             ++len;
1683             ++localeID;
1684         }
1685     }
1686
1687     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1688         /* Handle @FOO variant if @ is present and not followed by = */
1689         if (localeID!=NULL && keywordAssign==NULL) {
1690             int32_t posixVariantSize;
1691             /* Add missing '_' if needed */
1692             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1693                 do {
1694                     if(len<nameCapacity) {
1695                         name[len]='_';
1696                     }
1697                     ++len;
1698                     ++fieldCount;
1699                 } while(fieldCount<2);
1700             }
1701             posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1702                                              (UBool)(variantSize > 0));
1703             if (posixVariantSize > 0) {
1704                 if (variant == NULL) {
1705                     variant = name+len;
1706                 }
1707                 len += posixVariantSize;
1708                 variantSize += posixVariantSize;
1709             }
1710         }
1711
1712         /* Check for EURO variants. */
1713         sawEuro = _deleteVariant(variant, variantSize, "EURO", 4);
1714         len -= sawEuro;
1715         if (sawEuro > 0 && name[len-1] == '_') { /* delete trailing '_' */
1716             --len;
1717         }
1718
1719         /* Look up the ID in the canonicalization map */
1720         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1721             const char* id = CANONICALIZE_MAP[j].id;
1722             int32_t n = (int32_t)uprv_strlen(id);
1723             if (len == n && uprv_strncmp(name, id, n) == 0) {
1724                 if (n == 0 && localeID != NULL) {
1725                     break; /* Don't remap "" if keywords present */
1726                 }
1727                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1728                 addKeyword = CANONICALIZE_MAP[j].keyword;
1729                 addValue = CANONICALIZE_MAP[j].value;
1730                 break;
1731             }
1732         }
1733
1734         /* Explicit EURO variant overrides keyword in CANONICALIZE_MAP */
1735         if (sawEuro > 0) {
1736             addKeyword = "currency";
1737             addValue = "EUR";
1738         }
1739     }
1740
1741     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1742         if (localeID!=NULL && keywordAssign!=NULL &&
1743             (!separatorIndicator || separatorIndicator > keywordAssign)) {
1744             if(len<nameCapacity) {
1745                 name[len]='@';
1746             }
1747             ++len;
1748             ++fieldCount;
1749             len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1750                                 addKeyword, addValue, err);
1751         } else if (addKeyword != NULL) {
1752             U_ASSERT(addValue != NULL);
1753             /* inelegant but works -- later make _getKeywords do this? */
1754             len += _copyCount(name+len, nameCapacity-len, "@");
1755             len += _copyCount(name+len, nameCapacity-len, addKeyword);
1756             len += _copyCount(name+len, nameCapacity-len, "=");
1757             len += _copyCount(name+len, nameCapacity-len, addValue);
1758         }
1759     }
1760
1761     if (U_SUCCESS(*err) && name == localeBuffer) {
1762         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1763     }
1764
1765     return u_terminateChars(result, resultCapacity, len, err);
1766 }
1767
1768 /* ### ID parsing API **************************************************/
1769
1770 U_CAPI int32_t  U_EXPORT2
1771 uloc_getParent(const char*    localeID,
1772                char* parent,
1773                int32_t parentCapacity,
1774                UErrorCode* err)
1775 {
1776     const char *lastUnderscore;
1777     int32_t i;
1778
1779     if (U_FAILURE(*err))
1780         return 0;
1781
1782     if (localeID == NULL)
1783         localeID = uloc_getDefault();
1784
1785     lastUnderscore=uprv_strrchr(localeID, '_');
1786     if(lastUnderscore!=NULL) {
1787         i=(int32_t)(lastUnderscore-localeID);
1788     } else {
1789         i=0;
1790     }
1791
1792     if(i>0 && parent != localeID) {
1793         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1794     }
1795     return u_terminateChars(parent, parentCapacity, i, err);
1796 }
1797
1798 U_CAPI int32_t U_EXPORT2
1799 uloc_getLanguage(const char*    localeID,
1800          char* language,
1801          int32_t languageCapacity,
1802          UErrorCode* err)
1803 {
1804     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1805     int32_t i=0;
1806
1807     if (err==NULL || U_FAILURE(*err)) {
1808         return 0;
1809     }
1810
1811     if(localeID==NULL) {
1812         localeID=uloc_getDefault();
1813     }
1814
1815     i=_getLanguage(localeID, language, languageCapacity, NULL);
1816     return u_terminateChars(language, languageCapacity, i, err);
1817 }
1818
1819 U_CAPI int32_t U_EXPORT2
1820 uloc_getScript(const char*    localeID,
1821          char* script,
1822          int32_t scriptCapacity,
1823          UErrorCode* err)
1824 {
1825     int32_t i=0;
1826
1827     if(err==NULL || U_FAILURE(*err)) {
1828         return 0;
1829     }
1830
1831     if(localeID==NULL) {
1832         localeID=uloc_getDefault();
1833     }
1834
1835     /* skip the language */
1836     _getLanguage(localeID, NULL, 0, &localeID);
1837     if(_isIDSeparator(*localeID)) {
1838         i=_getScript(localeID+1, script, scriptCapacity, NULL);
1839     }
1840     return u_terminateChars(script, scriptCapacity, i, err);
1841 }
1842
1843 U_CAPI int32_t  U_EXPORT2
1844 uloc_getCountry(const char* localeID,
1845             char* country,
1846             int32_t countryCapacity,
1847             UErrorCode* err)
1848 {
1849     int32_t i=0;
1850
1851     if(err==NULL || U_FAILURE(*err)) {
1852         return 0;
1853     }
1854
1855     if(localeID==NULL) {
1856         localeID=uloc_getDefault();
1857     }
1858
1859     /* Skip the language */
1860     _getLanguage(localeID, NULL, 0, &localeID);
1861     if(_isIDSeparator(*localeID)) {
1862         const char *scriptID;
1863         /* Skip the script if available */
1864         _getScript(localeID+1, NULL, 0, &scriptID);
1865         if(scriptID != localeID+1) {
1866             /* Found optional script */
1867             localeID = scriptID;
1868         }
1869         if(_isIDSeparator(*localeID)) {
1870             i=_getCountry(localeID+1, country, countryCapacity, NULL);
1871         }
1872     }
1873     return u_terminateChars(country, countryCapacity, i, err);
1874 }
1875
1876 U_CAPI int32_t  U_EXPORT2
1877 uloc_getVariant(const char* localeID,
1878                 char* variant,
1879                 int32_t variantCapacity,
1880                 UErrorCode* err)
1881 {
1882     int32_t i=0;
1883
1884     if(err==NULL || U_FAILURE(*err)) {
1885         return 0;
1886     }
1887
1888     if(localeID==NULL) {
1889         localeID=uloc_getDefault();
1890     }
1891
1892     /* Skip the language */
1893     _getLanguage(localeID, NULL, 0, &localeID);
1894     if(_isIDSeparator(*localeID)) {
1895         const char *scriptID;
1896         /* Skip the script if available */
1897         _getScript(localeID+1, NULL, 0, &scriptID);
1898         if(scriptID != localeID+1) {
1899             /* Found optional script */
1900             localeID = scriptID;
1901         }
1902         /* Skip the Country */
1903         if (_isIDSeparator(*localeID)) {
1904             _getCountry(localeID+1, NULL, 0, &localeID);
1905             if(_isIDSeparator(*localeID)) {
1906                 i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1907             }
1908         }
1909     }
1910
1911     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1912     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1913 /*
1914     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1915         i=_getVariant(localeID+1, '@', variant, variantCapacity);
1916     }
1917 */
1918     return u_terminateChars(variant, variantCapacity, i, err);
1919 }
1920
1921 U_CAPI int32_t  U_EXPORT2
1922 uloc_getName(const char* localeID,
1923              char* name,
1924              int32_t nameCapacity,
1925              UErrorCode* err)
1926 {
1927     return _canonicalize(localeID, name, nameCapacity, 0, err);
1928 }
1929
1930 U_CAPI int32_t  U_EXPORT2
1931 uloc_getBaseName(const char* localeID,
1932                  char* name,
1933                  int32_t nameCapacity,
1934                  UErrorCode* err)
1935 {
1936     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1937 }
1938
1939 U_CAPI int32_t  U_EXPORT2
1940 uloc_canonicalize(const char* localeID,
1941                   char* name,
1942                   int32_t nameCapacity,
1943                   UErrorCode* err)
1944 {
1945     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1946 }
1947
1948 U_CAPI const char*  U_EXPORT2
1949 uloc_getISO3Language(const char* localeID)
1950 {
1951     int16_t offset;
1952     char lang[ULOC_LANG_CAPACITY];
1953     UErrorCode err = U_ZERO_ERROR;
1954
1955     if (localeID == NULL)
1956     {
1957         localeID = uloc_getDefault();
1958     }
1959     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1960     if (U_FAILURE(err))
1961         return "";
1962     offset = _findIndex(LANGUAGES, lang);
1963     if (offset < 0)
1964         return "";
1965     return LANGUAGES_3[offset];
1966 }
1967
1968 U_CAPI const char*  U_EXPORT2
1969 uloc_getISO3Country(const char* localeID)
1970 {
1971     int16_t offset;
1972     char cntry[ULOC_LANG_CAPACITY];
1973     UErrorCode err = U_ZERO_ERROR;
1974
1975     if (localeID == NULL)
1976     {
1977         localeID = uloc_getDefault();
1978     }
1979     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1980     if (U_FAILURE(err))
1981         return "";
1982     offset = _findIndex(COUNTRIES, cntry);
1983     if (offset < 0)
1984         return "";
1985
1986     return COUNTRIES_3[offset];
1987 }
1988
1989 U_CAPI uint32_t  U_EXPORT2
1990 uloc_getLCID(const char* localeID)
1991 {
1992     UErrorCode status = U_ZERO_ERROR;
1993     char       langID[ULOC_FULLNAME_CAPACITY];
1994
1995     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
1996     if (U_FAILURE(status)) {
1997         return 0;
1998     }
1999
2000     return uprv_convertToLCID(langID, localeID, &status);
2001 }
2002
2003 U_CAPI int32_t U_EXPORT2
2004 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2005                 UErrorCode *status)
2006 {
2007     int32_t length;
2008     const char *posix = uprv_convertToPosix(hostid, status);
2009     if (U_FAILURE(*status) || posix == NULL) {
2010         return 0;
2011     }
2012     length = (int32_t)uprv_strlen(posix);
2013     if (length+1 > localeCapacity) {
2014         *status = U_BUFFER_OVERFLOW_ERROR;
2015     }
2016     else {
2017         uprv_strcpy(locale, posix);
2018     }
2019     return length;
2020 }
2021
2022 /* ### Default locale **************************************************/
2023
2024 U_CAPI const char*  U_EXPORT2
2025 uloc_getDefault()
2026 {
2027     return locale_get_default();
2028 }
2029
2030 U_CAPI void  U_EXPORT2
2031 uloc_setDefault(const char*   newDefaultLocale,
2032              UErrorCode* err)
2033 {
2034     if (U_FAILURE(*err))
2035         return;
2036     /* the error code isn't currently used for anything by this function*/
2037
2038     /* propagate change to C++ */
2039     locale_set_default(newDefaultLocale);
2040 }
2041
2042 /* ### Display name **************************************************/
2043
2044 /*
2045  * Lookup a resource bundle table item with fallback on the table level.
2046  * Regular resource bundle lookups perform fallback to parent locale bundles
2047  * and eventually the root bundle, but only for top-level items.
2048  * This function takes the name of a top-level table and of an item in that table
2049  * and performs a lookup of both, falling back until a bundle contains a table
2050  * with this item.
2051  *
2052  * Note: Only the opening of entire bundles falls back through the default locale
2053  * before root. Once a bundle is open, item lookups do not go through the
2054  * default locale because that would result in a mix of languages that is
2055  * unpredictable to the programmer and most likely useless.
2056  */
2057 static const UChar *
2058 _res_getTableStringWithFallback(const char *path, const char *locale,
2059                               const char *tableKey, const char *subTableKey,
2060                               const char *itemKey,
2061                               int32_t *pLength,
2062                               UErrorCode *pErrorCode)
2063 {
2064 /*    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
2065     UResourceBundle *rb=NULL, table, subTable;
2066     const UChar *item=NULL;
2067     UErrorCode errorCode;
2068     char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
2069
2070     /*
2071      * open the bundle for the current locale
2072      * this falls back through the locale's chain to root
2073      */
2074     errorCode=U_ZERO_ERROR;
2075     rb=ures_open(path, locale, &errorCode);
2076     if(U_FAILURE(errorCode)) {
2077         /* total failure, not even root could be opened */
2078         *pErrorCode=errorCode;
2079         return NULL;
2080     } else if(errorCode==U_USING_DEFAULT_WARNING ||
2081                 (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2082     ) {
2083         /* set the "strongest" error code (success->fallback->default->failure) */
2084         *pErrorCode=errorCode;
2085     }
2086
2087     for(;;){
2088         ures_initStackObject(&table);
2089         ures_initStackObject(&subTable);
2090         ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
2091         if (subTableKey != NULL) {
2092             /*
2093             ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
2094             item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
2095             if(U_FAILURE(errorCode)){
2096                 *pErrorCode = errorCode;
2097             }
2098
2099             break;*/
2100
2101             ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
2102         }
2103         if(U_SUCCESS(errorCode)){
2104             item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
2105             if(U_FAILURE(errorCode)){
2106                 const char* replacement = NULL;
2107                 *pErrorCode = errorCode; /*save the errorCode*/
2108                 errorCode = U_ZERO_ERROR;
2109                 /* may be a deprecated code */
2110                 if(uprv_strcmp(tableKey, "Countries")==0){
2111                     replacement =  uloc_getCurrentCountryID(itemKey);
2112                 }else if(uprv_strcmp(tableKey, "Languages")==0){
2113                     replacement =  uloc_getCurrentLanguageID(itemKey);
2114                 }
2115                 /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
2116                 if(replacement!=NULL && itemKey != replacement){
2117                     item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
2118                     if(U_SUCCESS(errorCode)){
2119                         *pErrorCode = errorCode;
2120                         break;
2121                     }
2122                 }
2123             }else{
2124                 break;
2125             }
2126         }
2127
2128         if(U_FAILURE(errorCode)){
2129
2130             /* still can't figure out ?.. try the fallback mechanism */
2131             int32_t len = 0;
2132             const UChar* fallbackLocale =  NULL;
2133             *pErrorCode = errorCode;
2134             errorCode = U_ZERO_ERROR;
2135
2136             fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
2137             if(U_FAILURE(errorCode)){
2138                *pErrorCode = errorCode;
2139                 break;
2140             }
2141
2142             u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
2143
2144             /* guard against recursive fallback */
2145             if(uprv_strcmp(explicitFallbackName, locale)==0){
2146                 *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
2147                 break;
2148             }
2149             ures_close(rb);
2150             rb = ures_open(NULL, explicitFallbackName, &errorCode);
2151             if(U_FAILURE(errorCode)){
2152                 *pErrorCode = errorCode;
2153                 break;
2154             }
2155             /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
2156         }else{
2157             break;
2158         }
2159     }
2160     /* done with the locale string - ready to close table and rb */
2161     ures_close(&subTable);
2162     ures_close(&table);
2163     ures_close(rb);
2164     return item;
2165 }
2166
2167 static int32_t
2168 _getStringOrCopyKey(const char *path, const char *locale,
2169                     const char *tableKey,
2170                     const char* subTableKey,
2171                     const char *itemKey,
2172                     const char *substitute,
2173                     UChar *dest, int32_t destCapacity,
2174                     UErrorCode *pErrorCode) {
2175     const UChar *s = NULL;
2176     int32_t length = 0;
2177
2178     if(itemKey==NULL) {
2179         /* top-level item: normal resource bundle access */
2180         UResourceBundle *rb;
2181
2182         rb=ures_open(path, locale, pErrorCode);
2183         if(U_SUCCESS(*pErrorCode)) {
2184             s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
2185             /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2186             ures_close(rb);
2187         }
2188     } else {
2189         /* second-level item, use special fallback */
2190         s=_res_getTableStringWithFallback(path, locale,
2191                                            tableKey,
2192                                            subTableKey,
2193                                            itemKey,
2194                                            &length,
2195                                            pErrorCode);
2196     }
2197     if(U_SUCCESS(*pErrorCode)) {
2198         int32_t copyLength=uprv_min(length, destCapacity);
2199         if(copyLength>0 && s != NULL) {
2200             u_memcpy(dest, s, copyLength);
2201         }
2202     } else {
2203         /* no string from a resource bundle: convert the substitute */
2204         length=(int32_t)uprv_strlen(substitute);
2205         u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
2206         *pErrorCode=U_USING_DEFAULT_WARNING;
2207     }
2208
2209     return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2210 }
2211
2212 static int32_t
2213 _getDisplayNameForComponent(const char *locale,
2214                             const char *displayLocale,
2215                             UChar *dest, int32_t destCapacity,
2216                             int32_t (*getter)(const char *, char *, int32_t, UErrorCode *),
2217                             const char *tag,
2218                             UErrorCode *pErrorCode) {
2219     char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
2220     int32_t length;
2221     UErrorCode localStatus;
2222
2223     /* argument checking */
2224     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2225         return 0;
2226     }
2227
2228     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2229         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2230         return 0;
2231     }
2232
2233     localStatus = U_ZERO_ERROR;
2234     length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
2235     if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
2236         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2237         return 0;
2238     }
2239     if(length==0) {
2240         return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
2241     }
2242
2243     return _getStringOrCopyKey(NULL, displayLocale,
2244                                tag, NULL, localeBuffer,
2245                                localeBuffer,
2246                                dest, destCapacity,
2247                                pErrorCode);
2248 }
2249
2250 U_CAPI int32_t U_EXPORT2
2251 uloc_getDisplayLanguage(const char *locale,
2252                         const char *displayLocale,
2253                         UChar *dest, int32_t destCapacity,
2254                         UErrorCode *pErrorCode) {
2255     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2256                 uloc_getLanguage, _kLanguages, pErrorCode);
2257 }
2258
2259 U_CAPI int32_t U_EXPORT2
2260 uloc_getDisplayScript(const char* locale,
2261                       const char* displayLocale,
2262                       UChar *dest, int32_t destCapacity,
2263                       UErrorCode *pErrorCode)
2264 {
2265     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2266                 uloc_getScript, _kScripts, pErrorCode);
2267 }
2268
2269 U_CAPI int32_t U_EXPORT2
2270 uloc_getDisplayCountry(const char *locale,
2271                        const char *displayLocale,
2272                        UChar *dest, int32_t destCapacity,
2273                        UErrorCode *pErrorCode) {
2274     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2275                 uloc_getCountry, _kCountries, pErrorCode);
2276 }
2277
2278 /*
2279  * TODO separate variant1_variant2_variant3...
2280  * by getting each tag's display string and concatenating them with ", "
2281  * in between - similar to uloc_getDisplayName()
2282  */
2283 U_CAPI int32_t U_EXPORT2
2284 uloc_getDisplayVariant(const char *locale,
2285                        const char *displayLocale,
2286                        UChar *dest, int32_t destCapacity,
2287                        UErrorCode *pErrorCode) {
2288     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2289                 uloc_getVariant, _kVariants, pErrorCode);
2290 }
2291
2292 U_CAPI int32_t U_EXPORT2
2293 uloc_getDisplayName(const char *locale,
2294                     const char *displayLocale,
2295                     UChar *dest, int32_t destCapacity,
2296                     UErrorCode *pErrorCode)
2297 {
2298     int32_t length, length2, length3 = 0;
2299     UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
2300     UEnumeration* keywordEnum = NULL;
2301     int32_t keywordCount = 0;
2302     const char *keyword = NULL;
2303     int32_t keywordLen = 0;
2304     char keywordValue[256];
2305     int32_t keywordValueLen = 0;
2306
2307     /* argument checking */
2308     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2309         return 0;
2310     }
2311
2312     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2313         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2314         return 0;
2315     }
2316
2317     /*
2318      * if there is a language, then write "language (country, variant)"
2319      * otherwise write "country, variant"
2320      */
2321
2322     /* write the language */
2323     length=uloc_getDisplayLanguage(locale, displayLocale,
2324                                    dest, destCapacity,
2325                                    pErrorCode);
2326     hasLanguage= length>0;
2327
2328     if(hasLanguage) {
2329         /* append " (" */
2330         if(length<destCapacity) {
2331             dest[length]=0x20;
2332         }
2333         ++length;
2334         if(length<destCapacity) {
2335             dest[length]=0x28;
2336         }
2337         ++length;
2338     }
2339
2340     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2341         /* keep preflighting */
2342         *pErrorCode=U_ZERO_ERROR;
2343     }
2344
2345     /* append the script */
2346     if(length<destCapacity) {
2347         length2=uloc_getDisplayScript(locale, displayLocale,
2348                                        dest+length, destCapacity-length,
2349                                        pErrorCode);
2350     } else {
2351         length2=uloc_getDisplayScript(locale, displayLocale,
2352                                        NULL, 0,
2353                                        pErrorCode);
2354     }
2355     hasScript= length2>0;
2356     length+=length2;
2357
2358     if(hasScript) {
2359         /* append ", " */
2360         if(length<destCapacity) {
2361             dest[length]=0x2c;
2362         }
2363         ++length;
2364         if(length<destCapacity) {
2365             dest[length]=0x20;
2366         }
2367         ++length;
2368     }
2369
2370     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2371         /* keep preflighting */
2372         *pErrorCode=U_ZERO_ERROR;
2373     }
2374
2375     /* append the country */
2376     if(length<destCapacity) {
2377         length2=uloc_getDisplayCountry(locale, displayLocale,
2378                                        dest+length, destCapacity-length,
2379                                        pErrorCode);
2380     } else {
2381         length2=uloc_getDisplayCountry(locale, displayLocale,
2382                                        NULL, 0,
2383                                        pErrorCode);
2384     }
2385     hasCountry= length2>0;
2386     length+=length2;
2387
2388     if(hasCountry) {
2389         /* append ", " */
2390         if(length<destCapacity) {
2391             dest[length]=0x2c;
2392         }
2393         ++length;
2394         if(length<destCapacity) {
2395             dest[length]=0x20;
2396         }
2397         ++length;
2398     }
2399
2400     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2401         /* keep preflighting */
2402         *pErrorCode=U_ZERO_ERROR;
2403     }
2404
2405     /* append the variant */
2406     if(length<destCapacity) {
2407         length2=uloc_getDisplayVariant(locale, displayLocale,
2408                                        dest+length, destCapacity-length,
2409                                        pErrorCode);
2410     } else {
2411         length2=uloc_getDisplayVariant(locale, displayLocale,
2412                                        NULL, 0,
2413                                        pErrorCode);
2414     }
2415     hasVariant= length2>0;
2416     length+=length2;
2417
2418     if(hasVariant) {
2419         /* append ", " */
2420         if(length<destCapacity) {
2421             dest[length]=0x2c;
2422         }
2423         ++length;
2424         if(length<destCapacity) {
2425             dest[length]=0x20;
2426         }
2427         ++length;
2428     }
2429
2430     keywordEnum = uloc_openKeywords(locale, pErrorCode);
2431
2432     for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){
2433           if(U_FAILURE(*pErrorCode)){
2434               break;
2435           }
2436           /* the uenum_next returns NUL terminated string */
2437           keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
2438           if(length + length3 < destCapacity) {
2439             length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2440           } else {
2441             length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
2442           }
2443           if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2444               /* keep preflighting */
2445               *pErrorCode=U_ZERO_ERROR;
2446           }
2447           keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
2448           if(keywordValueLen) {
2449             if(length + length3 < destCapacity) {
2450               dest[length + length3] = 0x3D;
2451             }
2452             length3++;
2453             if(length + length3 < destCapacity) {
2454               length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2455             } else {
2456               length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
2457             }
2458             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2459                 /* keep preflighting */
2460                 *pErrorCode=U_ZERO_ERROR;
2461             }
2462           }
2463           if(keywordCount > 1) {
2464             if(length + length3 + 1 < destCapacity && keywordCount) {
2465               dest[length + length3]=0x2c;
2466               dest[length + length3+1]=0x20;
2467             }
2468             length3++; /* ',' */
2469             length3++; /* ' ' */
2470           }
2471     }
2472     uenum_close(keywordEnum);
2473
2474     hasKeywords = length3 > 0;
2475     length += length3;
2476
2477
2478
2479     if ((hasScript && !hasCountry)
2480         || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
2481         || ((hasScript || hasCountry || hasVariant) && !hasKeywords)
2482         || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords))
2483     {
2484         /* remove ", " or " (" */
2485         length-=2;
2486     }
2487
2488     if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) {
2489         /* append ")" */
2490         if(length<destCapacity) {
2491             dest[length]=0x29;
2492         }
2493         ++length;
2494     }
2495
2496     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2497         /* keep preflighting */
2498         *pErrorCode=U_ZERO_ERROR;
2499     }
2500
2501     return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2502 }
2503
2504 U_CAPI int32_t U_EXPORT2
2505 uloc_getDisplayKeyword(const char* keyword,
2506                        const char* displayLocale,
2507                        UChar* dest,
2508                        int32_t destCapacity,
2509                        UErrorCode* status){
2510
2511     /* argument checking */
2512     if(status==NULL || U_FAILURE(*status)) {
2513         return 0;
2514     }
2515
2516     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2517         *status=U_ILLEGAL_ARGUMENT_ERROR;
2518         return 0;
2519     }
2520
2521
2522     /* pass itemKey=NULL to look for a top-level item */
2523     return _getStringOrCopyKey(NULL, displayLocale,
2524                                _kKeys, NULL,
2525                                keyword,
2526                                keyword,
2527                                dest, destCapacity,
2528                                status);
2529
2530 }
2531
2532
2533 #define UCURRENCY_DISPLAY_NAME_INDEX 1
2534
2535 U_CAPI int32_t U_EXPORT2
2536 uloc_getDisplayKeywordValue(   const char* locale,
2537                                const char* keyword,
2538                                const char* displayLocale,
2539                                UChar* dest,
2540                                int32_t destCapacity,
2541                                UErrorCode* status){
2542
2543
2544     char keywordValue[ULOC_FULLNAME_CAPACITY*4];
2545     int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
2546     int32_t keywordValueLen =0;
2547
2548     /* argument checking */
2549     if(status==NULL || U_FAILURE(*status)) {
2550         return 0;
2551     }
2552
2553     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2554         *status=U_ILLEGAL_ARGUMENT_ERROR;
2555         return 0;
2556     }
2557
2558     /* get the keyword value */
2559     keywordValue[0]=0;
2560     keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
2561
2562     /*
2563      * if the keyword is equal to currency .. then to get the display name
2564      * we need to do the fallback ourselves
2565      */
2566     if(uprv_stricmp(keyword, _kCurrency)==0){
2567
2568         int32_t dispNameLen = 0;
2569         const UChar *dispName = NULL;
2570
2571         UResourceBundle *bundle     = ures_open(NULL, displayLocale, status);
2572         UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
2573         UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
2574
2575         dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
2576
2577         /*close the bundles */
2578         ures_close(currency);
2579         ures_close(currencies);
2580         ures_close(bundle);
2581
2582         if(U_FAILURE(*status)){
2583             if(*status == U_MISSING_RESOURCE_ERROR){
2584                 /* we just want to write the value over if nothing is available */
2585                 *status = U_USING_DEFAULT_WARNING;
2586             }else{
2587                 return 0;
2588             }
2589         }
2590
2591         /* now copy the dispName over if not NULL */
2592         if(dispName != NULL){
2593             if(dispNameLen <= destCapacity){
2594                 uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
2595                 return u_terminateUChars(dest, destCapacity, dispNameLen, status);
2596             }else{
2597                 *status = U_BUFFER_OVERFLOW_ERROR;
2598                 return dispNameLen;
2599             }
2600         }else{
2601             /* we have not found the display name for the value .. just copy over */
2602             if(keywordValueLen <= destCapacity){
2603                 u_charsToUChars(keywordValue, dest, keywordValueLen);
2604                 return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
2605             }else{
2606                  *status = U_BUFFER_OVERFLOW_ERROR;
2607                 return keywordValueLen;
2608             }
2609         }
2610
2611
2612     }else{
2613
2614         return _getStringOrCopyKey(NULL, displayLocale,
2615                                    _kTypes, keyword,
2616                                    keywordValue,
2617                                    keywordValue,
2618                                    dest, destCapacity,
2619                                    status);
2620     }
2621 }
2622
2623 /* ### Get available **************************************************/
2624
2625 static UBool U_CALLCONV uloc_cleanup(void) {
2626     char ** temp;
2627
2628     if (_installedLocales) {
2629         temp = _installedLocales;
2630         _installedLocales = NULL;
2631
2632         _installedLocalesCount = 0;
2633
2634         uprv_free(temp);
2635     }
2636     return TRUE;
2637 }
2638
2639 static void _load_installedLocales()
2640 {
2641     UBool   localesLoaded;
2642
2643     umtx_lock(NULL);
2644     localesLoaded = _installedLocales != NULL;
2645     umtx_unlock(NULL);
2646
2647     if (localesLoaded == FALSE) {
2648         UResourceBundle *index = NULL;
2649         UResourceBundle installed;
2650         UErrorCode status = U_ZERO_ERROR;
2651         char ** temp;
2652         int32_t i = 0;
2653         int32_t localeCount;
2654
2655         ures_initStackObject(&installed);
2656         index = ures_openDirect(NULL, _kIndexLocaleName, &status);
2657         ures_getByKey(index, _kIndexTag, &installed, &status);
2658
2659         if(U_SUCCESS(status)) {
2660             localeCount = ures_getSize(&installed);
2661             temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
2662
2663             ures_resetIterator(&installed);
2664             while(ures_hasNext(&installed)) {
2665                 ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
2666             }
2667             temp[i] = NULL;
2668
2669             umtx_lock(NULL);
2670             if (_installedLocales == NULL)
2671             {
2672                 _installedLocales = temp;
2673                 _installedLocalesCount = localeCount;
2674                 temp = NULL;
2675                 ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
2676             }
2677             umtx_unlock(NULL);
2678
2679             uprv_free(temp);
2680             ures_close(&installed);
2681         }
2682         ures_close(index);
2683     }
2684 }
2685
2686 U_CAPI const char* U_EXPORT2
2687 uloc_getAvailable(int32_t offset)
2688 {
2689
2690     _load_installedLocales();
2691
2692     if (offset > _installedLocalesCount)
2693         return NULL;
2694     return _installedLocales[offset];
2695 }
2696
2697 U_CAPI int32_t  U_EXPORT2
2698 uloc_countAvailable()
2699 {
2700     _load_installedLocales();
2701     return _installedLocalesCount;
2702 }
2703
2704 /**
2705  * Returns a list of all language codes defined in ISO 639.  This is a pointer
2706  * to an array of pointers to arrays of char.  All of these pointers are owned
2707  * by ICU-- do not delete them, and do not write through them.  The array is
2708  * terminated with a null pointer.
2709  */
2710 U_CAPI const char* const*  U_EXPORT2
2711 uloc_getISOLanguages()
2712 {
2713     return LANGUAGES;
2714 }
2715
2716 /**
2717  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2718  * pointer to an array of pointers to arrays of char.  All of these pointers are
2719  * owned by ICU-- do not delete them, and do not write through them.  The array is
2720  * terminated with a null pointer.
2721  */
2722 U_CAPI const char* const*  U_EXPORT2
2723 uloc_getISOCountries()
2724 {
2725     return COUNTRIES;
2726 }
2727
2728
/* this function to be moved into cstring.c later */

/* decimal separator that sprintf produces at runtime; 0 until first probed */
static char gDecimal = 0;

/*
 * strtod() replacement for input that always uses '.' as decimal point,
 * whatever decimal separator the C runtime currently formats numbers with.
 *
 * If the runtime separator is '.', or the input contains no '.', the call is
 * passed straight to uprv_strtod().  Otherwise the first 29 characters of the
 * input are copied, the first '.' is replaced by the runtime separator, and
 * the copy is parsed instead; *end (when requested) is mapped back onto the
 * original string.
 */
static /* U_CAPI */
double
/* U_EXPORT2 */
_uloc_strtod(const char *start, char **end) {
  char localized[30];
  char *dot;
  char *stop;
  double value;

  if (gDecimal == 0) {
    char probe[5];
    /* For machines that decide to change the decimal on you,
       and try to be too smart with localization.
       This normally should be just a '.'. */
    sprintf(probe, "%+1.1f", 1.0);
    gDecimal = probe[2];  /* "+1.0": index 2 holds the separator */
  }

  if (gDecimal == '.') {
    /* runtime already agrees with the input format: fall through to OS */
    return uprv_strtod(start, end);
  }

  uprv_strncpy(localized, start, 29);
  localized[29] = 0;

  dot = uprv_strchr(localized, '.');
  if (dot == NULL) {
    /* no decimal point */
    return uprv_strtod(start, end);
  }
  *dot = gDecimal;

  value = uprv_strtod(localized, &stop);
  if (end != NULL) {
    /* cast away const (to follow uprv_strtod API.) */
    *end = (char*)(start + (stop - localized));
  }
  return value;
}
2767
/*
 * One entry of a parsed HTTP Accept-Language list, as built and sorted by
 * uloc_acceptLanguageFromHTTP().
 */
typedef struct {
    double q;       /* quality value: parsed from the item's parameter, 1.0 when the item has none */
    char *locale;   /* locale ID allocated with uprv_strndup()/uprv_strdup(); released with uprv_free() */
#if defined(ULOC_DEBUG_PURIFY)
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
#endif
} _acceptLangItem;
2775
2776 static int32_t U_CALLCONV
2777 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2778 {
2779     const _acceptLangItem *aa = (const _acceptLangItem*)a;
2780     const _acceptLangItem *bb = (const _acceptLangItem*)b;
2781
2782     int32_t rc = 0;
2783     if(bb->q < aa->q) {
2784         rc = -1;  /* A > B */
2785     } else if(bb->q > aa->q) {
2786         rc = 1;   /* A < B */
2787     } else {
2788         rc = 0;   /* A = B */
2789     }
2790
2791     if(rc==0) {
2792         rc = uprv_stricmp(aa->locale, bb->locale);
2793     }
2794
2795 #if defined(ULOC_DEBUG)
2796     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2797     aa->locale, aa->q,
2798     bb->locale, bb->q,
2799     rc);*/
2800 #endif
2801
2802     return rc;
2803 }
2804
2805 /*
2806 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2807 */
2808
2809 U_CAPI int32_t U_EXPORT2
2810 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2811                             const char *httpAcceptLanguage,
2812                             UEnumeration* availableLocales,
2813                             UErrorCode *status)
2814 {
2815     _acceptLangItem *j;
2816     _acceptLangItem smallBuffer[30];
2817     char **strs;
2818     char tmp[ULOC_FULLNAME_CAPACITY +1];
2819     int32_t n = 0;
2820     const char *itemEnd;
2821     const char *paramEnd;
2822     const char *s;
2823     const char *t;
2824     int32_t res;
2825     int32_t i;
2826     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2827     int32_t jSize;
2828
2829     j = smallBuffer;
2830     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2831     if(U_FAILURE(*status)) {
2832         return -1;
2833     }
2834
2835     for(s=httpAcceptLanguage;s&&*s;) {
2836         while(isspace(*s)) /* eat space at the beginning */
2837             s++;
2838         itemEnd=uprv_strchr(s,',');
2839         paramEnd=uprv_strchr(s,';');
2840         if(!itemEnd) {
2841             itemEnd = httpAcceptLanguage+l; /* end of string */
2842         }
2843         if(paramEnd && paramEnd<itemEnd) {
2844             /* semicolon (;) is closer than end (,) */
2845             t = paramEnd+1;
2846             if(*t=='q') {
2847                 t++;
2848             }
2849             while(isspace(*t)) {
2850                 t++;
2851             }
2852             if(*t=='=') {
2853                 t++;
2854             }
2855             while(isspace(*t)) {
2856                 t++;
2857             }
2858             j[n].q = _uloc_strtod(t,NULL);
2859         } else {
2860             /* no semicolon - it's 1.0 */
2861             j[n].q = 1.0;
2862             paramEnd = itemEnd;
2863         }
2864 #if defined(ULOC_DEBUG_PURIFY)
2865         j[n].dummy=0xDECAFBAD;
2866 #endif
2867         /* eat spaces prior to semi */
2868         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2869             ;
2870         j[n].locale = uprv_strndup(s,(int32_t)((t+1)-s));
2871         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2872         if(strcmp(j[n].locale,tmp)) {
2873             uprv_free(j[n].locale);
2874             j[n].locale=uprv_strdup(tmp);
2875         }
2876 #if defined(ULOC_DEBUG)
2877         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2878 #endif
2879         n++;
2880         s = itemEnd;
2881         while(*s==',') { /* eat duplicate commas */
2882             s++;
2883         }
2884         if(n>=jSize) {
2885           if(j==smallBuffer) {  /* overflowed the small buffer. */
2886             j = uprv_malloc(sizeof(j[0])*(jSize*2));
2887             if(j!=NULL) {
2888               uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2889             }
2890 #if defined(ULOC_DEBUG)
2891             fprintf(stderr,"malloced at size %d\n", jSize);
2892 #endif
2893           } else {
2894             j = uprv_realloc(j, sizeof(j[0])*jSize*2);
2895 #if defined(ULOC_DEBUG)
2896             fprintf(stderr,"re-alloced at size %d\n", jSize);
2897 #endif
2898           }
2899           jSize *= 2;
2900           if(j==NULL) {
2901             *status = U_MEMORY_ALLOCATION_ERROR;
2902             return -1;
2903           }
2904         }
2905     }
2906     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2907     if(U_FAILURE(*status)) {
2908       if(j != smallBuffer) {
2909 #if defined(ULOC_DEBUG)
2910         fprintf(stderr,"freeing j %p\n", j);
2911 #endif
2912         uprv_free(j);
2913       }
2914       return -1;
2915     }
2916     strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
2917     for(i=0;i<n;i++) {
2918 #if defined(ULOC_DEBUG)
2919         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2920 #endif
2921         strs[i]=j[i].locale;
2922     }
2923     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
2924         (const char**)strs, n, availableLocales, status);
2925     for(i=0;i<n;i++) {
2926         uprv_free(strs[i]);
2927     }
2928     uprv_free(strs);
2929     if(j != smallBuffer) {
2930 #if defined(ULOC_DEBUG)
2931       fprintf(stderr,"freeing j %p\n", j);
2932 #endif
2933       uprv_free(j);
2934     }
2935     return res;
2936 }
2937
2938
2939 U_CAPI int32_t U_EXPORT2
2940 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2941                     UAcceptResult *outResult, const char **acceptList,
2942                     int32_t acceptListCount,
2943                     UEnumeration* availableLocales,
2944                     UErrorCode *status)
2945 {
2946     int32_t i,j;
2947     int32_t len;
2948     int32_t maxLen=0;
2949     char tmp[ULOC_FULLNAME_CAPACITY+1];
2950     const char *l;
2951     char **fallbackList;
2952     if(U_FAILURE(*status)) {
2953         return -1;
2954     }
2955     fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
2956     if(fallbackList==NULL) {
2957       *status = U_MEMORY_ALLOCATION_ERROR;
2958       return -1;
2959     }
2960     for(i=0;i<acceptListCount;i++) {
2961 #if defined(ULOC_DEBUG)
2962         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2963 #endif
2964         while((l=uenum_next(availableLocales, NULL, status))) {
2965 #if defined(ULOC_DEBUG)
2966             fprintf(stderr,"  %s\n", l);
2967 #endif
2968             len = (int32_t)uprv_strlen(l);
2969             if(!uprv_strcmp(acceptList[i], l)) {
2970                 if(outResult) {
2971                     *outResult = ULOC_ACCEPT_VALID;
2972                 }
2973 #if defined(ULOC_DEBUG)
2974                 fprintf(stderr, "MATCH! %s\n", l);
2975 #endif
2976                 if(len>0) {
2977                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2978                 }
2979                 for(j=0;j<i;j++) {
2980                     uprv_free(fallbackList[j]);
2981                 }
2982                 uprv_free(fallbackList);
2983                 return u_terminateChars(result, resultAvailable, len, status);
2984             }
2985             if(len>maxLen) {
2986                 maxLen = len;
2987             }
2988         }
2989         uenum_reset(availableLocales, status);
2990         /* save off parent info */
2991         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2992             fallbackList[i] = uprv_strdup(tmp);
2993         } else {
2994             fallbackList[i]=0;
2995         }
2996     }
2997
2998     for(maxLen--;maxLen>0;maxLen--) {
2999         for(i=0;i<acceptListCount;i++) {
3000             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
3001 #if defined(ULOC_DEBUG)
3002                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
3003 #endif
3004                 while((l=uenum_next(availableLocales, NULL, status))) {
3005 #if defined(ULOC_DEBUG)
3006                     fprintf(stderr,"  %s\n", l);
3007 #endif
3008                     len = (int32_t)uprv_strlen(l);
3009                     if(!uprv_strcmp(fallbackList[i], l)) {
3010                         if(outResult) {
3011                             *outResult = ULOC_ACCEPT_FALLBACK;
3012                         }
3013 #if defined(ULOC_DEBUG)
3014                         fprintf(stderr, "fallback MATCH! %s\n", l);
3015 #endif
3016                         if(len>0) {
3017                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3018                         }
3019                         for(j=0;j<acceptListCount;j++) {
3020                             uprv_free(fallbackList[j]);
3021                         }
3022                         uprv_free(fallbackList);
3023                         return u_terminateChars(result, resultAvailable, len, status);
3024                     }
3025                 }
3026                 uenum_reset(availableLocales, status);
3027
3028                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3029                     uprv_free(fallbackList[i]);
3030                     fallbackList[i] = uprv_strdup(tmp);
3031                 } else {
3032                     uprv_free(fallbackList[i]);
3033                     fallbackList[i]=0;
3034                 }
3035             }
3036         }
3037         if(outResult) {
3038             *outResult = ULOC_ACCEPT_FAILED;
3039         }
3040     }
3041     for(i=0;i<acceptListCount;i++) {
3042         uprv_free(fallbackList[i]);
3043     }
3044     uprv_free(fallbackList);
3045     return -1;
3046 }
3047
3048 /*eof*/