icuSources/common/uloc.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1997-2008, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *
   7 * File ULOC.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   04/01/97    aliu        Creation.
  13 *   08/21/98    stephen     JDK 1.2 sync
  14 *   12/08/98    rtg         New Locale implementation and C API
  15 *   03/15/99    damiba      overhaul.
  16 *   04/06/99    stephen     changed setDefault() to realloc and copy
  17 *   06/14/99    stephen     Changed calls to ures_open for new params
  18 *   07/21/99    stephen     Modified setDefault() to propagate to C++
  19 *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
  20 *                           brought canonicalization code into line with spec
  21 *****************************************************************************/
  22
  23 /*
  24    POSIX's locale format, from putil.c: [no spaces]
  25
  26      ll [ _CC ] [ . MM ] [ @ VV]
  27
  28      l = lang, C = ctry, M = charmap, V = variant
  29 */
  30
  31 #include "unicode/utypes.h"
  32 #include "unicode/ustring.h"
  33 #include "unicode/uloc.h"
  34 #include "unicode/ures.h"
  35
  36 #include "putilimp.h"
  37 #include "ustr_imp.h"
  38 #include "ulocimp.h"
  39 #include "uresimp.h"
  40 #include "umutex.h"
  41 #include "cstring.h"
  42 #include "cmemory.h"
  43 #include "ucln_cmn.h"
  44 #include "locmap.h"
  45 #include "uarrsort.h"
  46 #include "uenumimp.h"
  47 #include "uassert.h"
  48
  49 #include <stdio.h> /* for sprintf */
  50
  51 /* ### Declarations **************************************************/
  52
  53 /* Locale stuff from locid.cpp */
  54 U_CFUNC void locale_set_default(const char *id);
  55 U_CFUNC const char *locale_get_default(void);
  56 U_CFUNC int32_t
  57 locale_getKeywords(const char *localeID,
  58             char prev,
  59             char *keywords, int32_t keywordCapacity,
  60             char *values, int32_t valuesCapacity, int32_t *valLen,
  61             UBool valuesToo,
  62             UErrorCode *status);
  63
  64 /* ### Constants **************************************************/
  65
  66 /* These strings describe the resources we attempt to load from
  67  the locale ResourceBundle data file.*/
  68 static const char _kLanguages[]       = "Languages";
  69 static const char _kScripts[]         = "Scripts";
  70 static const char _kCountries[]       = "Countries";
  71 static const char _kVariants[]        = "Variants";
  72 static const char _kKeys[]            = "Keys";
  73 static const char _kTypes[]           = "Types";
  74 static const char _kIndexLocaleName[] = "res_index";
  75 static const char _kRootName[]        = "root";
  76 static const char _kIndexTag[]        = "InstalledLocales";
  77 static const char _kCurrency[]        = "currency";
  78 static const char _kCurrencies[]      = "Currencies";
  79 static char** _installedLocales = NULL;
  80 static int32_t _installedLocalesCount = 0;
  81
  82 /* ### Data tables **************************************************/
  83
  84 /**
  85  * Table of language codes, both 2- and 3-letter, with preference
  86  * given to 2-letter codes where possible.  Includes 3-letter codes
  87  * that lack a 2-letter equivalent.
  88  *
  89  * This list must be in sorted order.  This list is returned directly
  90  * to the user by some API.
  91  *
  92  * This list must be kept in sync with LANGUAGES_3, with corresponding
  93  * entries matched.
  94  *
  95  * This table should be terminated with a NULL entry, followed by a
  96  * second list, and another NULL entry.  The first list is visible to
  97  * user code when this array is returned by API.  The second list
  98  * contains codes we support, but do not expose through user API.
  99  *
 100  * Notes
 101  *
 102  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 103  * include the revisions up to 2001/7/27 *CWB*
 104  *
 105  * The 3 character codes are the terminology codes like RFC 3066.  This
 106  * is compatible with prior ICU codes
 107  *
 108  * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 109  * table but now at the end of the table because 3 character codes are
 110  * duplicates.  This avoids bad searches going from 3 to 2 character
 111  * codes.
 112  *
 113  * The range qaa-qtz is reserved for local use
 114  */
 115 static const char * const LANGUAGES[] = {
 116     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
 117     "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
 118     "ang", "anp", "apa",
 119     "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
 120     "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
 121     "bai", "bal", "ban", "bas", "bat", "be",  "bej",
 122     "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
 123     "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
 124     "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
 125     "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
 126     "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
 127     "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
 128     "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
 129     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
 130     "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
 131     "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
 132     "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
 133     "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
 134     "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
 135     "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
 136     "grc", "gsw", "gu",  "gv", "gwi",
 137     "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
 138     "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
 139     "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
 140     "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
 141     "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
 142     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
 143     "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
 144     "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
 145     "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
 146     "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
 147     "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
 148     "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
 149     "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",
 150     "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
 151     "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
 152     "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
 153     "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
 154     "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
 155     "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
 156     "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
 157     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
 158     "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
 159     "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
 160     "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
 161     "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
 162     "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
 163     "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
 164     "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
 165     "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
 166     "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
 167     "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
 168     "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",
 169     "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
 170     "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
 171     "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
 172     "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
 173     "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
 174     "zu",  "zun", "zxx", "zza",
 175 NULL,
 176     "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
 177 NULL
 178 };
 179 static const char* const DEPRECATED_LANGUAGES[]={
 180     "in", "iw", "ji", "jw", NULL, NULL
 181 };
 182 static const char* const REPLACEMENT_LANGUAGES[]={
 183     "id", "he", "yi", "jv", NULL, NULL
 184 };
 185
 186 /**
 187  * Table of 3-letter language codes.
 188  *
 189  * This is a lookup table used to convert 3-letter language codes to
 190  * their 2-letter equivalent, where possible.  It must be kept in sync
 191  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 192  * same language as LANGUAGES_3[i].  The commented-out lines are
 193  * copied from LANGUAGES to make eyeballing this baby easier.
 194  *
 195  * Where a 3-letter language code has no 2-letter equivalent, the
 196  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 197  *
 198  * This table should be terminated with a NULL entry, followed by a
 199  * second list, and another NULL entry.  The two lists correspond to
 200  * the two lists in LANGUAGES.
 201  */
 202 static const char * const LANGUAGES_3[] = {
 203 /*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
 204     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
 205 /*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
 206     "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
 207 /*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
 208     "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
 209 /*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
 210     "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
 211 /*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
 212     "bai", "bal", "ban", "bas", "bat", "bel", "bej",
 213 /*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
 214     "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
 215 /*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
 216     "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
 217 /*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
 218     "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
 219 /*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
 220     "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
 221 /*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
 222     "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
 223 /*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
 224     "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
 225 /*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
 226     "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
 227 /*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
 228     "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
 229 /*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
 230     "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
 231 /*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
 232     "enm", "epo", "spa", "est", "eus", "ewo", "fas",
 233 /*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
 234     "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
 235 /*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
 236     "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
 237 /*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
 238     "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
 239 /*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
 240     "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
 241 /*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
 242     "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
 243 /*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
 244     "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
 245 /*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
 246     "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
 247 /*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
 248     "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
 249 /*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
 250     "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
 251 /*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
 252     "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
 253 /*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
 254     "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
 255 /*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
 256     "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
 257 /*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
 258     "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
 259 /*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
 260     "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
 261 /*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
 262     "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
 263 /*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
 264     "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
 265 /*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
 266     "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
 267 /*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
 268     "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
 269 /*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
 270     "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
 271 /*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
 272     "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
 273 /*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
 274     "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
 275 /*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
 276     "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
 277 /*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
 278     "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
 279 /*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
 280     "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
 281 /*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
 282     "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
 283 /*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
 284     "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
 285 /*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
 286     "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
 287 /*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
 288     "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
 289 /*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
 290     "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
 291 /*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
 292     "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
 293 /*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
 294     "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
 295 /*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
 296     "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
 297 /*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
 298     "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
 299 /*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
 300     "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
 301 /*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
 302     "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
 303 /*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
 304     "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
 305 /*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
 306     "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
 307 /*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
 308     "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
 309 /*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
 310     "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
 311 /*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
 312     "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
 313 /*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
 314     "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
 315 /*  "zu",  "zun", "zxx", "zza",                                         */
 316     "zul", "zun", "zxx", "zza",
 317 NULL,
 318 /*  "in",  "iw",  "ji",  "jw",  "sh",                          */
 319     "ind", "heb", "yid", "jaw", "srp",
 320 NULL
 321 };
 322
 323 /**
 324  * Table of 2-letter country codes.
 325  *
 326  * This list must be in sorted order.  This list is returned directly
 327  * to the user by some API.
 328  *
 329  * This list must be kept in sync with COUNTRIES_3, with corresponding
 330  * entries matched.
 331  *
 332  * This table should be terminated with a NULL entry, followed by a
 333  * second list, and another NULL entry.  The first list is visible to
 334  * user code when this array is returned by API.  The second list
 335  * contains codes we support, but do not expose through user API.
 336  *
 337  * Notes:
 338  *
 339  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 340  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 341  * new codes keeping the old ones for compatibility updated to include
 342  * 1999/12/03 revisions *CWB*
 343  *
 344  * RO(ROM) is now RO(ROU) according to
 345  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 346  */
 347 static const char * const COUNTRIES[] = {
 348     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
 349     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
 350     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
 351     "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
 352     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
 353     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
 354     "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
 355     "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
 356     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
 357     "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
 358     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
 359     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
 360     "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
 361     "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
 362     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
 363     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
 364     "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
 365     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
 366     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
 367     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
 368     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
 369     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
 370     "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
 371     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
 372     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
 373     "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
 374     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
 375     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
 376     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
 377     "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
 378 NULL,
 379     "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
 380 NULL
 381 };
 382
 383 static const char* const DEPRECATED_COUNTRIES[] ={
 384     "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
 385 };
 386 static const char* const REPLACEMENT_COUNTRIES[] = {
 387 /*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
 388     "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
 389 };
 390
 391 /**
 392  * Table of 3-letter country codes.
 393  *
 394  * This is a lookup table used to convert 3-letter country codes to
 395  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 396  * For all valid i, COUNTRIES[i] must refer to the same country as
 397  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 398  * to make eyeballing this baby easier.
 399  *
 400  * This table should be terminated with a NULL entry, followed by a
 401  * second list, and another NULL entry.  The two lists correspond to
 402  * the two lists in COUNTRIES.
 403  */
 404 static const char * const COUNTRIES_3[] = {
 405 /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
 406     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
 407 /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
 408     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
 409 /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
 410     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
 411 /*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
 412     "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
 413 /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
 414     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
 415 /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
 416     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
 417 /*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
 418     "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
 419 /*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
 420     "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
 421 /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
 422     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
 423 /*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
 424     "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
 425 /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
 426     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
 427 /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
 428     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
 429 /*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
 430     "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
 431 /*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
 432     "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
 433 /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
 434     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
 435 /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
 436     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
 437 /*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
 438     "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
 439 /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
 440     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
 441 /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
 442     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
 443 /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
 444     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
 445 /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
 446     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
 447 /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
 448     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
 449 /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
 450     "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
 451 /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
 452     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
 453 /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
 454     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
 455 /*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
 456     "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
 457 /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
 458     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
 459 /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
 460     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
 461 /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
 462     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
 463 /*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
 464     "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
 465 NULL,
 466 /*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
 467     "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
 468 NULL
 469 };
 470
 471 typedef struct CanonicalizationMap {
 472     const char *id;          /* input ID */
 473     const char *canonicalID; /* canonicalized output ID */
 474     const char *keyword;     /* keyword, or NULL if none */
 475     const char *value;       /* keyword value, or NULL if kw==NULL */
 476 } CanonicalizationMap;
 477
 478 /**
 479  * A map to canonicalize locale IDs.  This handles a variety of
 480  * different semantic kinds of transformations.
 481  */
 482 static const CanonicalizationMap CANONICALIZE_MAP[] = {
 483     { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
 484     { "C",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
 485     { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
 486     { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
 487     { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
 488     { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
 489     { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
 490     { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
 491     { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
 492     { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
 493     { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
 494     { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
 495     { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
 496     { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
 497     { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
 498     { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
 499     { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
 500     { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
 501     { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
 502     { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
 503     { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
 504     { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
 505     { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
 506     { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
 507     { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
 508     { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
 509     { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
 510     { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
 511     { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
 512     { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
 513     { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
 514     { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
 515     { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
 516     { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
 517     { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
 518     { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
 519     { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
 520     { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
 521     { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
 522     { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
 523     { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
 524     { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
 525     { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
 526     { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
 527     { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
 528     { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
 529     { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
 530     { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
 531     { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
 532     { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
 533     { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
 534     { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
 535     { "zh_YUE",         "zh__YUE", NULL, NULL }, /* registered name */
 536 };
 537
 538 typedef struct VariantMap {
 539     const char *variant;          /* input ID */
 540     const char *keyword;     /* keyword, or NULL if none */
 541     const char *value;       /* keyword value, or NULL if kw==NULL */
 542 } VariantMap;
 543
 544 static const VariantMap VARIANT_MAP[] = {
 545     { "EURO",   "currency", "EUR" },
 546     { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
 547     { "STROKE", "collation", "stroke" }  /* Solaris variant */
 548 };
 549
 550 /* ### Keywords **************************************************/
 551
 552 #define ULOC_KEYWORD_BUFFER_LEN 25
 553 #define ULOC_MAX_NO_KEYWORDS 25
 554
 555 static const char *
 556 locale_getKeywordsStart(const char *localeID) {
 557     const char *result = NULL;
 558     if((result = uprv_strchr(localeID, '@')) != NULL) {
 559         return result;
 560     }
 561 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
 562     else {
 563         /* We do this because the @ sign is variant, and the @ sign used on one
 564         EBCDIC machine won't be compiled the same way on other EBCDIC based
 565         machines. */
 566         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
 567         const uint8_t *charToFind = ebcdicSigns;
 568         while(*charToFind) {
 569             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
 570                 return result;
 571             }
 572             charToFind++;
 573         }
 574     }
 575 #endif
 576     return NULL;
 577 }
 578
 579 /**
 580  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
 581  * @param keywordName incoming name to be canonicalized
 582  * @param status return status (keyword too long)
 583  * @return length of the keyword name
 584  */
 585 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
 586 {
 587   int32_t i;
 588   int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
 589
 590   if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
 591     /* keyword name too long for internal buffer */
 592     *status = U_INTERNAL_PROGRAM_ERROR;
 593           return 0;
 594   }
 595
 596   /* normalize the keyword name */
 597   for(i = 0; i < keywordNameLen; i++) {
 598     buf[i] = uprv_tolower(keywordName[i]);
 599   }
 600   buf[i] = 0;
 601
 602   return keywordNameLen;
 603 }
 604
 605 typedef struct {
 606     char keyword[ULOC_KEYWORD_BUFFER_LEN];
 607     int32_t keywordLen;
 608     const char *valueStart;
 609     int32_t valueLen;
 610 } KeywordStruct;
 611
 612 static int32_t U_CALLCONV
 613 compareKeywordStructs(const void *context, const void *left, const void *right) {
 614     const char* leftString = ((const KeywordStruct *)left)->keyword;
 615     const char* rightString = ((const KeywordStruct *)right)->keyword;
 616     return uprv_strcmp(leftString, rightString);
 617 }
 618
 619 /**
 620  * Both addKeyword and addValue must already be in canonical form.
 621  * Either both addKeyword and addValue are NULL, or neither is NULL.
 622  * If they are not NULL they must be zero terminated.
 623  * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
 624  */
 625 static int32_t
 626 _getKeywords(const char *localeID,
 627              char prev,
 628              char *keywords, int32_t keywordCapacity,
 629              char *values, int32_t valuesCapacity, int32_t *valLen,
 630              UBool valuesToo,
 631              const char* addKeyword,
 632              const char* addValue,
 633              UErrorCode *status)
 634 {
 635     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
 636
 637     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
 638     int32_t numKeywords = 0;
 639     const char* pos = localeID;
 640     const char* equalSign = NULL;
 641     const char* semicolon = NULL;
 642     int32_t i = 0, j, n;
 643     int32_t keywordsLen = 0;
 644     int32_t valuesLen = 0;
 645
 646     if(prev == '@') { /* start of keyword definition */
 647         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
 648         do {
 649             UBool duplicate = FALSE;
 650             /* skip leading spaces */
 651             while(*pos == ' ') {
 652                 pos++;
 653             }
 654             if (!*pos) { /* handle trailing "; " */
 655                 break;
 656             }
 657             if(numKeywords == maxKeywords) {
 658                 *status = U_INTERNAL_PROGRAM_ERROR;
 659                 return 0;
 660             }
 661             equalSign = uprv_strchr(pos, '=');
 662             semicolon = uprv_strchr(pos, ';');
 663             /* lack of '=' [foo@currency] is illegal */
 664             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
 665             if(!equalSign || (semicolon && semicolon<equalSign)) {
 666                 *status = U_INVALID_FORMAT_ERROR;
 667                 return 0;
 668             }
 669             /* need to normalize both keyword and keyword name */
 670             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
 671                 /* keyword name too long for internal buffer */
 672                 *status = U_INTERNAL_PROGRAM_ERROR;
 673                 return 0;
 674             }
 675             for(i = 0, n = 0; i < equalSign - pos; ++i) {
 676                 if (pos[i] != ' ') {
 677                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
 678                 }
 679             }
 680             keywordList[numKeywords].keyword[n] = 0;
 681             keywordList[numKeywords].keywordLen = n;
 682             /* now grab the value part. First we skip the '=' */
 683             equalSign++;
 684             /* then we leading spaces */
 685             while(*equalSign == ' ') {
 686                 equalSign++;
 687             }
 688             keywordList[numKeywords].valueStart = equalSign;
 689
 690             pos = semicolon;
 691             i = 0;
 692             if(pos) {
 693                 while(*(pos - i - 1) == ' ') {
 694                     i++;
 695                 }
 696                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
 697                 pos++;
 698             } else {
 699                 i = (int32_t)uprv_strlen(equalSign);
 700                 while(equalSign[i-1] == ' ') {
 701                     i--;
 702                 }
 703                 keywordList[numKeywords].valueLen = i;
 704             }
 705             /* If this is a duplicate keyword, then ignore it */
 706             for (j=0; j<numKeywords; ++j) {
 707                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
 708                     duplicate = TRUE;
 709                     break;
 710                 }
 711             }
 712             if (!duplicate) {
 713                 ++numKeywords;
 714             }
 715         } while(pos);
 716
 717         /* Handle addKeyword/addValue. */
 718         if (addKeyword != NULL) {
 719             UBool duplicate = FALSE;
 720             U_ASSERT(addValue != NULL);
 721             /* Search for duplicate; if found, do nothing. Explicit keyword
 722                overrides addKeyword. */
 723             for (j=0; j<numKeywords; ++j) {
 724                 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
 725                     duplicate = TRUE;
 726                     break;
 727                 }
 728             }
 729             if (!duplicate) {
 730                 if (numKeywords == maxKeywords) {
 731                     *status = U_INTERNAL_PROGRAM_ERROR;
 732                     return 0;
 733                 }
 734                 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
 735                 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
 736                 keywordList[numKeywords].valueStart = addValue;
 737                 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
 738                 ++numKeywords;
 739             }
 740         } else {
 741             U_ASSERT(addValue == NULL);
 742         }
 743
 744         /* now we have a list of keywords */
 745         /* we need to sort it */
 746         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
 747
 748         /* Now construct the keyword part */
 749         for(i = 0; i < numKeywords; i++) {
 750             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
 751                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
 752                 if(valuesToo) {
 753                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';
 754                 } else {
 755                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;
 756                 }
 757             }
 758             keywordsLen += keywordList[i].keywordLen + 1;
 759             if(valuesToo) {
 760                 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
 761                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
 762                 }
 763                 keywordsLen += keywordList[i].valueLen;
 764
 765                 if(i < numKeywords - 1) {
 766                     if(keywordsLen < keywordCapacity) {
 767                         keywords[keywordsLen] = ';';
 768                     }
 769                     keywordsLen++;
 770                 }
 771             }
 772             if(values) {
 773                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
 774                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
 775                     values[valuesLen + keywordList[i].valueLen] = 0;
 776                 }
 777                 valuesLen += keywordList[i].valueLen + 1;
 778             }
 779         }
 780         if(values) {
 781             values[valuesLen] = 0;
 782             if(valLen) {
 783                 *valLen = valuesLen;
 784             }
 785         }
 786         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
 787     } else {
 788         return 0;
 789     }
 790 }
 791
 792 U_CFUNC int32_t
 793 locale_getKeywords(const char *localeID,
 794                    char prev,
 795                    char *keywords, int32_t keywordCapacity,
 796                    char *values, int32_t valuesCapacity, int32_t *valLen,
 797                    UBool valuesToo,
 798                    UErrorCode *status) {
 799     return _getKeywords(localeID, prev, keywords, keywordCapacity,
 800                         values, valuesCapacity, valLen, valuesToo,
 801                         NULL, NULL, status);
 802 }
 803
 804 U_CAPI int32_t U_EXPORT2
 805 uloc_getKeywordValue(const char* localeID,
 806                      const char* keywordName,
 807                      char* buffer, int32_t bufferCapacity,
 808                      UErrorCode* status)
 809 {
 810     const char* nextSeparator = NULL;
 811     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 812     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 813     int32_t i = 0;
 814     int32_t result = 0;
 815
 816     if(status && U_SUCCESS(*status) && localeID) {
 817
 818       const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
 819       if(startSearchHere == NULL) {
 820           /* no keywords, return at once */
 821           return 0;
 822       }
 823
 824       locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 825       if(U_FAILURE(*status)) {
 826         return 0;
 827       }
 828
 829       /* find the first keyword */
 830       while(startSearchHere) {
 831           startSearchHere++;
 832           /* skip leading spaces (allowed?) */
 833           while(*startSearchHere == ' ') {
 834               startSearchHere++;
 835           }
 836           nextSeparator = uprv_strchr(startSearchHere, '=');
 837           /* need to normalize both keyword and keyword name */
 838           if(!nextSeparator) {
 839               break;
 840           }
 841           if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
 842               /* keyword name too long for internal buffer */
 843               *status = U_INTERNAL_PROGRAM_ERROR;
 844               return 0;
 845           }
 846           for(i = 0; i < nextSeparator - startSearchHere; i++) {
 847               localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
 848           }
 849           /* trim trailing spaces */
 850           while(startSearchHere[i-1] == ' ') {
 851               i--;
 852           }
 853           localeKeywordNameBuffer[i] = 0;
 854
 855           startSearchHere = uprv_strchr(nextSeparator, ';');
 856
 857           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
 858               nextSeparator++;
 859               while(*nextSeparator == ' ') {
 860                   nextSeparator++;
 861               }
 862               /* we actually found the keyword. Copy the value */
 863               if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
 864                   while(*(startSearchHere-1) == ' ') {
 865                       startSearchHere--;
 866                   }
 867                   uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
 868                   result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
 869               } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
 870                   i = (int32_t)uprv_strlen(nextSeparator);
 871                   while(nextSeparator[i - 1] == ' ') {
 872                       i--;
 873                   }
 874                   uprv_strncpy(buffer, nextSeparator, i);
 875                   result = u_terminateChars(buffer, bufferCapacity, i, status);
 876               } else {
 877                   /* give a bigger buffer, please */
 878                   *status = U_BUFFER_OVERFLOW_ERROR;
 879                   if(startSearchHere) {
 880                       result = (int32_t)(startSearchHere - nextSeparator);
 881                   } else {
 882                       result = (int32_t)uprv_strlen(nextSeparator);
 883                   }
 884               }
 885               return result;
 886           }
 887       }
 888     }
 889     return 0;
 890 }
 891
 892 U_CAPI int32_t U_EXPORT2
 893 uloc_setKeywordValue(const char* keywordName,
 894                      const char* keywordValue,
 895                      char* buffer, int32_t bufferCapacity,
 896                      UErrorCode* status)
 897 {
 898     /* TODO: sorting. removal. */
 899     int32_t keywordNameLen;
 900     int32_t keywordValueLen;
 901     int32_t bufLen;
 902     int32_t needLen = 0;
 903     int32_t foundValueLen;
 904     int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
 905     char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 906     char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
 907     int32_t i = 0;
 908     int32_t rc;
 909     char* nextSeparator = NULL;
 910     char* nextEqualsign = NULL;
 911     char* startSearchHere = NULL;
 912     char* keywordStart = NULL;
 913     char *insertHere = NULL;
 914     if(U_FAILURE(*status)) {
 915         return -1;
 916     }
 917     if(bufferCapacity>1) {
 918         bufLen = (int32_t)uprv_strlen(buffer);
 919     } else {
 920         *status = U_ILLEGAL_ARGUMENT_ERROR;
 921         return 0;
 922     }
 923     if(bufferCapacity<bufLen) {
 924         /* The capacity is less than the length?! Is this NULL terminated? */
 925         *status = U_ILLEGAL_ARGUMENT_ERROR;
 926         return 0;
 927     }
 928     if(keywordValue && !*keywordValue) {
 929         keywordValue = NULL;
 930     }
 931     if(keywordValue) {
 932         keywordValueLen = (int32_t)uprv_strlen(keywordValue);
 933     } else {
 934         keywordValueLen = 0;
 935     }
 936     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
 937     if(U_FAILURE(*status)) {
 938         return 0;
 939     }
 940     startSearchHere = (char*)locale_getKeywordsStart(buffer);
 941     if(startSearchHere == NULL || (startSearchHere[1]==0)) {
 942         if(!keywordValue) { /* no keywords = nothing to remove */
 943             return bufLen;
 944         }
 945
 946         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
 947         if(startSearchHere) { /* had a single @ */
 948             needLen--; /* already had the @ */
 949             /* startSearchHere points at the @ */
 950         } else {
 951             startSearchHere=buffer+bufLen;
 952         }
 953         if(needLen >= bufferCapacity) {
 954             *status = U_BUFFER_OVERFLOW_ERROR;
 955             return needLen; /* no change */
 956         }
 957         *startSearchHere = '@';
 958         startSearchHere++;
 959         uprv_strcpy(startSearchHere, keywordNameBuffer);
 960         startSearchHere += keywordNameLen;
 961         *startSearchHere = '=';
 962         startSearchHere++;
 963         uprv_strcpy(startSearchHere, keywordValue);
 964         startSearchHere+=keywordValueLen;
 965         return needLen;
 966     } /* end shortcut - no @ */
 967
 968     keywordStart = startSearchHere;
 969     /* search for keyword */
 970     while(keywordStart) {
 971         keywordStart++;
 972         /* skip leading spaces (allowed?) */
 973         while(*keywordStart == ' ') {
 974             keywordStart++;
 975         }
 976         nextEqualsign = uprv_strchr(keywordStart, '=');
 977         /* need to normalize both keyword and keyword name */
 978         if(!nextEqualsign) {
 979             break;
 980         }
 981         if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
 982             /* keyword name too long for internal buffer */
 983             *status = U_INTERNAL_PROGRAM_ERROR;
 984             return 0;
 985         }
 986         for(i = 0; i < nextEqualsign - keywordStart; i++) {
 987             localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
 988         }
 989         /* trim trailing spaces */
 990         while(keywordStart[i-1] == ' ') {
 991             i--;
 992         }
 993         localeKeywordNameBuffer[i] = 0;
 994
 995         nextSeparator = uprv_strchr(nextEqualsign, ';');
 996         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
 997         if(rc == 0) {
 998             nextEqualsign++;
 999             while(*nextEqualsign == ' ') {
1000                 nextEqualsign++;
1001             }
1002             /* we actually found the keyword. Change the value */
1003             if (nextSeparator) {
1004                 keywordAtEnd = 0;
1005                 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1006             } else {
1007                 keywordAtEnd = 1;
1008                 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1009             }
1010             if(keywordValue) { /* adding a value - not removing */
1011               if(foundValueLen == keywordValueLen) {
1012                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1013                 return bufLen; /* no change in size */
1014               } else if(foundValueLen > keywordValueLen) {
1015                 int32_t delta = foundValueLen - keywordValueLen;
1016                 if(nextSeparator) { /* RH side */
1017                   uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1018                 }
1019                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1020                 bufLen -= delta;
1021                 buffer[bufLen]=0;
1022                 return bufLen;
1023               } else { /* FVL < KVL */
1024                 int32_t delta = keywordValueLen - foundValueLen;
1025                 if((bufLen+delta) >= bufferCapacity) {
1026                   *status = U_BUFFER_OVERFLOW_ERROR;
1027                   return bufLen+delta;
1028                 }
1029                 if(nextSeparator) { /* RH side */
1030                   uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1031                 }
1032                 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1033                 bufLen += delta;
1034                 buffer[bufLen]=0;
1035                 return bufLen;
1036               }
1037             } else { /* removing a keyword */
1038               if(keywordAtEnd) {
1039                 /* zero out the ';' or '@' just before startSearchhere */
1040                 keywordStart[-1] = 0;
1041                 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1042               } else {
1043                 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1044                 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1045                 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1046               }
1047             }
1048         } else if(rc<0){ /* end match keyword */
1049           /* could insert at this location. */
1050           insertHere = keywordStart;
1051         }
1052         keywordStart = nextSeparator;
1053     } /* end loop searching */
1054
1055     if(!keywordValue) {
1056       return bufLen; /* removal of non-extant keyword - no change */
1057     }
1058
1059     /* we know there is at least one keyword. */
1060     needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1061     if(needLen >= bufferCapacity) {
1062         *status = U_BUFFER_OVERFLOW_ERROR;
1063         return needLen; /* no change */
1064     }
1065
1066     if(insertHere) {
1067       uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1068       keywordStart = insertHere;
1069     } else {
1070       keywordStart = buffer+bufLen;
1071       *keywordStart = ';';
1072       keywordStart++;
1073     }
1074     uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1075     keywordStart += keywordNameLen;
1076     *keywordStart = '=';
1077     keywordStart++;
1078     uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1079     keywordStart+=keywordValueLen;
1080     if(insertHere) {
1081       *keywordStart = ';';
1082       keywordStart++;
1083     }
1084     buffer[needLen]=0;
1085     return needLen;
1086 }
1087
1088 /* ### ID parsing implementation **************************************************/
1089
/* NOTE: all of the macros below evaluate their argument more than once;
   do not pass expressions with side effects. Arguments are fully
   parenthesized so that any expression can be passed safely. */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/*returns TRUE if a is one of the letters that may start a special prefix*/
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1103
1104 static char* _strnchr(const char* str, int32_t len, char c) {
1105     U_ASSERT(str != 0 && len >= 0);
1106     while (len-- != 0) {
1107         char d = *str;
1108         if (d == c) {
1109             return (char*) str;
1110         } else if (d == 0) {
1111             break;
1112         }
1113         ++str;
1114     }
1115     return NULL;
1116 }
1117
1118 /**
1119  * Lookup 'key' in the array 'list'.  The array 'list' should contain
1120  * a NULL entry, followed by more entries, and a second NULL entry.
1121  *
1122  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1123  * COUNTRIES_3.
1124  */
1125 static int16_t _findIndex(const char* const* list, const char* key)
1126 {
1127     const char* const* anchor = list;
1128     int32_t pass = 0;
1129
1130     /* Make two passes through two NULL-terminated arrays at 'list' */
1131     while (pass++ < 2) {
1132         while (*list) {
1133             if (uprv_strcmp(key, *list) == 0) {
1134                 return (int16_t)(list - anchor);
1135             }
1136             list++;
1137         }
1138         ++list;     /* skip final NULL *CWB*/
1139     }
1140     return -1;
1141 }
1142
1143 /* count the length of src while copying it to dest; return strlen(src) */
1144 static U_INLINE int32_t
1145 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1146     const char *anchor;
1147     char c;
1148
1149     anchor=src;
1150     for(;;) {
1151         if((c=*src)==0) {
1152             return (int32_t)(src-anchor);
1153         }
1154         if(destCapacity<=0) {
1155             return (int32_t)((src-anchor)+uprv_strlen(src));
1156         }
1157         ++src;
1158         *dest++=c;
1159         --destCapacity;
1160     }
1161 }
1162
1163 static const char*
1164 uloc_getCurrentCountryID(const char* oldID){
1165     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1166     if (offset >= 0) {
1167         return REPLACEMENT_COUNTRIES[offset];
1168     }
1169     return oldID;
1170 }
1171 static const char*
1172 uloc_getCurrentLanguageID(const char* oldID){
1173     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1174     if (offset >= 0) {
1175         return REPLACEMENT_LANGUAGES[offset];
1176     }
1177     return oldID;
1178 }
1179 /*
1180  * the internal functions _getLanguage(), _getCountry(), _getVariant()
1181  * avoid duplicating code to handle the earlier locale ID pieces
1182  * in the functions for the later ones by
1183  * setting the *pEnd pointer to where they stopped parsing
1184  *
1185  * TODO try to use this in Locale
1186  */
1187 static int32_t
1188 _getLanguage(const char *localeID,
1189              char *language, int32_t languageCapacity,
1190              const char **pEnd) {
1191     int32_t i=0;
1192     int32_t offset;
1193     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1194
1195     /* if it starts with i- or x- then copy that prefix */
1196     if(_isIDPrefix(localeID)) {
1197         if(i<languageCapacity) {
1198             language[i]=(char)uprv_tolower(*localeID);
1199         }
1200         if(i<languageCapacity) {
1201             language[i+1]='-';
1202         }
1203         i+=2;
1204         localeID+=2;
1205     }
1206
1207     /* copy the language as far as possible and count its length */
1208     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1209         if(i<languageCapacity) {
1210             language[i]=(char)uprv_tolower(*localeID);
1211         }
1212         if(i<3) {
1213             lang[i]=(char)uprv_tolower(*localeID);
1214         }
1215         i++;
1216         localeID++;
1217     }
1218
1219     if(i==3) {
1220         /* convert 3 character code to 2 character code if possible *CWB*/
1221         offset=_findIndex(LANGUAGES_3, lang);
1222         if(offset>=0) {
1223             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1224         }
1225     }
1226
1227     if(pEnd!=NULL) {
1228         *pEnd=localeID;
1229     }
1230     return i;
1231 }
1232
1233 static int32_t
1234 _getScript(const char *localeID,
1235             char *script, int32_t scriptCapacity,
1236             const char **pEnd)
1237 {
1238     int32_t idLen = 0;
1239
1240     if (pEnd != NULL) {
1241         *pEnd = localeID;
1242     }
1243
1244     /* copy the second item as far as possible and count its length */
1245     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1246         idLen++;
1247     }
1248
1249     /* If it's exactly 4 characters long, then it's a script and not a country. */
1250     if (idLen == 4) {
1251         int32_t i;
1252         if (pEnd != NULL) {
1253             *pEnd = localeID+idLen;
1254         }
1255         if(idLen > scriptCapacity) {
1256             idLen = scriptCapacity;
1257         }
1258         if (idLen >= 1) {
1259             script[0]=(char)uprv_toupper(*(localeID++));
1260         }
1261         for (i = 1; i < idLen; i++) {
1262             script[i]=(char)uprv_tolower(*(localeID++));
1263         }
1264     }
1265     else {
1266         idLen = 0;
1267     }
1268     return idLen;
1269 }
1270
1271 static int32_t
1272 _getCountry(const char *localeID,
1273             char *country, int32_t countryCapacity,
1274             const char **pEnd)
1275 {
1276     int32_t i=0;
1277     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1278     int32_t offset;
1279
1280     /* copy the country as far as possible and count its length */
1281     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1282         if(i<countryCapacity) {
1283             country[i]=(char)uprv_toupper(*localeID);
1284         }
1285         if(i<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1286             cnty[i]=(char)uprv_toupper(*localeID);
1287         }
1288         i++;
1289         localeID++;
1290     }
1291
1292     /* convert 3 character code to 2 character code if possible *CWB*/
1293     if(i==3) {
1294         offset=_findIndex(COUNTRIES_3, cnty);
1295         if(offset>=0) {
1296             i=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1297         }
1298     }
1299
1300     if(pEnd!=NULL) {
1301         *pEnd=localeID;
1302     }
1303     return i;
1304 }
1305
1306 /**
1307  * @param needSeparator if true, then add leading '_' if any variants
1308  * are added to 'variant'
1309  */
1310 static int32_t
1311 _getVariantEx(const char *localeID,
1312               char prev,
1313               char *variant, int32_t variantCapacity,
1314               UBool needSeparator) {
1315     int32_t i=0;
1316
1317     /* get one or more variant tags and separate them with '_' */
1318     if(_isIDSeparator(prev)) {
1319         /* get a variant string after a '-' or '_' */
1320         while(!_isTerminator(*localeID)) {
1321             if (needSeparator) {
1322                 if (i<variantCapacity) {
1323                     variant[i] = '_';
1324                 }
1325                 ++i;
1326                 needSeparator = FALSE;
1327             }
1328             if(i<variantCapacity) {
1329                 variant[i]=(char)uprv_toupper(*localeID);
1330                 if(variant[i]=='-') {
1331                     variant[i]='_';
1332                 }
1333             }
1334             i++;
1335             localeID++;
1336         }
1337     }
1338
1339     /* if there is no variant tag after a '-' or '_' then look for '@' */
1340     if(i==0) {
1341         if(prev=='@') {
1342             /* keep localeID */
1343         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1344             ++localeID; /* point after the '@' */
1345         } else {
1346             return 0;
1347         }
1348         while(!_isTerminator(*localeID)) {
1349             if (needSeparator) {
1350                 if (i<variantCapacity) {
1351                     variant[i] = '_';
1352                 }
1353                 ++i;
1354                 needSeparator = FALSE;
1355             }
1356             if(i<variantCapacity) {
1357                 variant[i]=(char)uprv_toupper(*localeID);
1358                 if(variant[i]=='-' || variant[i]==',') {
1359                     variant[i]='_';
1360                 }
1361             }
1362             i++;
1363             localeID++;
1364         }
1365     }
1366
1367     return i;
1368 }
1369
1370 static int32_t
1371 _getVariant(const char *localeID,
1372             char prev,
1373             char *variant, int32_t variantCapacity) {
1374     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1375 }
1376
1377 /**
1378  * Delete ALL instances of a variant from the given list of one or
1379  * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1380  * @param variants the source string of one or more variants,
1381  * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1382  * terminated; if it is, trailing zero will NOT be maintained.
1383  * @param variantsLen length of variants
1384  * @param toDelete variant to delete, without separators, e.g.  "EURO"
1385  * or "PREEURO"; not zero terminated
1386  * @param toDeleteLen length of toDelete
1387  * @return number of characters deleted from variants
1388  */
1389 static int32_t
1390 _deleteVariant(char* variants, int32_t variantsLen,
1391                const char* toDelete, int32_t toDeleteLen)
1392 {
1393     int32_t delta = 0; /* number of chars deleted */
1394     for (;;) {
1395         UBool flag = FALSE;
1396         if (variantsLen < toDeleteLen) {
1397             return delta;
1398         }
1399         if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1400             (variantsLen == toDeleteLen ||
1401              (flag=(variants[toDeleteLen] == '_'))))
1402         {
1403             int32_t d = toDeleteLen + (flag?1:0);
1404             variantsLen -= d;
1405             delta += d;
1406             if (variantsLen > 0) {
1407                 uprv_memmove(variants, variants+d, variantsLen);
1408             }
1409         } else {
1410             char* p = _strnchr(variants, variantsLen, '_');
1411             if (p == NULL) {
1412                 return delta;
1413             }
1414             ++p;
1415             variantsLen -= (int32_t)(p - variants);
1416             variants = p;
1417         }
1418     }
1419 }
1420
1421 /* Keyword enumeration */
1422
/* Per-enumeration state stored in the context of a keyword UEnumeration. */
typedef struct UKeywordsContext {
    char* keywords; /* heap buffer of zero-terminated keywords laid end to end; an empty string ends the list; freed on close */
    char* current;  /* iteration cursor into keywords: start of the next keyword to return */
} UKeywordsContext;
1427
1428 static void U_CALLCONV
1429 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1430     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1431     uprv_free(enumerator->context);
1432     uprv_free(enumerator);
1433 }
1434
1435 static int32_t U_CALLCONV
1436 uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1437     char *kw = ((UKeywordsContext *)en->context)->keywords;
1438     int32_t result = 0;
1439     while(*kw) {
1440         result++;
1441         kw += uprv_strlen(kw)+1;
1442     }
1443     return result;
1444 }
1445
1446 static const char* U_CALLCONV
1447 uloc_kw_nextKeyword(UEnumeration* en,
1448                     int32_t* resultLength,
1449                     UErrorCode* status) {
1450     const char* result = ((UKeywordsContext *)en->context)->current;
1451     int32_t len = 0;
1452     if(*result) {
1453         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1454         ((UKeywordsContext *)en->context)->current += len+1;
1455     } else {
1456         result = NULL;
1457     }
1458     if (resultLength) {
1459         *resultLength = len;
1460     }
1461     return result;
1462 }
1463
1464 static void U_CALLCONV
1465 uloc_kw_resetKeywords(UEnumeration* en,
1466                       UErrorCode* status) {
1467     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1468 }
1469
/*
 * Template for keyword enumerations.  uloc_openKeywordList() copies this
 * struct into every UEnumeration it allocates and then assigns the
 * per-instance context; the callbacks below operate on that context.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL,
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1479
1480 U_CAPI UEnumeration* U_EXPORT2
1481 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1482 {
1483     UKeywordsContext *myContext = NULL;
1484     UEnumeration *result = NULL;
1485
1486     if(U_FAILURE(*status)) {
1487         return NULL;
1488     }
1489     result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1490     /* Null pointer test */
1491     if (result == NULL) {
1492         *status = U_MEMORY_ALLOCATION_ERROR;
1493         return NULL;
1494     }
1495     uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1496     myContext = uprv_malloc(sizeof(UKeywordsContext));
1497     if (myContext == NULL) {
1498         *status = U_MEMORY_ALLOCATION_ERROR;
1499         uprv_free(result);
1500         return NULL;
1501     }
1502     myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1503     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1504     myContext->keywords[keywordListSize] = 0;
1505     myContext->current = myContext->keywords;
1506     result->context = myContext;
1507     return result;
1508 }
1509
1510 U_CAPI UEnumeration* U_EXPORT2
1511 uloc_openKeywords(const char* localeID,
1512                         UErrorCode* status)
1513 {
1514     int32_t i=0;
1515     char keywords[256];
1516     int32_t keywordsCapacity = 256;
1517     if(status==NULL || U_FAILURE(*status)) {
1518         return 0;
1519     }
1520
1521     if(localeID==NULL) {
1522         localeID=uloc_getDefault();
1523     }
1524
1525     /* Skip the language */
1526     _getLanguage(localeID, NULL, 0, &localeID);
1527     if(_isIDSeparator(*localeID)) {
1528         const char *scriptID;
1529         /* Skip the script if available */
1530         _getScript(localeID+1, NULL, 0, &scriptID);
1531         if(scriptID != localeID+1) {
1532             /* Found optional script */
1533             localeID = scriptID;
1534         }
1535         /* Skip the Country */
1536         if (_isIDSeparator(*localeID)) {
1537             _getCountry(localeID+1, NULL, 0, &localeID);
1538             if(_isIDSeparator(*localeID)) {
1539                 _getVariant(localeID+1, *localeID, NULL, 0);
1540             }
1541         }
1542     }
1543
1544     /* keywords are located after '@' */
1545     if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
1546         i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1547     }
1548
1549     if(i) {
1550         return uloc_openKeywordList(keywords, i, status);
1551     } else {
1552         return NULL;
1553     }
1554 }
1555
1556
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * Nonzero if at least one bit of 'mask' is set in 'options'.
 * Both arguments are parenthesized so that compound expressions such as
 * (A | B) can be passed without operator-precedence surprises.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The ID "i-default" as a plain character array: NOT zero terminated, so
   it must only be compared with a length-limited comparison. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Number of characters in i_default, as int32_t to match the signed lengths it is compared with */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
1565
1566 /**
1567  * Canonicalize the given localeID, to level 1 or to level 2,
1568  * depending on the options.  To specify level 1, pass in options=0.
1569  * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1570  *
1571  * This is the code underlying uloc_getName and uloc_canonicalize.
1572  */
1573 static int32_t
1574 _canonicalize(const char* localeID,
1575               char* result,
1576               int32_t resultCapacity,
1577               uint32_t options,
1578               UErrorCode* err) {
1579     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1580     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1581     const char* origLocaleID;
1582     const char* keywordAssign = NULL;
1583     const char* separatorIndicator = NULL;
1584     const char* addKeyword = NULL;
1585     const char* addValue = NULL;
1586     char* name;
1587     char* variant = NULL; /* pointer into name, or NULL */
1588
1589     if (U_FAILURE(*err)) {
1590         return 0;
1591     }
1592
1593     if (localeID==NULL) {
1594         localeID=uloc_getDefault();
1595     }
1596     origLocaleID=localeID;
1597
1598     /* if we are doing a full canonicalization, then put results in
1599        localeBuffer, if necessary; otherwise send them to result. */
1600     if (OPTION_SET(options, _ULOC_CANONICALIZE) &&
1601         (result == NULL || resultCapacity <  sizeof(localeBuffer))) {
1602         name = localeBuffer;
1603         nameCapacity = sizeof(localeBuffer);
1604     } else {
1605         name = result;
1606         nameCapacity = resultCapacity;
1607     }
1608
1609     /* get all pieces, one after another, and separate with '_' */
1610     len=_getLanguage(localeID, name, nameCapacity, &localeID);
1611
1612     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1613         const char *d = uloc_getDefault();
1614
1615         len = uprv_strlen(d);
1616
1617         if (name != NULL) {
1618             uprv_strncpy(name, d, len);
1619         }
1620     } else if(_isIDSeparator(*localeID)) {
1621         const char *scriptID;
1622
1623         ++fieldCount;
1624         if(len<nameCapacity) {
1625             name[len]='_';
1626         }
1627         ++len;
1628
1629         scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1630         if(scriptSize > 0) {
1631             /* Found optional script */
1632             localeID = scriptID;
1633             ++fieldCount;
1634             len+=scriptSize;
1635             if (_isIDSeparator(*localeID)) {
1636                 /* If there is something else, then we add the _ */
1637                 if(len<nameCapacity) {
1638                     name[len]='_';
1639                 }
1640                 ++len;
1641             }
1642         }
1643
1644         if (_isIDSeparator(*localeID)) {
1645             len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID);
1646             if(_isIDSeparator(*localeID)) {
1647                 ++fieldCount;
1648                 if(len<nameCapacity) {
1649                     name[len]='_';
1650                 }
1651                 ++len;
1652                 variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1653                 if (variantSize > 0) {
1654                     variant = name+len;
1655                     len += variantSize;
1656                     localeID += variantSize + 1; /* skip '_' and variant */
1657                 }
1658             }
1659         }
1660     }
1661
1662     /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1663     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1664         UBool done = FALSE;
1665         do {
1666             char c = *localeID;
1667             switch (c) {
1668             case 0:
1669             case '@':
1670                 done = TRUE;
1671                 break;
1672             default:
1673                 if (len<nameCapacity) {
1674                     name[len] = c;
1675                 }
1676                 ++len;
1677                 ++localeID;
1678                 break;
1679             }
1680         } while (!done);
1681     }
1682
1683     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1684        After this, localeID either points to '@' or is NULL */
1685     if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1686         keywordAssign = uprv_strchr(localeID, '=');
1687         separatorIndicator = uprv_strchr(localeID, ';');
1688     }
1689
1690     /* Copy POSIX-style variant, if any [mr@FOO] */
1691     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1692         localeID != NULL && keywordAssign == NULL) {
1693         for (;;) {
1694             char c = *localeID;
1695             if (c == 0) {
1696                 break;
1697             }
1698             if (len<nameCapacity) {
1699                 name[len] = c;
1700             }
1701             ++len;
1702             ++localeID;
1703         }
1704     }
1705
1706     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1707         /* Handle @FOO variant if @ is present and not followed by = */
1708         if (localeID!=NULL && keywordAssign==NULL) {
1709             int32_t posixVariantSize;
1710             /* Add missing '_' if needed */
1711             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1712                 do {
1713                     if(len<nameCapacity) {
1714                         name[len]='_';
1715                     }
1716                     ++len;
1717                     ++fieldCount;
1718                 } while(fieldCount<2);
1719             }
1720             posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1721                                              (UBool)(variantSize > 0));
1722             if (posixVariantSize > 0) {
1723                 if (variant == NULL) {
1724                     variant = name+len;
1725                 }
1726                 len += posixVariantSize;
1727                 variantSize += posixVariantSize;
1728             }
1729         }
1730
1731         /* Handle generic variants first */
1732         if (variant) {
1733             for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1734                 const char* variantToCompare = VARIANT_MAP[j].variant;
1735                 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1736                 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1737                 len -= variantLen;
1738                 if (variantLen > 0) {
1739                     if (name[len-1] == '_') { /* delete trailing '_' */
1740                         --len;
1741                     }
1742                     addKeyword = VARIANT_MAP[j].keyword;
1743                     addValue = VARIANT_MAP[j].value;
1744                     break;
1745                 }
1746             }
1747             if (name[len-1] == '_') { /* delete trailing '_' */
1748                 --len;
1749             }
1750         }
1751
1752         /* Look up the ID in the canonicalization map */
1753         for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1754             const char* id = CANONICALIZE_MAP[j].id;
1755             int32_t n = (int32_t)uprv_strlen(id);
1756             if (len == n && uprv_strncmp(name, id, n) == 0) {
1757                 if (n == 0 && localeID != NULL) {
1758                     break; /* Don't remap "" if keywords present */
1759                 }
1760                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1761                 if (CANONICALIZE_MAP[j].keyword) {
1762                     addKeyword = CANONICALIZE_MAP[j].keyword;
1763                     addValue = CANONICALIZE_MAP[j].value;
1764                 }
1765                 break;
1766             }
1767         }
1768     }
1769
1770     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1771         if (localeID!=NULL && keywordAssign!=NULL &&
1772             (!separatorIndicator || separatorIndicator > keywordAssign)) {
1773             if(len<nameCapacity) {
1774                 name[len]='@';
1775             }
1776             ++len;
1777             ++fieldCount;
1778             len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1779                                 addKeyword, addValue, err);
1780         } else if (addKeyword != NULL) {
1781             U_ASSERT(addValue != NULL);
1782             /* inelegant but works -- later make _getKeywords do this? */
1783             len += _copyCount(name+len, nameCapacity-len, "@");
1784             len += _copyCount(name+len, nameCapacity-len, addKeyword);
1785             len += _copyCount(name+len, nameCapacity-len, "=");
1786             len += _copyCount(name+len, nameCapacity-len, addValue);
1787         }
1788     }
1789
1790     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1791         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1792     }
1793
1794     return u_terminateChars(result, resultCapacity, len, err);
1795 }
1796
1797 /* ### ID parsing API **************************************************/
1798
1799 U_CAPI int32_t  U_EXPORT2
1800 uloc_getParent(const char*    localeID,
1801                char* parent,
1802                int32_t parentCapacity,
1803                UErrorCode* err)
1804 {
1805     const char *lastUnderscore;
1806     int32_t i;
1807
1808     if (U_FAILURE(*err))
1809         return 0;
1810
1811     if (localeID == NULL)
1812         localeID = uloc_getDefault();
1813
1814     lastUnderscore=uprv_strrchr(localeID, '_');
1815     if(lastUnderscore!=NULL) {
1816         i=(int32_t)(lastUnderscore-localeID);
1817     } else {
1818         i=0;
1819     }
1820
1821     if(i>0 && parent != localeID) {
1822         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1823     }
1824     return u_terminateChars(parent, parentCapacity, i, err);
1825 }
1826
1827 U_CAPI int32_t U_EXPORT2
1828 uloc_getLanguage(const char*    localeID,
1829          char* language,
1830          int32_t languageCapacity,
1831          UErrorCode* err)
1832 {
1833     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1834     int32_t i=0;
1835
1836     if (err==NULL || U_FAILURE(*err)) {
1837         return 0;
1838     }
1839
1840     if(localeID==NULL) {
1841         localeID=uloc_getDefault();
1842     }
1843
1844     i=_getLanguage(localeID, language, languageCapacity, NULL);
1845     return u_terminateChars(language, languageCapacity, i, err);
1846 }
1847
1848 U_CAPI int32_t U_EXPORT2
1849 uloc_getScript(const char*    localeID,
1850          char* script,
1851          int32_t scriptCapacity,
1852          UErrorCode* err)
1853 {
1854     int32_t i=0;
1855
1856     if(err==NULL || U_FAILURE(*err)) {
1857         return 0;
1858     }
1859
1860     if(localeID==NULL) {
1861         localeID=uloc_getDefault();
1862     }
1863
1864     /* skip the language */
1865     _getLanguage(localeID, NULL, 0, &localeID);
1866     if(_isIDSeparator(*localeID)) {
1867         i=_getScript(localeID+1, script, scriptCapacity, NULL);
1868     }
1869     return u_terminateChars(script, scriptCapacity, i, err);
1870 }
1871
1872 U_CAPI int32_t  U_EXPORT2
1873 uloc_getCountry(const char* localeID,
1874             char* country,
1875             int32_t countryCapacity,
1876             UErrorCode* err)
1877 {
1878     int32_t i=0;
1879
1880     if(err==NULL || U_FAILURE(*err)) {
1881         return 0;
1882     }
1883
1884     if(localeID==NULL) {
1885         localeID=uloc_getDefault();
1886     }
1887
1888     /* Skip the language */
1889     _getLanguage(localeID, NULL, 0, &localeID);
1890     if(_isIDSeparator(*localeID)) {
1891         const char *scriptID;
1892         /* Skip the script if available */
1893         _getScript(localeID+1, NULL, 0, &scriptID);
1894         if(scriptID != localeID+1) {
1895             /* Found optional script */
1896             localeID = scriptID;
1897         }
1898         if(_isIDSeparator(*localeID)) {
1899             i=_getCountry(localeID+1, country, countryCapacity, NULL);
1900         }
1901     }
1902     return u_terminateChars(country, countryCapacity, i, err);
1903 }
1904
1905 U_CAPI int32_t  U_EXPORT2
1906 uloc_getVariant(const char* localeID,
1907                 char* variant,
1908                 int32_t variantCapacity,
1909                 UErrorCode* err)
1910 {
1911     int32_t i=0;
1912
1913     if(err==NULL || U_FAILURE(*err)) {
1914         return 0;
1915     }
1916
1917     if(localeID==NULL) {
1918         localeID=uloc_getDefault();
1919     }
1920
1921     /* Skip the language */
1922     _getLanguage(localeID, NULL, 0, &localeID);
1923     if(_isIDSeparator(*localeID)) {
1924         const char *scriptID;
1925         /* Skip the script if available */
1926         _getScript(localeID+1, NULL, 0, &scriptID);
1927         if(scriptID != localeID+1) {
1928             /* Found optional script */
1929             localeID = scriptID;
1930         }
1931         /* Skip the Country */
1932         if (_isIDSeparator(*localeID)) {
1933             _getCountry(localeID+1, NULL, 0, &localeID);
1934             if(_isIDSeparator(*localeID)) {
1935                 i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1936             }
1937         }
1938     }
1939
1940     /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1941     /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1942 /*
1943     if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1944         i=_getVariant(localeID+1, '@', variant, variantCapacity);
1945     }
1946 */
1947     return u_terminateChars(variant, variantCapacity, i, err);
1948 }
1949
1950 U_CAPI int32_t  U_EXPORT2
1951 uloc_getName(const char* localeID,
1952              char* name,
1953              int32_t nameCapacity,
1954              UErrorCode* err)
1955 {
1956     return _canonicalize(localeID, name, nameCapacity, 0, err);
1957 }
1958
1959 U_CAPI int32_t  U_EXPORT2
1960 uloc_getBaseName(const char* localeID,
1961                  char* name,
1962                  int32_t nameCapacity,
1963                  UErrorCode* err)
1964 {
1965     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1966 }
1967
1968 U_CAPI int32_t  U_EXPORT2
1969 uloc_canonicalize(const char* localeID,
1970                   char* name,
1971                   int32_t nameCapacity,
1972                   UErrorCode* err)
1973 {
1974     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1975 }
1976
1977 U_CAPI const char*  U_EXPORT2
1978 uloc_getISO3Language(const char* localeID)
1979 {
1980     int16_t offset;
1981     char lang[ULOC_LANG_CAPACITY];
1982     UErrorCode err = U_ZERO_ERROR;
1983
1984     if (localeID == NULL)
1985     {
1986         localeID = uloc_getDefault();
1987     }
1988     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1989     if (U_FAILURE(err))
1990         return "";
1991     offset = _findIndex(LANGUAGES, lang);
1992     if (offset < 0)
1993         return "";
1994     return LANGUAGES_3[offset];
1995 }
1996
1997 U_CAPI const char*  U_EXPORT2
1998 uloc_getISO3Country(const char* localeID)
1999 {
2000     int16_t offset;
2001     char cntry[ULOC_LANG_CAPACITY];
2002     UErrorCode err = U_ZERO_ERROR;
2003
2004     if (localeID == NULL)
2005     {
2006         localeID = uloc_getDefault();
2007     }
2008     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2009     if (U_FAILURE(err))
2010         return "";
2011     offset = _findIndex(COUNTRIES, cntry);
2012     if (offset < 0)
2013         return "";
2014
2015     return COUNTRIES_3[offset];
2016 }
2017
2018 U_CAPI uint32_t  U_EXPORT2
2019 uloc_getLCID(const char* localeID)
2020 {
2021     UErrorCode status = U_ZERO_ERROR;
2022     char       langID[ULOC_FULLNAME_CAPACITY];
2023
2024     uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2025     if (U_FAILURE(status)) {
2026         return 0;
2027     }
2028
2029     return uprv_convertToLCID(langID, localeID, &status);
2030 }
2031
2032 U_CAPI int32_t U_EXPORT2
2033 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2034                 UErrorCode *status)
2035 {
2036     int32_t length;
2037     const char *posix = uprv_convertToPosix(hostid, status);
2038     if (U_FAILURE(*status) || posix == NULL) {
2039         return 0;
2040     }
2041     length = (int32_t)uprv_strlen(posix);
2042     if (length+1 > localeCapacity) {
2043         *status = U_BUFFER_OVERFLOW_ERROR;
2044     }
2045     else {
2046         uprv_strcpy(locale, posix);
2047     }
2048     return length;
2049 }
2050
2051 /* ### Default locale **************************************************/
2052
2053 U_CAPI const char*  U_EXPORT2
2054 uloc_getDefault()
2055 {
2056     return locale_get_default();
2057 }
2058
2059 U_CAPI void  U_EXPORT2
2060 uloc_setDefault(const char*   newDefaultLocale,
2061              UErrorCode* err)
2062 {
2063     if (U_FAILURE(*err))
2064         return;
2065     /* the error code isn't currently used for anything by this function*/
2066
2067     /* propagate change to C++ */
2068     locale_set_default(newDefaultLocale);
2069 }
2070
2071 /* ### Display name **************************************************/
2072
2073 /*
2074  * Lookup a resource bundle table item with fallback on the table level.
2075  * Regular resource bundle lookups perform fallback to parent locale bundles
2076  * and eventually the root bundle, but only for top-level items.
2077  * This function takes the name of a top-level table and of an item in that table
2078  * and performs a lookup of both, falling back until a bundle contains a table
2079  * with this item.
2080  *
2081  * Note: Only the opening of entire bundles falls back through the default locale
2082  * before root. Once a bundle is open, item lookups do not go through the
2083  * default locale because that would result in a mix of languages that is
2084  * unpredictable to the programmer and most likely useless.
2085  */
2086 static const UChar *
2087 _res_getTableStringWithFallback(const char *path, const char *locale,
2088                               const char *tableKey, const char *subTableKey,
2089                               const char *itemKey,
2090                               int32_t *pLength,
2091                               UErrorCode *pErrorCode)
2092 {
2093 /*    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
2094     UResourceBundle *rb=NULL, table, subTable;
2095     const UChar *item=NULL;
2096     UErrorCode errorCode;
2097     char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
2098
2099     /*
2100      * open the bundle for the current locale
2101      * this falls back through the locale's chain to root
2102      */
2103     errorCode=U_ZERO_ERROR;
2104     rb=ures_open(path, locale, &errorCode);
2105     if(U_FAILURE(errorCode)) {
2106         /* total failure, not even root could be opened */
2107         *pErrorCode=errorCode;
2108         return NULL;
2109     } else if(errorCode==U_USING_DEFAULT_WARNING ||
2110                 (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2111     ) {
2112         /* set the "strongest" error code (success->fallback->default->failure) */
2113         *pErrorCode=errorCode;
2114     }
2115
2116     for(;;){
2117         ures_initStackObject(&table);
2118         ures_initStackObject(&subTable);
2119         ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
2120         if (subTableKey != NULL) {
2121             /*
2122             ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
2123             item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
2124             if(U_FAILURE(errorCode)){
2125                 *pErrorCode = errorCode;
2126             }
2127
2128             break;*/
2129
2130             ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
2131         }
2132         if(U_SUCCESS(errorCode)){
2133             item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
2134             if(U_FAILURE(errorCode)){
2135                 const char* replacement = NULL;
2136                 *pErrorCode = errorCode; /*save the errorCode*/
2137                 errorCode = U_ZERO_ERROR;
2138                 /* may be a deprecated code */
2139                 if(uprv_strcmp(tableKey, "Countries")==0){
2140                     replacement =  uloc_getCurrentCountryID(itemKey);
2141                 }else if(uprv_strcmp(tableKey, "Languages")==0){
2142                     replacement =  uloc_getCurrentLanguageID(itemKey);
2143                 }
2144                 /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
2145                 if(replacement!=NULL && itemKey != replacement){
2146                     item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
2147                     if(U_SUCCESS(errorCode)){
2148                         *pErrorCode = errorCode;
2149                         break;
2150                     }
2151                 }
2152             }else{
2153                 break;
2154             }
2155         }
2156
2157         if(U_FAILURE(errorCode)){
2158
2159             /* still can't figure out ?.. try the fallback mechanism */
2160             int32_t len = 0;
2161             const UChar* fallbackLocale =  NULL;
2162             *pErrorCode = errorCode;
2163             errorCode = U_ZERO_ERROR;
2164
2165             fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
2166             if(U_FAILURE(errorCode)){
2167                *pErrorCode = errorCode;
2168                 break;
2169             }
2170
2171             u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
2172
2173             /* guard against recursive fallback */
2174             if(uprv_strcmp(explicitFallbackName, locale)==0){
2175                 *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
2176                 break;
2177             }
2178             ures_close(rb);
2179             rb = ures_open(NULL, explicitFallbackName, &errorCode);
2180             if(U_FAILURE(errorCode)){
2181                 *pErrorCode = errorCode;
2182                 break;
2183             }
2184             /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
2185         }else{
2186             break;
2187         }
2188     }
2189     /* done with the locale string - ready to close table and rb */
2190     ures_close(&subTable);
2191     ures_close(&table);
2192     ures_close(rb);
2193     return item;
2194 }
2195
2196 static int32_t
2197 _getStringOrCopyKey(const char *path, const char *locale,
2198                     const char *tableKey,
2199                     const char* subTableKey,
2200                     const char *itemKey,
2201                     const char *substitute,
2202                     UChar *dest, int32_t destCapacity,
2203                     UErrorCode *pErrorCode) {
2204     const UChar *s = NULL;
2205     int32_t length = 0;
2206
2207     if(itemKey==NULL) {
2208         /* top-level item: normal resource bundle access */
2209         UResourceBundle *rb;
2210
2211         rb=ures_open(path, locale, pErrorCode);
2212         if(U_SUCCESS(*pErrorCode)) {
2213             s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
2214             /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2215             ures_close(rb);
2216         }
2217     } else {
2218         /* Language code should not be a number. If it is, set the error code. */
2219         if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
2220             *pErrorCode = U_MISSING_RESOURCE_ERROR;
2221         } else {
2222             /* second-level item, use special fallback */
2223             s=_res_getTableStringWithFallback(path, locale,
2224                                                tableKey,
2225                                                subTableKey,
2226                                                itemKey,
2227                                                &length,
2228                                                pErrorCode);
2229         }
2230     }
2231     if(U_SUCCESS(*pErrorCode)) {
2232         int32_t copyLength=uprv_min(length, destCapacity);
2233         if(copyLength>0 && s != NULL) {
2234             u_memcpy(dest, s, copyLength);
2235         }
2236     } else {
2237         /* no string from a resource bundle: convert the substitute */
2238         length=(int32_t)uprv_strlen(substitute);
2239         u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
2240         *pErrorCode=U_USING_DEFAULT_WARNING;
2241     }
2242
2243     return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2244 }
2245
2246 static int32_t
2247 _getDisplayNameForComponent(const char *locale,
2248                             const char *displayLocale,
2249                             UChar *dest, int32_t destCapacity,
2250                             int32_t (*getter)(const char *, char *, int32_t, UErrorCode *),
2251                             const char *tag,
2252                             UErrorCode *pErrorCode) {
2253     char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
2254     int32_t length;
2255     UErrorCode localStatus;
2256
2257     /* argument checking */
2258     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2259         return 0;
2260     }
2261
2262     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2263         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2264         return 0;
2265     }
2266
2267     localStatus = U_ZERO_ERROR;
2268     length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
2269     if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
2270         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2271         return 0;
2272     }
2273     if(length==0) {
2274         return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
2275     }
2276
2277     return _getStringOrCopyKey(NULL, displayLocale,
2278                                tag, NULL, localeBuffer,
2279                                localeBuffer,
2280                                dest, destCapacity,
2281                                pErrorCode);
2282 }
2283
2284 U_CAPI int32_t U_EXPORT2
2285 uloc_getDisplayLanguage(const char *locale,
2286                         const char *displayLocale,
2287                         UChar *dest, int32_t destCapacity,
2288                         UErrorCode *pErrorCode) {
2289     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2290                 uloc_getLanguage, _kLanguages, pErrorCode);
2291 }
2292
2293 U_CAPI int32_t U_EXPORT2
2294 uloc_getDisplayScript(const char* locale,
2295                       const char* displayLocale,
2296                       UChar *dest, int32_t destCapacity,
2297                       UErrorCode *pErrorCode)
2298 {
2299     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2300                 uloc_getScript, _kScripts, pErrorCode);
2301 }
2302
2303 U_CAPI int32_t U_EXPORT2
2304 uloc_getDisplayCountry(const char *locale,
2305                        const char *displayLocale,
2306                        UChar *dest, int32_t destCapacity,
2307                        UErrorCode *pErrorCode) {
2308     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2309                 uloc_getCountry, _kCountries, pErrorCode);
2310 }
2311
2312 /*
2313  * TODO separate variant1_variant2_variant3...
2314  * by getting each tag's display string and concatenating them with ", "
2315  * in between - similar to uloc_getDisplayName()
2316  */
2317 U_CAPI int32_t U_EXPORT2
2318 uloc_getDisplayVariant(const char *locale,
2319                        const char *displayLocale,
2320                        UChar *dest, int32_t destCapacity,
2321                        UErrorCode *pErrorCode) {
2322     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2323                 uloc_getVariant, _kVariants, pErrorCode);
2324 }
2325
2326 U_CAPI int32_t U_EXPORT2
2327 uloc_getDisplayName(const char *locale,
2328                     const char *displayLocale,
2329                     UChar *dest, int32_t destCapacity,
2330                     UErrorCode *pErrorCode)
2331 {
2332     int32_t length, length2, length3 = 0;
2333     UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
2334     UEnumeration* keywordEnum = NULL;
2335     int32_t keywordCount = 0;
2336     const char *keyword = NULL;
2337     int32_t keywordLen = 0;
2338     char keywordValue[256];
2339     int32_t keywordValueLen = 0;
2340
2341     /* argument checking */
2342     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2343         return 0;
2344     }
2345
2346     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2347         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2348         return 0;
2349     }
2350
2351     /*
2352      * if there is a language, then write "language (country, variant)"
2353      * otherwise write "country, variant"
2354      */
2355
2356     /* write the language */
2357     length=uloc_getDisplayLanguage(locale, displayLocale,
2358                                    dest, destCapacity,
2359                                    pErrorCode);
2360     hasLanguage= length>0;
2361
2362     if(hasLanguage) {
2363         /* append " (" */
2364         if(length<destCapacity) {
2365             dest[length]=0x20;
2366         }
2367         ++length;
2368         if(length<destCapacity) {
2369             dest[length]=0x28;
2370         }
2371         ++length;
2372     }
2373
2374     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2375         /* keep preflighting */
2376         *pErrorCode=U_ZERO_ERROR;
2377     }
2378
2379     /* append the script */
2380     if(length<destCapacity) {
2381         length2=uloc_getDisplayScript(locale, displayLocale,
2382                                        dest+length, destCapacity-length,
2383                                        pErrorCode);
2384     } else {
2385         length2=uloc_getDisplayScript(locale, displayLocale,
2386                                        NULL, 0,
2387                                        pErrorCode);
2388     }
2389     hasScript= length2>0;
2390     length+=length2;
2391
2392     if(hasScript) {
2393         /* append ", " */
2394         if(length<destCapacity) {
2395             dest[length]=0x2c;
2396         }
2397         ++length;
2398         if(length<destCapacity) {
2399             dest[length]=0x20;
2400         }
2401         ++length;
2402     }
2403
2404     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2405         /* keep preflighting */
2406         *pErrorCode=U_ZERO_ERROR;
2407     }
2408
2409     /* append the country */
2410     if(length<destCapacity) {
2411         length2=uloc_getDisplayCountry(locale, displayLocale,
2412                                        dest+length, destCapacity-length,
2413                                        pErrorCode);
2414     } else {
2415         length2=uloc_getDisplayCountry(locale, displayLocale,
2416                                        NULL, 0,
2417                                        pErrorCode);
2418     }
2419     hasCountry= length2>0;
2420     length+=length2;
2421
2422     if(hasCountry) {
2423         /* append ", " */
2424         if(length<destCapacity) {
2425             dest[length]=0x2c;
2426         }
2427         ++length;
2428         if(length<destCapacity) {
2429             dest[length]=0x20;
2430         }
2431         ++length;
2432     }
2433
2434     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2435         /* keep preflighting */
2436         *pErrorCode=U_ZERO_ERROR;
2437     }
2438
2439     /* append the variant */
2440     if(length<destCapacity) {
2441         length2=uloc_getDisplayVariant(locale, displayLocale,
2442                                        dest+length, destCapacity-length,
2443                                        pErrorCode);
2444     } else {
2445         length2=uloc_getDisplayVariant(locale, displayLocale,
2446                                        NULL, 0,
2447                                        pErrorCode);
2448     }
2449     hasVariant= length2>0;
2450     length+=length2;
2451
2452     if(hasVariant) {
2453         /* append ", " */
2454         if(length<destCapacity) {
2455             dest[length]=0x2c;
2456         }
2457         ++length;
2458         if(length<destCapacity) {
2459             dest[length]=0x20;
2460         }
2461         ++length;
2462     }
2463
2464     keywordEnum = uloc_openKeywords(locale, pErrorCode);
2465
2466     for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){
2467           if(U_FAILURE(*pErrorCode)){
2468               break;
2469           }
2470           /* the uenum_next returns NUL terminated string */
2471           keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
2472           if(length + length3 < destCapacity) {
2473             length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2474           } else {
2475             length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
2476           }
2477           if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2478               /* keep preflighting */
2479               *pErrorCode=U_ZERO_ERROR;
2480           }
2481           keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
2482           if(keywordValueLen) {
2483             if(length + length3 < destCapacity) {
2484               dest[length + length3] = 0x3D;
2485             }
2486             length3++;
2487             if(length + length3 < destCapacity) {
2488               length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2489             } else {
2490               length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
2491             }
2492             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2493                 /* keep preflighting */
2494                 *pErrorCode=U_ZERO_ERROR;
2495             }
2496           }
2497           if(keywordCount > 1) {
2498             if(length + length3 + 1 < destCapacity && keywordCount) {
2499               dest[length + length3]=0x2c;
2500               dest[length + length3+1]=0x20;
2501             }
2502             length3++; /* ',' */
2503             length3++; /* ' ' */
2504           }
2505     }
2506     uenum_close(keywordEnum);
2507
2508     hasKeywords = length3 > 0;
2509     length += length3;
2510
2511
2512
2513     if ((hasScript && !hasCountry)
2514         || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
2515         || ((hasScript || hasCountry || hasVariant) && !hasKeywords)
2516         || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords))
2517     {
2518         /* remove ", " or " (" */
2519         length-=2;
2520     }
2521
2522     if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) {
2523         /* append ")" */
2524         if(length<destCapacity) {
2525             dest[length]=0x29;
2526         }
2527         ++length;
2528     }
2529
2530     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2531         /* keep preflighting */
2532         *pErrorCode=U_ZERO_ERROR;
2533     }
2534
2535     return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2536 }
2537
2538 U_CAPI int32_t U_EXPORT2
2539 uloc_getDisplayKeyword(const char* keyword,
2540                        const char* displayLocale,
2541                        UChar* dest,
2542                        int32_t destCapacity,
2543                        UErrorCode* status){
2544
2545     /* argument checking */
2546     if(status==NULL || U_FAILURE(*status)) {
2547         return 0;
2548     }
2549
2550     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2551         *status=U_ILLEGAL_ARGUMENT_ERROR;
2552         return 0;
2553     }
2554
2555
2556     /* pass itemKey=NULL to look for a top-level item */
2557     return _getStringOrCopyKey(NULL, displayLocale,
2558                                _kKeys, NULL,
2559                                keyword,
2560                                keyword,
2561                                dest, destCapacity,
2562                                status);
2563
2564 }
2565
2566
2567 #define UCURRENCY_DISPLAY_NAME_INDEX 1
2568
2569 U_CAPI int32_t U_EXPORT2
2570 uloc_getDisplayKeywordValue(   const char* locale,
2571                                const char* keyword,
2572                                const char* displayLocale,
2573                                UChar* dest,
2574                                int32_t destCapacity,
2575                                UErrorCode* status){
2576
2577
2578     char keywordValue[ULOC_FULLNAME_CAPACITY*4];
2579     int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
2580     int32_t keywordValueLen =0;
2581
2582     /* argument checking */
2583     if(status==NULL || U_FAILURE(*status)) {
2584         return 0;
2585     }
2586
2587     if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2588         *status=U_ILLEGAL_ARGUMENT_ERROR;
2589         return 0;
2590     }
2591
2592     /* get the keyword value */
2593     keywordValue[0]=0;
2594     keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
2595
2596     /*
2597      * if the keyword is equal to currency .. then to get the display name
2598      * we need to do the fallback ourselves
2599      */
2600     if(uprv_stricmp(keyword, _kCurrency)==0){
2601
2602         int32_t dispNameLen = 0;
2603         const UChar *dispName = NULL;
2604
2605         UResourceBundle *bundle     = ures_open(NULL, displayLocale, status);
2606         UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
2607         UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
2608
2609         dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
2610
2611         /*close the bundles */
2612         ures_close(currency);
2613         ures_close(currencies);
2614         ures_close(bundle);
2615
2616         if(U_FAILURE(*status)){
2617             if(*status == U_MISSING_RESOURCE_ERROR){
2618                 /* we just want to write the value over if nothing is available */
2619                 *status = U_USING_DEFAULT_WARNING;
2620             }else{
2621                 return 0;
2622             }
2623         }
2624
2625         /* now copy the dispName over if not NULL */
2626         if(dispName != NULL){
2627             if(dispNameLen <= destCapacity){
2628                 uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
2629                 return u_terminateUChars(dest, destCapacity, dispNameLen, status);
2630             }else{
2631                 *status = U_BUFFER_OVERFLOW_ERROR;
2632                 return dispNameLen;
2633             }
2634         }else{
2635             /* we have not found the display name for the value .. just copy over */
2636             if(keywordValueLen <= destCapacity){
2637                 u_charsToUChars(keywordValue, dest, keywordValueLen);
2638                 return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
2639             }else{
2640                  *status = U_BUFFER_OVERFLOW_ERROR;
2641                 return keywordValueLen;
2642             }
2643         }
2644
2645
2646     }else{
2647
2648         return _getStringOrCopyKey(NULL, displayLocale,
2649                                    _kTypes, keyword,
2650                                    keywordValue,
2651                                    keywordValue,
2652                                    dest, destCapacity,
2653                                    status);
2654     }
2655 }
2656
2657 /* ### Get available **************************************************/
2658
2659 static UBool U_CALLCONV uloc_cleanup(void) {
2660     char ** temp;
2661
2662     if (_installedLocales) {
2663         temp = _installedLocales;
2664         _installedLocales = NULL;
2665
2666         _installedLocalesCount = 0;
2667
2668         uprv_free(temp);
2669     }
2670     return TRUE;
2671 }
2672
2673 static void _load_installedLocales()
2674 {
2675     UBool   localesLoaded;
2676
2677     UMTX_CHECK(NULL, _installedLocales != NULL, localesLoaded);
2678
2679     if (localesLoaded == FALSE) {
2680         UResourceBundle *index = NULL;
2681         UResourceBundle installed;
2682         UErrorCode status = U_ZERO_ERROR;
2683         char ** temp;
2684         int32_t i = 0;
2685         int32_t localeCount;
2686
2687         ures_initStackObject(&installed);
2688         index = ures_openDirect(NULL, _kIndexLocaleName, &status);
2689         ures_getByKey(index, _kIndexTag, &installed, &status);
2690
2691         if(U_SUCCESS(status)) {
2692             localeCount = ures_getSize(&installed);
2693             temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
2694             /* Check for null pointer */
2695             if (temp != NULL) {
2696                 ures_resetIterator(&installed);
2697                 while(ures_hasNext(&installed)) {
2698                     ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
2699                 }
2700                 temp[i] = NULL;
2701
2702                 umtx_lock(NULL);
2703                 if (_installedLocales == NULL)
2704                 {
2705                     _installedLocalesCount = localeCount;
2706                     _installedLocales = temp;
2707                     temp = NULL;
2708                     ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
2709                 }
2710                 umtx_unlock(NULL);
2711
2712                 uprv_free(temp);
2713             }
2714         }
2715         ures_close(&installed);
2716         ures_close(index);
2717     }
2718 }
2719
2720 U_CAPI const char* U_EXPORT2
2721 uloc_getAvailable(int32_t offset)
2722 {
2723
2724     _load_installedLocales();
2725
2726     if (offset > _installedLocalesCount)
2727         return NULL;
2728     return _installedLocales[offset];
2729 }
2730
2731 U_CAPI int32_t  U_EXPORT2
2732 uloc_countAvailable()
2733 {
2734     _load_installedLocales();
2735     return _installedLocalesCount;
2736 }
2737
2738 /**
2739  * Returns a list of all language codes defined in ISO 639.  This is a pointer
2740  * to an array of pointers to arrays of char.  All of these pointers are owned
2741  * by ICU-- do not delete them, and do not write through them.  The array is
2742  * terminated with a null pointer.
2743  */
2744 U_CAPI const char* const*  U_EXPORT2
2745 uloc_getISOLanguages()
2746 {
2747     return LANGUAGES;
2748 }
2749
2750 /**
2751  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2752  * pointer to an array of pointers to arrays of char.  All of these pointers are
2753  * owned by ICU-- do not delete them, and do not write through them.  The array is
2754  * terminated with a null pointer.
2755  */
2756 U_CAPI const char* const*  U_EXPORT2
2757 uloc_getISOCountries()
2758 {
2759     return COUNTRIES;
2760 }
2761
2762
/* this function to be moved into cstring.c later */

/* decimal separator produced by the C runtime; 0 until first detected */
static char gDecimal = 0;

/*
 * strtod() variant that always accepts '.' as the decimal separator.
 *
 * On first use the separator of the C runtime is detected by formatting
 * 1.0. If it is '.', the call goes straight to uprv_strtod(). Otherwise
 * up to 29 characters of the input are copied, the first '.' in the copy is
 * replaced by the runtime's separator, and the copy is parsed instead.
 *
 * @param start text to parse
 * @param end   if not NULL, receives the position in "start" that
 *              corresponds to where parsing stopped
 * @return the parsed value
 */
static /* U_CAPI */
double
/* U_EXPORT2 */
_uloc_strtod(const char *start, char **end) {
    char localized[30];
    char *point;
    char *parsedEnd;
    double value;

    if (gDecimal == 0) {
        char probe[5];
        /* Some machines switch the decimal separator behind our back
        because of localization; normally this is just a '.'.
        The probe text is sign, digit, separator, digit. */
        sprintf(probe, "%+1.1f", 1.0);
        gDecimal = probe[2];
    }

    if(gDecimal == '.') {
        /* the runtime already parses '.', let it do the work */
        return uprv_strtod(start, end);
    }

    uprv_strncpy(localized, start, 29);
    localized[29]=0;
    point = uprv_strchr(localized, '.');
    if(point == NULL) {
        /* no decimal point in the copy, nothing to translate */
        return uprv_strtod(start, end);
    }
    *point = gDecimal;

    value = uprv_strtod(localized, &parsedEnd);
    if(end != NULL) {
        /* map the stop position back into the caller's string;
           the cast drops const to follow the uprv_strtod API */
        *end = (char*)(start+(parsedEnd-localized));
    }
    return value;
}
2801
2802 typedef struct {
2803     float q;
2804     int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
2805     char *locale;
2806 } _acceptLangItem;
2807
2808 static int32_t U_CALLCONV
2809 uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2810 {
2811     const _acceptLangItem *aa = (const _acceptLangItem*)a;
2812     const _acceptLangItem *bb = (const _acceptLangItem*)b;
2813
2814     int32_t rc = 0;
2815     if(bb->q < aa->q) {
2816         rc = -1;  /* A > B */
2817     } else if(bb->q > aa->q) {
2818         rc = 1;   /* A < B */
2819     } else {
2820         rc = 0;   /* A = B */
2821     }
2822
2823     if(rc==0) {
2824         rc = uprv_stricmp(aa->locale, bb->locale);
2825     }
2826
2827 #if defined(ULOC_DEBUG)
2828     /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2829     aa->locale, aa->q,
2830     bb->locale, bb->q,
2831     rc);*/
2832 #endif
2833
2834     return rc;
2835 }
2836
2837 static ULayoutType
2838 _uloc_getOrientationHelper(const char* localeId,
2839                            const char* key,
2840                            UErrorCode *status)
2841 {
2842     ULayoutType result = ULOC_LAYOUT_UNKNOWN;
2843
2844     if (!U_FAILURE(*status)) {
2845         int32_t length = 0;
2846         char localeBuffer[ULOC_FULLNAME_CAPACITY];
2847
2848         uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
2849
2850         if (!U_FAILURE(*status)) {
2851             const UChar* const value =
2852                 _res_getTableStringWithFallback(
2853                     NULL,
2854                     localeBuffer,
2855                     "layout",
2856                     NULL,
2857                     key,
2858                     &length,
2859                     status);
2860
2861             if (!U_FAILURE(*status) && length != 0) {
2862                 switch(value[0])
2863                 {
2864                 case 0x0062: /* 'b' */
2865                     result = ULOC_LAYOUT_BTT;
2866                     break;
2867                 case 0x006C: /* 'l' */
2868                     result = ULOC_LAYOUT_LTR;
2869                     break;
2870                 case 0x0072: /* 'r' */
2871                     result = ULOC_LAYOUT_RTL;
2872                     break;
2873                 case 0x0074: /* 't' */
2874                     result = ULOC_LAYOUT_TTB;
2875                     break;
2876                 default:
2877                     *status = U_INTERNAL_PROGRAM_ERROR;
2878                     break;
2879                 }
2880             }
2881         }
2882     }
2883
2884     return result;
2885 }
2886
2887 U_DRAFT ULayoutType U_EXPORT2
2888 uloc_getCharacterOrientation(const char* localeId,
2889                              UErrorCode *status)
2890 {
2891     return _uloc_getOrientationHelper(localeId, "characters", status);
2892 }
2893
2894 /**
2895  * Get the layout line orientation for the specified locale.
2896  *
2897  * @param localeID locale name
2898  * @param status Error status
2899  * @return an enum indicating the layout orientation for lines.
2900  * @draft ICU 4.0
2901  */
2902 U_DRAFT ULayoutType U_EXPORT2
2903 uloc_getLineOrientation(const char* localeId,
2904                         UErrorCode *status)
2905 {
2906     return _uloc_getOrientationHelper(localeId, "lines", status);
2907 }
2908
2909 /*
2910 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2911 */
2912
2913 U_CAPI int32_t U_EXPORT2
2914 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2915                             const char *httpAcceptLanguage,
2916                             UEnumeration* availableLocales,
2917                             UErrorCode *status)
2918 {
2919     _acceptLangItem *j;
2920     _acceptLangItem smallBuffer[30];
2921     char **strs;
2922     char tmp[ULOC_FULLNAME_CAPACITY +1];
2923     int32_t n = 0;
2924     const char *itemEnd;
2925     const char *paramEnd;
2926     const char *s;
2927     const char *t;
2928     int32_t res;
2929     int32_t i;
2930     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2931     int32_t jSize;
2932     char *tempstr; /* Use for null pointer check */
2933
2934     j = smallBuffer;
2935     jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2936     if(U_FAILURE(*status)) {
2937         return -1;
2938     }
2939
2940     for(s=httpAcceptLanguage;s&&*s;) {
2941         while(isspace(*s)) /* eat space at the beginning */
2942             s++;
2943         itemEnd=uprv_strchr(s,',');
2944         paramEnd=uprv_strchr(s,';');
2945         if(!itemEnd) {
2946             itemEnd = httpAcceptLanguage+l; /* end of string */
2947         }
2948         if(paramEnd && paramEnd<itemEnd) {
2949             /* semicolon (;) is closer than end (,) */
2950             t = paramEnd+1;
2951             if(*t=='q') {
2952                 t++;
2953             }
2954             while(isspace(*t)) {
2955                 t++;
2956             }
2957             if(*t=='=') {
2958                 t++;
2959             }
2960             while(isspace(*t)) {
2961                 t++;
2962             }
2963             j[n].q = (float)_uloc_strtod(t,NULL);
2964         } else {
2965             /* no semicolon - it's 1.0 */
2966             j[n].q = 1.0f;
2967             paramEnd = itemEnd;
2968         }
2969         j[n].dummy=0;
2970         /* eat spaces prior to semi */
2971         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2972             ;
2973         /* Check for null pointer from uprv_strndup */
2974         tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2975         if (tempstr == NULL) {
2976             *status = U_MEMORY_ALLOCATION_ERROR;
2977             return -1;
2978         }
2979         j[n].locale = tempstr;
2980         uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2981         if(strcmp(j[n].locale,tmp)) {
2982             uprv_free(j[n].locale);
2983             j[n].locale=uprv_strdup(tmp);
2984         }
2985 #if defined(ULOC_DEBUG)
2986         /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2987 #endif
2988         n++;
2989         s = itemEnd;
2990         while(*s==',') { /* eat duplicate commas */
2991             s++;
2992         }
2993         if(n>=jSize) {
2994             if(j==smallBuffer) {  /* overflowed the small buffer. */
2995                 j = uprv_malloc(sizeof(j[0])*(jSize*2));
2996                 if(j!=NULL) {
2997                     uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2998                 }
2999 #if defined(ULOC_DEBUG)
3000                 fprintf(stderr,"malloced at size %d\n", jSize);
3001 #endif
3002             } else {
3003                 j = uprv_realloc(j, sizeof(j[0])*jSize*2);
3004 #if defined(ULOC_DEBUG)
3005                 fprintf(stderr,"re-alloced at size %d\n", jSize);
3006 #endif
3007             }
3008             jSize *= 2;
3009             if(j==NULL) {
3010                 *status = U_MEMORY_ALLOCATION_ERROR;
3011                 return -1;
3012             }
3013         }
3014     }
3015     uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
3016     if(U_FAILURE(*status)) {
3017         if(j != smallBuffer) {
3018 #if defined(ULOC_DEBUG)
3019             fprintf(stderr,"freeing j %p\n", j);
3020 #endif
3021             uprv_free(j);
3022         }
3023         return -1;
3024     }
3025     strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
3026     /* Check for null pointer */
3027     if (strs == NULL) {
3028         uprv_free(j); /* Free to avoid memory leak */
3029         *status = U_MEMORY_ALLOCATION_ERROR;
3030         return -1;
3031     }
3032     for(i=0;i<n;i++) {
3033 #if defined(ULOC_DEBUG)
3034         /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
3035 #endif
3036         strs[i]=j[i].locale;
3037     }
3038     res =  uloc_acceptLanguage(result, resultAvailable, outResult,
3039         (const char**)strs, n, availableLocales, status);
3040     for(i=0;i<n;i++) {
3041         uprv_free(strs[i]);
3042     }
3043     uprv_free(strs);
3044     if(j != smallBuffer) {
3045 #if defined(ULOC_DEBUG)
3046         fprintf(stderr,"freeing j %p\n", j);
3047 #endif
3048         uprv_free(j);
3049     }
3050     return res;
3051 }
3052
3053
3054 U_CAPI int32_t U_EXPORT2
3055 uloc_acceptLanguage(char *result, int32_t resultAvailable,
3056                     UAcceptResult *outResult, const char **acceptList,
3057                     int32_t acceptListCount,
3058                     UEnumeration* availableLocales,
3059                     UErrorCode *status)
3060 {
3061     int32_t i,j;
3062     int32_t len;
3063     int32_t maxLen=0;
3064     char tmp[ULOC_FULLNAME_CAPACITY+1];
3065     const char *l;
3066     char **fallbackList;
3067     if(U_FAILURE(*status)) {
3068         return -1;
3069     }
3070     fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
3071     if(fallbackList==NULL) {
3072         *status = U_MEMORY_ALLOCATION_ERROR;
3073         return -1;
3074     }
3075     for(i=0;i<acceptListCount;i++) {
3076 #if defined(ULOC_DEBUG)
3077         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
3078 #endif
3079         while((l=uenum_next(availableLocales, NULL, status))) {
3080 #if defined(ULOC_DEBUG)
3081             fprintf(stderr,"  %s\n", l);
3082 #endif
3083             len = (int32_t)uprv_strlen(l);
3084             if(!uprv_strcmp(acceptList[i], l)) {
3085                 if(outResult) {
3086                     *outResult = ULOC_ACCEPT_VALID;
3087                 }
3088 #if defined(ULOC_DEBUG)
3089                 fprintf(stderr, "MATCH! %s\n", l);
3090 #endif
3091                 if(len>0) {
3092                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3093                 }
3094                 for(j=0;j<i;j++) {
3095                     uprv_free(fallbackList[j]);
3096                 }
3097                 uprv_free(fallbackList);
3098                 return u_terminateChars(result, resultAvailable, len, status);
3099             }
3100             if(len>maxLen) {
3101                 maxLen = len;
3102             }
3103         }
3104         uenum_reset(availableLocales, status);
3105         /* save off parent info */
3106         if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3107             fallbackList[i] = uprv_strdup(tmp);
3108         } else {
3109             fallbackList[i]=0;
3110         }
3111     }
3112
3113     for(maxLen--;maxLen>0;maxLen--) {
3114         for(i=0;i<acceptListCount;i++) {
3115             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
3116 #if defined(ULOC_DEBUG)
3117                 fprintf(stderr,"Try: [%s]", fallbackList[i]);
3118 #endif
3119                 while((l=uenum_next(availableLocales, NULL, status))) {
3120 #if defined(ULOC_DEBUG)
3121                     fprintf(stderr,"  %s\n", l);
3122 #endif
3123                     len = (int32_t)uprv_strlen(l);
3124                     if(!uprv_strcmp(fallbackList[i], l)) {
3125                         if(outResult) {
3126                             *outResult = ULOC_ACCEPT_FALLBACK;
3127                         }
3128 #if defined(ULOC_DEBUG)
3129                         fprintf(stderr, "fallback MATCH! %s\n", l);
3130 #endif
3131                         if(len>0) {
3132                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3133                         }
3134                         for(j=0;j<acceptListCount;j++) {
3135                             uprv_free(fallbackList[j]);
3136                         }
3137                         uprv_free(fallbackList);
3138                         return u_terminateChars(result, resultAvailable, len, status);
3139                     }
3140                 }
3141                 uenum_reset(availableLocales, status);
3142
3143                 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3144                     uprv_free(fallbackList[i]);
3145                     fallbackList[i] = uprv_strdup(tmp);
3146                 } else {
3147                     uprv_free(fallbackList[i]);
3148                     fallbackList[i]=0;
3149                 }
3150             }
3151         }
3152         if(outResult) {
3153             *outResult = ULOC_ACCEPT_FAILED;
3154         }
3155     }
3156     for(i=0;i<acceptListCount;i++) {
3157         uprv_free(fallbackList[i]);
3158     }
3159     uprv_free(fallbackList);
3160     return -1;
3161 }
3162
3163
3164 /**
3165  * This function looks for the localeID in the likelySubtags resource.
3166  *
3167  * @param localeID The tag to find.
3168  * @param buffer A buffer to hold the matching entry
3169  * @param bufferLength The length of the output buffer
3170  * @return A pointer to "buffer" if found, or a null pointer if not.
3171  */
3172 static const char*  U_CALLCONV
3173 findLikelySubtags(const char* localeID,
3174                   char* buffer,
3175                   int32_t bufferLength,
3176                   UErrorCode* err) {
3177     const char* result = NULL;
3178
3179     if (!U_FAILURE(*err)) {
3180         int32_t resLen = 0;
3181         const UChar* s = NULL;
3182         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", err);
3183         if (!U_FAILURE(*err)) {
3184             s = ures_getStringByKey(subtags, localeID, &resLen, err);
3185
3186             if (U_FAILURE(*err)) {
3187                 /*
3188                  * If a resource is missing, it's not really an error, it's
3189                  * just that we don't have any data for that particular locale ID.
3190                  */
3191                 if (*err == U_MISSING_RESOURCE_ERROR) {
3192                     *err = U_ZERO_ERROR;
3193                 }
3194             }
3195             else if (resLen >= bufferLength) {
3196                 /* The buffer should never overflow. */
3197                 *err = U_INTERNAL_PROGRAM_ERROR;
3198             }
3199             else {
3200                 u_UCharsToChars(s, buffer, resLen + 1);
3201                 result = buffer;
3202             }
3203
3204             ures_close(subtags);
3205         }
3206     }
3207
3208     return result;
3209 }
3210
3211 /**
3212  * Append a tag to a buffer, adding the separator if necessary.  The buffer
3213  * must be large enough to contain the resulting tag plus any separator
3214  * necessary. The tag must not be a zero-length string.
3215  *
3216  * @param tag The tag to add.
3217  * @param tagLength The length of the tag.
3218  * @param buffer The output buffer.
3219  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
3220  **/
3221 static void U_CALLCONV
3222 appendTag(
3223     const char* tag,
3224     int32_t tagLength,
3225     char* buffer,
3226     int32_t* bufferLength) {
3227
3228     if (*bufferLength > 0) {
3229         buffer[*bufferLength] = '_';
3230         ++(*bufferLength);
3231     }
3232
3233     uprv_memmove(
3234         &buffer[*bufferLength],
3235         tag,
3236         tagLength);
3237
3238     *bufferLength += tagLength;
3239 }
3240
/**
 * Canonical placeholder subtags for an unknown language, script and region.
 * In this file, the language placeholder is written out when no language
 * subtag is available, and the script and region placeholders are compared
 * (case-insensitively) against parsed subtags so that they can be dropped.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
3247
3248 /**
3249  * Create a tag string from the supplied parameters.  The lang, script and region
3250  * parameters may be NULL pointers. If they are, their corresponding length parameters
3251  * must be less than or equal to 0.
3252  *
3253  * If any of the language, script or region parameters are empty, and the alternateTags
3254  * parameter is not NULL, it will be parsed for potential language, script and region tags
3255  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
3256  * it contains no language tag, the default tag for the unknown language is used.
3257  *
3258  * If the length of the new string exceeds the capacity of the output buffer,
3259  * the function copies as many bytes to the output buffer as it can, and returns
3260  * the error U_BUFFER_OVERFLOW_ERROR.
3261  *
3262  * If an illegal argument is provided, the function returns the error
3263  * U_ILLEGAL_ARGUMENT_ERROR.
3264  *
3265  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
3266  * the tag string fits in the output buffer, but the null terminator doesn't.
3267  *
3268  * @param lang The language tag to use.
3269  * @param langLength The length of the language tag.
3270  * @param script The script tag to use.
3271  * @param scriptLength The length of the script tag.
3272  * @param region The region tag to use.
3273  * @param regionLength The length of the region tag.
3274  * @param trailing Any trailing data to append to the new tag.
3275  * @param trailingLength The length of the trailing data.
3276  * @param alternateTags A string containing any alternate tags.
3277  * @param tag The output buffer.
3278  * @param tagCapacity The capacity of the output buffer.
3279  * @param err A pointer to a UErrorCode for error reporting.
3280  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
3281  **/
3282 static int32_t U_CALLCONV
3283 createTagStringWithAlternates(
3284     const char* lang,
3285     int32_t langLength,
3286     const char* script,
3287     int32_t scriptLength,
3288     const char* region,
3289     int32_t regionLength,
3290     const char* trailing,
3291     int32_t trailingLength,
3292     const char* alternateTags,
3293     char* tag,
3294     int32_t tagCapacity,
3295     UErrorCode* err) {
3296
3297     if (U_FAILURE(*err)) {
3298         goto error;
3299     }
3300     else if (tag == NULL ||
3301              tagCapacity <= 0 ||
3302              langLength >= ULOC_LANG_CAPACITY ||
3303              scriptLength >= ULOC_SCRIPT_CAPACITY ||
3304              regionLength >= ULOC_COUNTRY_CAPACITY) {
3305         goto error;
3306     }
3307     else {
3308         /**
3309          * ULOC_FULLNAME_CAPACITY will provide enough capacity
3310          * that we can build a string that contains the language,
3311          * script and region code without worrying about overrunning
3312          * the user-supplied buffer.
3313          **/
3314         char tagBuffer[ULOC_FULLNAME_CAPACITY];
3315         int32_t tagLength = 0;
3316         int32_t capacityRemaining = tagCapacity;
3317         UBool regionAppended = FALSE;
3318
3319         if (langLength > 0) {
3320             appendTag(
3321                 lang,
3322                 langLength,
3323                 tagBuffer,
3324                 &tagLength);
3325         }
3326         else if (alternateTags == NULL) {
3327             /*
3328              * Append the value for an unknown language, if
3329              * we found no language.
3330              */
3331             appendTag(
3332                 unknownLanguage,
3333                 uprv_strlen(unknownLanguage),
3334                 tagBuffer,
3335                 &tagLength);
3336         }
3337         else {
3338             /*
3339              * Parse the alternateTags string for the language.
3340              */
3341             char alternateLang[ULOC_LANG_CAPACITY];
3342             int32_t alternateLangLength = sizeof(alternateLang);
3343
3344             alternateLangLength =
3345                 uloc_getLanguage(
3346                     alternateTags,
3347                     alternateLang,
3348                     alternateLangLength,
3349                     err);
3350             if(U_FAILURE(*err) ||
3351                 alternateLangLength >= ULOC_LANG_CAPACITY) {
3352                 goto error;
3353             }
3354             else if (alternateLangLength == 0) {
3355                 /*
3356                  * Append the value for an unknown language, if
3357                  * we found no language.
3358                  */
3359                 appendTag(
3360                     unknownLanguage,
3361                     uprv_strlen(unknownLanguage),
3362                     tagBuffer,
3363                     &tagLength);
3364             }
3365             else {
3366                 appendTag(
3367                     alternateLang,
3368                     alternateLangLength,
3369                     tagBuffer,
3370                     &tagLength);
3371             }
3372         }
3373
3374         if (scriptLength > 0) {
3375             appendTag(
3376                 script,
3377                 scriptLength,
3378                 tagBuffer,
3379                 &tagLength);
3380         }
3381         else if (alternateTags != NULL) {
3382             /*
3383              * Parse the alternateTags string for the script.
3384              */
3385             char alternateScript[ULOC_SCRIPT_CAPACITY];
3386
3387             const int32_t alternateScriptLength =
3388                 uloc_getScript(
3389                     alternateTags,
3390                     alternateScript,
3391                     sizeof(alternateScript),
3392                     err);
3393
3394             if (U_FAILURE(*err) ||
3395                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
3396                 goto error;
3397             }
3398             else if (alternateScriptLength > 0) {
3399                 appendTag(
3400                     alternateScript,
3401                     alternateScriptLength,
3402                     tagBuffer,
3403                     &tagLength);
3404             }
3405         }
3406
3407         if (regionLength > 0) {
3408             appendTag(
3409                 region,
3410                 regionLength,
3411                 tagBuffer,
3412                 &tagLength);
3413
3414             regionAppended = TRUE;
3415         }
3416         else if (alternateTags != NULL) {
3417             /*
3418              * Parse the alternateTags string for the region.
3419              */
3420             char alternateRegion[ULOC_COUNTRY_CAPACITY];
3421
3422             const int32_t alternateRegionLength =
3423                 uloc_getCountry(
3424                     alternateTags,
3425                     alternateRegion,
3426                     sizeof(alternateRegion),
3427                     err);
3428             if (U_FAILURE(*err) ||
3429                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
3430                 goto error;
3431             }
3432             else if (alternateRegionLength > 0) {
3433                 appendTag(
3434                     alternateRegion,
3435                     alternateRegionLength,
3436                     tagBuffer,
3437                     &tagLength);
3438
3439                 regionAppended = TRUE;
3440             }
3441         }
3442
3443         {
3444             const int32_t toCopy =
3445                 tagLength >= tagCapacity ? tagCapacity : tagLength;
3446
3447             /**
3448              * Copy the partial tag from our internal buffer to the supplied
3449              * target.
3450              **/
3451             uprv_memcpy(
3452                 tag,
3453                 tagBuffer,
3454                 toCopy);
3455
3456             capacityRemaining -= toCopy;
3457         }
3458
3459         if (trailingLength > 0) {
3460             if (capacityRemaining > 0 && !regionAppended) {
3461                 tag[tagLength++] = '_';
3462                 --capacityRemaining;
3463             }
3464
3465             if (capacityRemaining > 0) {
3466                 /*
3467                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
3468                  * don't know if the user-supplied buffers overlap.
3469                  */
3470                 const int32_t toCopy =
3471                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
3472
3473                 uprv_memmove(
3474                     &tag[tagLength],
3475                     trailing,
3476                     toCopy);
3477             }
3478         }
3479
3480         tagLength += trailingLength;
3481
3482         return u_terminateChars(
3483                     tag,
3484                     tagCapacity,
3485                     tagLength,
3486                     err);
3487     }
3488
3489 error:
3490
3491     /**
3492      * An overflow indicates the locale ID passed in
3493      * is ill-formed.  If we got here, and there was
3494      * no previous error, it's an implicit overflow.
3495      **/
3496     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
3497         U_SUCCESS(*err)) {
3498         *err = U_ILLEGAL_ARGUMENT_ERROR;
3499     }
3500
3501     return -1;
3502 }
3503
3504 /**
3505  * Create a tag string from the supplied parameters.  The lang, script and region
3506  * parameters may be NULL pointers. If they are, their corresponding length parameters
3507  * must be less than or equal to 0.  If the lang parameter is an empty string, the
3508  * default value for an unknown language is written to the output buffer.
3509  *
3510  * If the length of the new string exceeds the capacity of the output buffer,
3511  * the function copies as many bytes to the output buffer as it can, and returns
3512  * the error U_BUFFER_OVERFLOW_ERROR.
3513  *
3514  * If an illegal argument is provided, the function returns the error
3515  * U_ILLEGAL_ARGUMENT_ERROR.
3516  *
3517  * @param lang The language tag to use.
3518  * @param langLength The length of the language tag.
3519  * @param script The script tag to use.
3520  * @param scriptLength The length of the script tag.
3521  * @param region The region tag to use.
3522  * @param regionLength The length of the region tag.
3523  * @param trailing Any trailing data to append to the new tag.
3524  * @param trailingLength The length of the trailing data.
3525  * @param tag The output buffer.
3526  * @param tagCapacity The capacity of the output buffer.
3527  * @param err A pointer to a UErrorCode for error reporting.
3528  * @return The length of the tag string, which may be greater than tagCapacity.
3529  **/
3530 static int32_t U_CALLCONV
3531 createTagString(
3532     const char* lang,
3533     int32_t langLength,
3534     const char* script,
3535     int32_t scriptLength,
3536     const char* region,
3537     int32_t regionLength,
3538     const char* trailing,
3539     int32_t trailingLength,
3540     char* tag,
3541     int32_t tagCapacity,
3542     UErrorCode* err)
3543 {
3544     return createTagStringWithAlternates(
3545                 lang,
3546                 langLength,
3547                 script,
3548                 scriptLength,
3549                 region,
3550                 regionLength,
3551                 trailing,
3552                 trailingLength,
3553                 NULL,
3554                 tag,
3555                 tagCapacity,
3556                 err);
3557 }
3558
3559 /**
3560  * Parse the language, script, and region subtags from a tag string, and copy the
3561  * results into the corresponding output parameters. The buffers are null-terminated,
3562  * unless overflow occurs.
3563  *
3564  * The langLength, scriptLength, and regionLength parameters are input/output
3565  * parameters, and must contain the capacity of their corresponding buffers on
3566  * input.  On output, they will contain the actual length of the buffers, not
3567  * including the null terminator.
3568  *
3569  * If the length of any of the output subtags exceeds the capacity of the corresponding
3570  * buffer, the function copies as many bytes to the output buffer as it can, and returns
3571  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
3572  * occurs.
3573  *
3574  * If an illegal argument is provided, the function returns the error
3575  * U_ILLEGAL_ARGUMENT_ERROR.
3576  *
3577  * @param localeID The locale ID to parse.
3578  * @param lang The language tag buffer.
3579  * @param langLength The length of the language tag.
3580  * @param script The script tag buffer.
3581  * @param scriptLength The length of the script tag.
3582  * @param region The region tag buffer.
3583  * @param regionLength The length of the region tag.
3584  * @param err A pointer to a UErrorCode for error reporting.
3585  * @return The number of chars of the localeID parameter consumed.
3586  **/
3587 static int32_t U_CALLCONV
3588 parseTagString(
3589     const char* localeID,
3590     char* lang,
3591     int32_t* langLength,
3592     char* script,
3593     int32_t* scriptLength,
3594     char* region,
3595     int32_t* regionLength,
3596     UErrorCode* err)
3597 {
3598     const char* position = localeID;
3599     int32_t subtagLength = 0;
3600
3601     if(U_FAILURE(*err) ||
3602        localeID == NULL ||
3603        lang == NULL ||
3604        langLength == NULL ||
3605        script == NULL ||
3606        scriptLength == NULL ||
3607        region == NULL ||
3608        regionLength == NULL) {
3609         goto error;
3610     }
3611
3612     subtagLength = _getLanguage(position, lang, *langLength, &position);
3613     u_terminateChars(lang, *langLength, subtagLength, err);
3614
3615     /*
3616      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
3617      * to be an error, because it indicates the user-supplied tag is
3618      * not well-formed.
3619      */
3620     if(*err != U_ZERO_ERROR) {
3621         goto error;
3622     }
3623
3624     *langLength = subtagLength;
3625
3626     /*
3627      * If no language was present, use the value of unknownLanguage
3628      * instead.  Otherwise, move past any separator.
3629      */
3630     if (*langLength == 0) {
3631         uprv_strcpy(
3632             lang,
3633             unknownLanguage);
3634         *langLength = uprv_strlen(lang);
3635     }
3636     else if (_isIDSeparator(*position)) {
3637         ++position;
3638     }
3639
3640     subtagLength = _getScript(position, script, *scriptLength, &position);
3641     u_terminateChars(script, *scriptLength, subtagLength, err);
3642
3643     if(*err != U_ZERO_ERROR) {
3644         goto error;
3645     }
3646
3647     *scriptLength = subtagLength;
3648
3649     if (*scriptLength > 0) {
3650         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
3651             /**
3652              * If the script part is the "unknown" script, then don't return it.
3653              **/
3654             *scriptLength = 0;
3655         }
3656
3657         /*
3658          * Move past any separator.
3659          */
3660         if (_isIDSeparator(*position)) {
3661             ++position;
3662         }
3663     }
3664
3665     subtagLength = _getCountry(position, region, *regionLength, &position);
3666     u_terminateChars(region, *regionLength, subtagLength, err);
3667
3668     if(*err != U_ZERO_ERROR) {
3669         goto error;
3670     }
3671
3672     *regionLength = subtagLength;
3673
3674     if (*regionLength > 0) {
3675         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
3676             /**
3677              * If the region part is the "unknown" region, then don't return it.
3678              **/
3679             *regionLength = 0;
3680         }
3681     }
3682
3683 exit:
3684
3685     return (int32_t)(position - localeID);
3686
3687 error:
3688
3689     /**
3690      * If we get here, we have no explicit error, it's the result of an
3691      * illegal argument.
3692      **/
3693     if (!U_FAILURE(*err)) {
3694         *err = U_ILLEGAL_ARGUMENT_ERROR;
3695     }
3696
3697     goto exit;
3698 }
3699
3700 static int32_t U_CALLCONV
3701 createLikelySubtagsString(
3702     const char* lang,
3703     int32_t langLength,
3704     const char* script,
3705     int32_t scriptLength,
3706     const char* region,
3707     int32_t regionLength,
3708     const char* variants,
3709     int32_t variantsLength,
3710     char* tag,
3711     int32_t tagCapacity,
3712     UErrorCode* err)
3713 {
3714     /**
3715      * ULOC_FULLNAME_CAPACITY will provide enough capacity
3716      * that we can build a string that contains the language,
3717      * script and region code without worrying about overrunning
3718      * the user-supplied buffer.
3719      **/
3720     char tagBuffer[ULOC_FULLNAME_CAPACITY];
3721     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
3722     int32_t tagBufferLength = 0;
3723
3724     if(U_FAILURE(*err)) {
3725         goto error;
3726     }
3727
3728     /**
3729      * Try the language with the script and region first.
3730      **/
3731     if (scriptLength > 0 && regionLength > 0) {
3732
3733         const char* likelySubtags = NULL;
3734
3735         tagBufferLength = createTagString(
3736             lang,
3737             langLength,
3738             script,
3739             scriptLength,
3740             region,
3741             regionLength,
3742             NULL,
3743             0,
3744             tagBuffer,
3745             sizeof(tagBuffer),
3746             err);
3747         if(U_FAILURE(*err)) {
3748             goto error;
3749         }
3750
3751         likelySubtags =
3752             findLikelySubtags(
3753                 tagBuffer,
3754                 likelySubtagsBuffer,
3755                 sizeof(likelySubtagsBuffer),
3756                 err);
3757         if(U_FAILURE(*err)) {
3758             goto error;
3759         }
3760
3761         if (likelySubtags != NULL) {
3762             /* Always use the language tag from the
3763                maximal string, since it may be more
3764                specific than the one provided. */
3765             return createTagStringWithAlternates(
3766                         NULL,
3767                         0,
3768                         NULL,
3769                         0,
3770                         NULL,
3771                         0,
3772                         variants,
3773                         variantsLength,
3774                         likelySubtags,
3775                         tag,
3776                         tagCapacity,
3777                         err);
3778         }
3779     }
3780
3781     /**
3782      * Try the language with just the script.
3783      **/
3784     if (scriptLength > 0) {
3785
3786         const char* likelySubtags = NULL;
3787
3788         tagBufferLength = createTagString(
3789             lang,
3790             langLength,
3791             script,
3792             scriptLength,
3793             NULL,
3794             0,
3795             NULL,
3796             0,
3797             tagBuffer,
3798             sizeof(tagBuffer),
3799             err);
3800         if(U_FAILURE(*err)) {
3801             goto error;
3802         }
3803
3804         likelySubtags =
3805             findLikelySubtags(
3806                 tagBuffer,
3807                 likelySubtagsBuffer,
3808                 sizeof(likelySubtagsBuffer),
3809                 err);
3810         if(U_FAILURE(*err)) {
3811             goto error;
3812         }
3813
3814         if (likelySubtags != NULL) {
3815             /* Always use the language tag from the
3816                maximal string, since it may be more
3817                specific than the one provided. */
3818             return createTagStringWithAlternates(
3819                         NULL,
3820                         0,
3821                         NULL,
3822                         0,
3823                         region,
3824                         regionLength,
3825                         variants,
3826                         variantsLength,
3827                         likelySubtags,
3828                         tag,
3829                         tagCapacity,
3830                         err);
3831         }
3832     }
3833
3834     /**
3835      * Try the language with just the region.
3836      **/
3837     if (regionLength > 0) {
3838
3839         const char* likelySubtags = NULL;
3840
3841         createTagString(
3842             lang,
3843             langLength,
3844             NULL,
3845             0,
3846             region,
3847             regionLength,
3848             NULL,
3849             0,
3850             tagBuffer,
3851             sizeof(tagBuffer),
3852             err);
3853         if(U_FAILURE(*err)) {
3854             goto error;
3855         }
3856
3857         likelySubtags =
3858             findLikelySubtags(
3859                 tagBuffer,
3860                 likelySubtagsBuffer,
3861                 sizeof(likelySubtagsBuffer),
3862                 err);
3863         if(U_FAILURE(*err)) {
3864             goto error;
3865         }
3866
3867         if (likelySubtags != NULL) {
3868             /* Always use the language tag from the
3869                maximal string, since it may be more
3870                specific than the one provided. */
3871             return createTagStringWithAlternates(
3872                         NULL,
3873                         0,
3874                         script,
3875                         scriptLength,
3876                         NULL,
3877                         0,
3878                         variants,
3879                         variantsLength,
3880                         likelySubtags,
3881                         tag,
3882                         tagCapacity,
3883                         err);
3884         }
3885     }
3886
3887     /**
3888      * Finally, try just the language.
3889      **/
3890     {
3891         const char* likelySubtags = NULL;
3892
3893         createTagString(
3894             lang,
3895             langLength,
3896             NULL,
3897             0,
3898             NULL,
3899             0,
3900             NULL,
3901             0,
3902             tagBuffer,
3903             sizeof(tagBuffer),
3904             err);
3905         if(U_FAILURE(*err)) {
3906             goto error;
3907         }
3908
3909         likelySubtags =
3910             findLikelySubtags(
3911                 tagBuffer,
3912                 likelySubtagsBuffer,
3913                 sizeof(likelySubtagsBuffer),
3914                 err);
3915         if(U_FAILURE(*err)) {
3916             goto error;
3917         }
3918
3919         if (likelySubtags != NULL) {
3920             /* Always use the language tag from the
3921                maximal string, since it may be more
3922                specific than the one provided. */
3923             return createTagStringWithAlternates(
3924                         NULL,
3925                         0,
3926                         script,
3927                         scriptLength,
3928                         region,
3929                         regionLength,
3930                         variants,
3931                         variantsLength,
3932                         likelySubtags,
3933                         tag,
3934                         tagCapacity,
3935                         err);
3936         }
3937     }
3938
3939     return u_terminateChars(
3940                 tag,
3941                 tagCapacity,
3942                 0,
3943                 err);
3944
3945 error:
3946
3947     if (!U_FAILURE(*err)) {
3948         *err = U_ILLEGAL_ARGUMENT_ERROR;
3949     }
3950
3951     return -1;
3952 }
3953
3954 static int32_t
3955 _uloc_addLikelySubtags(const char*    localeID,
3956          char* maximizedLocaleID,
3957          int32_t maximizedLocaleIDCapacity,
3958          UErrorCode* err)
3959 {
3960     char lang[ULOC_LANG_CAPACITY];
3961     int32_t langLength = sizeof(lang);
3962     char script[ULOC_SCRIPT_CAPACITY];
3963     int32_t scriptLength = sizeof(script);
3964     char region[ULOC_COUNTRY_CAPACITY];
3965     int32_t regionLength = sizeof(region);
3966     const char* trailing = "";
3967     int32_t trailingLength = 0;
3968     int32_t trailingIndex = 0;
3969     int32_t resultLength = 0;
3970
3971     if(U_FAILURE(*err)) {
3972         goto error;
3973     }
3974     else if (localeID == NULL ||
3975              maximizedLocaleID == NULL ||
3976              maximizedLocaleIDCapacity <= 0) {
3977         goto error;
3978     }
3979
3980     trailingIndex = parseTagString(
3981         localeID,
3982         lang,
3983         &langLength,
3984         script,
3985         &scriptLength,
3986         region,
3987         &regionLength,
3988         err);
3989     if(U_FAILURE(*err)) {
3990         /* Overflow indicates an illegal argument error */
3991         if (*err == U_BUFFER_OVERFLOW_ERROR) {
3992             *err = U_ILLEGAL_ARGUMENT_ERROR;
3993         }
3994
3995         goto error;
3996     }
3997
3998     /* Find the length of the trailing portion. */
3999     trailing = &localeID[trailingIndex];
4000     trailingLength = uprv_strlen(trailing);
4001
4002     resultLength =
4003         createLikelySubtagsString(
4004             lang,
4005             langLength,
4006             script,
4007             scriptLength,
4008             region,
4009             regionLength,
4010             trailing,
4011             trailingLength,
4012             maximizedLocaleID,
4013             maximizedLocaleIDCapacity,
4014             err);
4015
4016     if (resultLength == 0) {
4017         const int32_t localIDLength =
4018             uprv_strlen(localeID);
4019
4020         /*
4021          * If we get here, we need to return localeID.
4022          */
4023         uprv_memcpy(
4024             maximizedLocaleID,
4025             localeID,
4026             localIDLength <= maximizedLocaleIDCapacity ?
4027                 localIDLength : maximizedLocaleIDCapacity);
4028
4029         resultLength =
4030             u_terminateChars(
4031                 maximizedLocaleID,
4032                 maximizedLocaleIDCapacity,
4033                 localIDLength,
4034                 err);
4035     }
4036
4037     return resultLength;
4038
4039 error:
4040
4041     if (!U_FAILURE(*err)) {
4042         *err = U_ILLEGAL_ARGUMENT_ERROR;
4043     }
4044
4045     return -1;
4046 }
4047
4048 static int32_t
4049 _uloc_minimizeSubtags(const char*    localeID,
4050          char* minimizedLocaleID,
4051          int32_t minimizedLocaleIDCapacity,
4052          UErrorCode* err)
4053 {
4054     /**
4055      * ULOC_FULLNAME_CAPACITY will provide enough capacity
4056      * that we can build a string that contains the language,
4057      * script and region code without worrying about overrunning
4058      * the user-supplied buffer.
4059      **/
4060     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
4061     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
4062
4063     char lang[ULOC_LANG_CAPACITY];
4064     int32_t langLength = sizeof(lang);
4065     char script[ULOC_SCRIPT_CAPACITY];
4066     int32_t scriptLength = sizeof(script);
4067     char region[ULOC_COUNTRY_CAPACITY];
4068     int32_t regionLength = sizeof(region);
4069     const char* trailing = "";
4070     int32_t trailingLength = 0;
4071     int32_t trailingIndex = 0;
4072
4073     if(U_FAILURE(*err)) {
4074         goto error;
4075     }
4076     else if (localeID == NULL ||
4077              minimizedLocaleID == NULL ||
4078              minimizedLocaleIDCapacity <= 0) {
4079         goto error;
4080     }
4081
4082     trailingIndex =
4083         parseTagString(
4084             localeID,
4085             lang,
4086             &langLength,
4087             script,
4088             &scriptLength,
4089             region,
4090             &regionLength,
4091             err);
4092     if(U_FAILURE(*err)) {
4093
4094         /* Overflow indicates an illegal argument error */
4095         if (*err == U_BUFFER_OVERFLOW_ERROR) {
4096             *err = U_ILLEGAL_ARGUMENT_ERROR;
4097         }
4098
4099         goto error;
4100     }
4101
4102     /* Find the spot where the variants begin, if any. */
4103     trailing = &localeID[trailingIndex];
4104     trailingLength = uprv_strlen(trailing);
4105
4106     createTagString(
4107         lang,
4108         langLength,
4109         script,
4110         scriptLength,
4111         region,
4112         regionLength,
4113         NULL,
4114         0,
4115         maximizedTagBuffer,
4116         maximizedTagBufferLength,
4117         err);
4118     if(U_FAILURE(*err)) {
4119         goto error;
4120     }
4121
4122     /**
4123      * First, we need to first get the maximization
4124      * from AddLikelySubtags.
4125      **/
4126     maximizedTagBufferLength =
4127         uloc_addLikelySubtags(
4128             maximizedTagBuffer,
4129             maximizedTagBuffer,
4130             maximizedTagBufferLength,
4131             err);
4132
4133     if(U_FAILURE(*err)) {
4134         goto error;
4135     }
4136
4137     /**
4138      * Start first with just the language.
4139      **/
4140     {
4141         char tagBuffer[ULOC_FULLNAME_CAPACITY];
4142
4143         const int32_t tagBufferLength =
4144             createLikelySubtagsString(
4145                 lang,
4146                 langLength,
4147                 NULL,
4148                 0,
4149                 NULL,
4150                 0,
4151                 NULL,
4152                 0,
4153                 tagBuffer,
4154                 sizeof(tagBuffer),
4155                 err);
4156
4157         if(U_FAILURE(*err)) {
4158             goto error;
4159         }
4160         else if (uprv_strnicmp(
4161                     maximizedTagBuffer,
4162                     tagBuffer,
4163                     tagBufferLength) == 0) {
4164
4165             return createTagString(
4166                         lang,
4167                         langLength,
4168                         NULL,
4169                         0,
4170                         NULL,
4171                         0,
4172                         trailing,
4173                         trailingLength,
4174                         minimizedLocaleID,
4175                         minimizedLocaleIDCapacity,
4176                         err);
4177         }
4178     }
4179
4180     /**
4181      * Next, try the language and region.
4182      **/
4183     if (regionLength > 0) {
4184
4185         char tagBuffer[ULOC_FULLNAME_CAPACITY];
4186
4187         const int32_t tagBufferLength =
4188             createLikelySubtagsString(
4189                 lang,
4190                 langLength,
4191                 NULL,
4192                 0,
4193                 region,
4194                 regionLength,
4195                 NULL,
4196                 0,
4197                 tagBuffer,
4198                 sizeof(tagBuffer),
4199                 err);
4200
4201         if(U_FAILURE(*err)) {
4202             goto error;
4203         }
4204         else if (uprv_strnicmp(
4205                     maximizedTagBuffer,
4206                     tagBuffer,
4207                     tagBufferLength) == 0) {
4208
4209             return createTagString(
4210                         lang,
4211                         langLength,
4212                         NULL,
4213                         0,
4214                         region,
4215                         regionLength,
4216                         trailing,
4217                         trailingLength,
4218                         minimizedLocaleID,
4219                         minimizedLocaleIDCapacity,
4220                         err);
4221         }
4222     }
4223
4224     /**
4225      * Finally, try the language and script.  This is our last chance,
4226      * since trying with all three subtags would only yield the
4227      * maximal version that we already have.
4228      **/
4229     if (scriptLength > 0 && regionLength > 0) {
4230         char tagBuffer[ULOC_FULLNAME_CAPACITY];
4231
4232         const int32_t tagBufferLength =
4233             createLikelySubtagsString(
4234                 lang,
4235                 langLength,
4236                 script,
4237                 scriptLength,
4238                 NULL,
4239                 0,
4240                 NULL,
4241                 0,
4242                 tagBuffer,
4243                 sizeof(tagBuffer),
4244                 err);
4245
4246         if(U_FAILURE(*err)) {
4247             goto error;
4248         }
4249         else if (uprv_strnicmp(
4250                     maximizedTagBuffer,
4251                     tagBuffer,
4252                     tagBufferLength) == 0) {
4253
4254             return createTagString(
4255                         lang,
4256                         langLength,
4257                         script,
4258                         scriptLength,
4259                         NULL,
4260                         0,
4261                         trailing,
4262                         trailingLength,
4263                         minimizedLocaleID,
4264                         minimizedLocaleIDCapacity,
4265                         err);
4266         }
4267     }
4268
4269     {
4270         /**
4271          * If we got here, return the locale ID parameter.
4272          **/
4273         const int32_t localeIDLength = uprv_strlen(localeID);
4274
4275         uprv_memcpy(
4276             minimizedLocaleID,
4277             localeID,
4278             localeIDLength <= minimizedLocaleIDCapacity ?
4279                 localeIDLength : minimizedLocaleIDCapacity);
4280
4281         return u_terminateChars(
4282                     minimizedLocaleID,
4283                     minimizedLocaleIDCapacity,
4284                     localeIDLength,
4285                     err);
4286     }
4287
4288 error:
4289
4290     if (!U_FAILURE(*err)) {
4291         *err = U_ILLEGAL_ARGUMENT_ERROR;
4292     }
4293
4294     return -1;
4295
4296
4297 }
4298
4299 static UBool
4300 do_canonicalize(const char*    localeID,
4301          char* buffer,
4302          int32_t bufferCapacity,
4303          UErrorCode* err)
4304 {
4305     uloc_canonicalize(
4306         localeID,
4307         buffer,
4308         bufferCapacity,
4309         err);
4310
4311     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
4312         *err == U_BUFFER_OVERFLOW_ERROR) {
4313         *err = U_ILLEGAL_ARGUMENT_ERROR;
4314
4315         return FALSE;
4316     }
4317     else if (U_FAILURE(*err)) {
4318
4319         return FALSE;
4320     }
4321     else {
4322         return TRUE;
4323     }
4324 }
4325
4326 U_DRAFT int32_t U_EXPORT2
4327 uloc_addLikelySubtags(const char*    localeID,
4328          char* maximizedLocaleID,
4329          int32_t maximizedLocaleIDCapacity,
4330          UErrorCode* err)
4331 {
4332     char localeBuffer[ULOC_FULLNAME_CAPACITY];
4333
4334     if (!do_canonicalize(
4335         localeID,
4336         localeBuffer,
4337         sizeof(localeBuffer),
4338         err)) {
4339         return -1;
4340     }
4341     else {
4342         return _uloc_addLikelySubtags(
4343                     localeBuffer,
4344                     maximizedLocaleID,
4345                     maximizedLocaleIDCapacity,
4346                     err);
4347     }
4348 }
4349
4350 U_DRAFT int32_t U_EXPORT2
4351 uloc_minimizeSubtags(const char*    localeID,
4352          char* minimizedLocaleID,
4353          int32_t minimizedLocaleIDCapacity,
4354          UErrorCode* err)
4355 {
4356     char localeBuffer[ULOC_FULLNAME_CAPACITY];
4357
4358     if (!do_canonicalize(
4359         localeID,
4360         localeBuffer,
4361         sizeof(localeBuffer),
4362         err)) {
4363         return -1;
4364     }
4365     else {
4366         return _uloc_minimizeSubtags(
4367                     localeBuffer,
4368                     minimizedLocaleID,
4369                     minimizedLocaleIDCapacity,
4370                     err);
4371     }
4372 }
4373
4374 /*eof*/