git.saurik.com Git - apple/icu.git/blob - icuSources/common/uloc.cpp

2 // License & terms of use: http://www.unicode.org/copyright.html

3 /*

4 **********************************************************************

7 **********************************************************************

8 *

9 * File ULOC.CPP

10 *

11 * Modification History:

12 *

13 * Date Name Description

14 * 04/01/97 aliu Creation.

15 * 08/21/98 stephen JDK 1.2 sync

16 * 12/08/98 rtg New Locale implementation and C API

17 * 03/15/99 damiba overhaul.

18 * 04/06/99 stephen changed setDefault() to realloc and copy

19 * 06/14/99 stephen Changed calls to ures_open for new params

20 * 07/21/99 stephen Modified setDefault() to propagate to C++

21 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,

22 * brought canonicalization code into line with spec

23 *****************************************************************************/

25 /*

26 POSIX's locale format, from putil.c: [no spaces]

28 ll [ _CC ] [ . MM ] [ @ VV]

30 l = lang, C = ctry, M = charmap, V = variant

31 */

33 #include "unicode/utypes.h"

34 #include "unicode/ustring.h"

35 #include "unicode/uloc.h"

37 #include "putilimp.h"

38 #include "ustr_imp.h"

39 #include "ulocimp.h"

40 #include "umutex.h"

41 #include "cstring.h"

42 #include "cmemory.h"

43 #include "locmap.h"

44 #include "uarrsort.h"

45 #include "uenumimp.h"

46 #include "uassert.h"

47 #include "charstr.h"

49 #include <stdio.h> /* for sprintf */

51 U_NAMESPACE_USE

/* ### Declarations **************************************************/

/* Locale stuff from locid.cpp */
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default(void);

/*
 * Forward declaration of the keyword extractor that is defined further
 * down in this file (it forwards to the file-local _getKeywords()).
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
                   char prev,
                   char *keywords, int32_t keywordCapacity,
                   char *values, int32_t valuesCapacity, int32_t *valLen,
                   UBool valuesToo,
                   UErrorCode *status);

/* ### Data tables **************************************************/

/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table but now at the end of the table because 3 character codes are
 * duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "mo",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL,
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL
};


/*
 * Withdrawn language codes.  Entry i corresponds to
 * REPLACEMENT_LANGUAGES[i]; the list ends with two NULL entries.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};

/*
 * Current codes for the entries of DEPRECATED_LANGUAGES, index for
 * index; the list ends with two NULL entries.
 */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};


/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "mol",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};


/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AC",  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CP",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TA",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL
};


/*
 * Deprecated country codes.  Entry i corresponds to
 * REPLACEMENT_COUNTRIES[i]; the list ends with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};

/*
 * Current codes for the entries of DEPRECATED_COUNTRIES, index for
 * index; the list ends with two NULL entries.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};


/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AC",  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CP",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TA",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL
};


/* One row of a locale-ID canonicalization table: input ID -> canonical ID. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
} CanonicalizationMap;


464 /**

465 * A map to canonicalize locale IDs. This handles a variety of

466 * different semantic kinds of transformations.

467 */

468 static const CanonicalizationMap CANONICALIZE_MAP[] = {

     { "",               "en_US_POSIX" }, /* .NET name */ // open ICU 64 deleted, we restore

     { "c",              "en_US_POSIX" }, /* POSIX name */ // open ICU 64 deleted, we restore

     { "posix",          "en_US_POSIX" }, /* POSIX name (alias of C) */ // open ICU 64 deleted, we restore

     { "art_LOJBAN",     "jbo" }, /* registered name */

     { "hy__AREVELA",    "hy" }, /* Registered IANA variant */

     { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */

     { "zh_GAN",         "gan" }, /* registered name */

     { "zh_GUOYU",       "zh" }, /* registered name */

     { "zh_HAKKA",       "hak" }, /* registered name */

     { "zh_MIN_NAN",     "nan" }, /* registered name */

     { "zh_WUU",         "wuu" }, /* registered name */

     { "zh_XIANG",       "hsn" }, /* registered name */

     { "zh_YUE",         "yue" }, /* registered name */

482 };

483

/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has BCP47 u extension and does not have '@'.
 *
 * True when id is non-NULL, contains no '@', and
 * getShortestSubtagLength(id) reports a one-character subtag.
 * The macro now tests its own argument; it used to refer to a variable
 * literally named `localeID` from the caller's scope.  Note that `id`
 * is evaluated up to three times, so pass an expression without side
 * effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)

/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * Calls uloc_forLanguageTag(id, buffer, length, NULL, err).  finalID is
 * set to buffer when that call returns a positive length, *err is not a
 * failure and the result was terminated; otherwise finalID is set to id.
 * A U_STRING_NOT_TERMINATED_WARNING is promoted to
 * U_BUFFER_OVERFLOW_ERROR.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) UPRV_BLOCK_MACRO_BEGIN { \
    if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
            U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
        (finalID) = (id); \
        if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { *(err) = U_BUFFER_OVERFLOW_ERROR; } \
    } else { \
        (finalID) = (buffer); \
    } \
} UPRV_BLOCK_MACRO_END

/*
 * Gets the size of the shortest subtag in the given localeID.
 *
 * Subtags are runs of characters delimited by '_' or '-'.  Only subtags
 * that are followed by a separator take part in the minimum; the final
 * run (after the last separator) is not measured.  When no subtag is
 * followed by a separator, the full string length is returned.
 *
 * @param localeID NUL-terminated locale ID; must not be NULL.
 * @return length of the shortest separator-terminated subtag, or the
 *         length of localeID if there is none.
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    int32_t shortest = -1;  /* -1: no separator-terminated subtag seen yet */
    int32_t current = 0;    /* length of the subtag being scanned */
    int32_t total = 0;      /* characters consumed so far */
    const char *p;

    for (p = localeID; *p != 0; ++p, ++total) {
        if (*p == '_' || *p == '-') {
            /* Empty runs (leading or doubled separators) are ignored. */
            if (current != 0 && (shortest < 0 || current < shortest)) {
                shortest = current;
            }
            current = 0;
        } else {
            ++current;
        }
    }

    return (shortest < 0) ? total : shortest;
}


/* ### Keywords **************************************************/
/* ASCII digit test. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* ASCII letter-or-digit test; evaluates c more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size of the fixed keyword-name buffers used below, terminator included. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of distinct keywords collected from one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25


532 U_CAPI const char * U_EXPORT2

 locale_getKeywordsStart(const char *localeID) {

534 const char *result = NULL;

     if((result = uprv_strchr(localeID, '@')) != NULL) {

536 return result;

537 }

538 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)

539 else {

540 /* We do this because the @ sign is variant, and the @ sign used on one

541 EBCDIC machine won't be compiled the same way on other EBCDIC based

542 machines. */

         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };

544 const uint8_t *charToFind = ebcdicSigns;

545 while(*charToFind) {

             if((result = uprv_strchr(localeID, *charToFind)) != NULL) {

547 return result;

548 }

549 charToFind++;

550 }

551 }

552 #endif

553 return NULL;

554 }

555

556 /**

557 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]

558 * @param keywordName incoming name to be canonicalized

559 * @param status return status (keyword too long)

560 * @return length of the keyword name

561 */

 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)

563 {

564 int32_t keywordNameLen = 0;

565

   for (; *keywordName != 0; keywordName++) {

     if (!UPRV_ISALPHANUM(*keywordName)) {

568 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */

569 return 0;

570 }

     if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {

       buf[keywordNameLen++] = uprv_tolower(*keywordName);

573 } else {

574 /* keyword name too long for internal buffer */

575 *status = U_INTERNAL_PROGRAM_ERROR;

576 return 0;

577 }

578 }

   if (keywordNameLen == 0) {

580 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */

581 return 0;

582 }

   buf[keywordNameLen] = 0; /* terminate */

584

585 return keywordNameLen;

586 }

587

588 typedef struct {

589 char keyword[ULOC_KEYWORD_BUFFER_LEN];

590 int32_t keywordLen;

591 const char *valueStart;

592 int32_t valueLen;

593 } KeywordStruct;

594

595 static int32_t U_CALLCONV

 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {

     const char* leftString = ((const KeywordStruct *)left)->keyword;

     const char* rightString = ((const KeywordStruct *)right)->keyword;

     return uprv_strcmp(leftString, rightString);

600 }

601

602 static int32_t

 _getKeywords(const char *localeID,

604 char prev,

              char *keywords, int32_t keywordCapacity,

              char *values, int32_t valuesCapacity, int32_t *valLen,

607 UBool valuesToo,

608 UErrorCode *status)

609 {

610 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];

611

612 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;

613 int32_t numKeywords = 0;

614 const char* pos = localeID;

615 const char* equalSign = NULL;

616 const char* semicolon = NULL;

     int32_t i = 0, j, n;

618 int32_t keywordsLen = 0;

619 int32_t valuesLen = 0;

620

     if(prev == '@') { /* start of keyword definition */

622 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */

623 do {

624 UBool duplicate = FALSE;

625 /* skip leading spaces */

             while(*pos == ' ') {

627 pos++;

628 }

629 if (!*pos) { /* handle trailing "; " */

630 break;

631 }

632 if(numKeywords == maxKeywords) {

633 *status = U_INTERNAL_PROGRAM_ERROR;

634 return 0;

635 }

             equalSign = uprv_strchr(pos, '=');

             semicolon = uprv_strchr(pos, ';');

638 /* lack of '=' [foo@currency] is illegal */

639 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */

             if(!equalSign || (semicolon && semicolon<equalSign)) {

641 *status = U_INVALID_FORMAT_ERROR;

642 return 0;

643 }

644 /* need to normalize both keyword and keyword name */

             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {

646 /* keyword name too long for internal buffer */

647 *status = U_INTERNAL_PROGRAM_ERROR;

648 return 0;

649 }

             for(i = 0, n = 0; i < equalSign - pos; ++i) {

                 if (pos[i] != ' ') {

                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);

653 }

654 }

655

656 /* zero-length keyword is an error. */

             if (n == 0) {

658 *status = U_INVALID_FORMAT_ERROR;

659 return 0;

660 }

661

             keywordList[numKeywords].keyword[n] = 0;

663 keywordList[numKeywords].keywordLen = n;

664 /* now grab the value part. First we skip the '=' */

665 equalSign++;

666 /* then we leading spaces */

             while(*equalSign == ' ') {

668 equalSign++;

669 }

670

671 /* Premature end or zero-length value */

             if (!*equalSign || equalSign == semicolon) {

673 *status = U_INVALID_FORMAT_ERROR;

674 return 0;

675 }

676

677 keywordList[numKeywords].valueStart = equalSign;

678

679 pos = semicolon;

680 i = 0;

681 if(pos) {

                 while(*(pos - i - 1) == ' ') {

683 i++;

684 }

                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);

686 pos++;

687 } else {

                 i = (int32_t)uprv_strlen(equalSign);

                 while(i && equalSign[i-1] == ' ') {

690 i--;

691 }

692 keywordList[numKeywords].valueLen = i;

693 }

694 /* If this is a duplicate keyword, then ignore it */

             for (j=0; j<numKeywords; ++j) {

                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {

697 duplicate = TRUE;

698 break;

699 }

700 }

701 if (!duplicate) {

702 ++numKeywords;

703 }

704 } while(pos);

705

706 /* now we have a list of keywords */

707 /* we need to sort it */

         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);

709

710 /* Now construct the keyword part */

         for(i = 0; i < numKeywords; i++) {

             if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {

                 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);

714 if(valuesToo) {

                     keywords[keywordsLen + keywordList[i].keywordLen] = '=';

716 } else {

                     keywords[keywordsLen + keywordList[i].keywordLen] = 0;

718 }

719 }

             keywordsLen += keywordList[i].keywordLen + 1;

721 if(valuesToo) {

                 if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {

                     uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);

724 }

725 keywordsLen += keywordList[i].valueLen;

726

                 if(i < numKeywords - 1) {

728 if(keywordsLen < keywordCapacity) {

729 keywords[keywordsLen] = ';';

730 }

731 keywordsLen++;

732 }

733 }

734 if(values) {

                 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {

                     uprv_strcpy(values+valuesLen, keywordList[i].valueStart);

                     values[valuesLen + keywordList[i].valueLen] = 0;

738 }

                 valuesLen += keywordList[i].valueLen + 1;

740 }

741 }

742 if(values) {

743 values[valuesLen] = 0;

744 if(valLen) {

745 *valLen = valuesLen;

746 }

747 }

         return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);

749 } else {

750 return 0;

751 }

752 }

753

754 U_CFUNC int32_t

 locale_getKeywords(const char *localeID,

756 char prev,

                    char *keywords, int32_t keywordCapacity,

                    char *values, int32_t valuesCapacity, int32_t *valLen,

759 UBool valuesToo,

760 UErrorCode *status) {

     return _getKeywords(localeID, prev, keywords, keywordCapacity,

762 values, valuesCapacity, valLen, valuesToo,

763 status);

764 }

765

766 U_CAPI int32_t U_EXPORT2

 uloc_getKeywordValue(const char* localeID,

768 const char* keywordName,

                      char* buffer, int32_t bufferCapacity,

770 UErrorCode* status)

771 {

     if (buffer != nullptr) {

         buffer[0] = '\0';

774 }

775 const char* startSearchHere = NULL;

776 const char* nextSeparator = NULL;

777 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

778 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

779 int32_t result = 0;

780

     if(status && U_SUCCESS(*status) && localeID) {

782 char tempBuffer[ULOC_FULLNAME_CAPACITY];

783 const char* tmpLocaleID;

784

       if (keywordName == NULL || keywordName[0] == 0) {

786 *status = U_ILLEGAL_ARGUMENT_ERROR;

787 return 0;

788 }

789

       locale_canonKeywordName(keywordNameBuffer, keywordName, status);

       if(U_FAILURE(*status)) {

792 return 0;

793 }

794

       if (_hasBCP47Extension(localeID)) {

           _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);

797 } else {

798 tmpLocaleID=localeID;

799 }

800

801 startSearchHere = locale_getKeywordsStart(tmpLocaleID);

802 if(startSearchHere == NULL) {

803 /* no keywords, return at once */

804 return 0;

805 }

806

807 /* find the first keyword */

808 while(startSearchHere) {

809 const char* keyValueTail;

810 int32_t keyValueLen;

811

812 startSearchHere++; /* skip @ or ; */

           nextSeparator = uprv_strchr(startSearchHere, '=');

814 if(!nextSeparator) {

815 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */

816 return 0;

817 }

818 /* strip leading & trailing spaces (TC decided to tolerate these) */

           while(*startSearchHere == ' ') {

820 startSearchHere++;

821 }

822 keyValueTail = nextSeparator;

           while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {

824 keyValueTail--;

825 }

826 /* now keyValueTail points to first char after the keyName */

827 /* copy & normalize keyName from locale */

828 if (startSearchHere == keyValueTail) {

829 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */

830 return 0;

831 }

832 keyValueLen = 0;

833 while (startSearchHere < keyValueTail) {

             if (!UPRV_ISALPHANUM(*startSearchHere)) {

835 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */

836 return 0;

837 }

             if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {

               localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);

840 } else {

841 /* keyword name too long for internal buffer */

842 *status = U_INTERNAL_PROGRAM_ERROR;

843 return 0;

844 }

845 }

           localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */

847

           startSearchHere = uprv_strchr(nextSeparator, ';');

849

           if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {

851 /* current entry matches the keyword. */

852 nextSeparator++; /* skip '=' */

853 /* First strip leading & trailing spaces (TC decided to tolerate these) */

               while(*nextSeparator == ' ') {

855 nextSeparator++;

856 }

               keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);

               while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {

859 keyValueTail--;

860 }

861 /* Now copy the value, but check well-formedness */

862 if (nextSeparator == keyValueTail) {

863 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */

864 return 0;

865 }

866 keyValueLen = 0;

867 while (nextSeparator < keyValueTail) {

                 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {

869 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */

870 return 0;

871 }

872 if (keyValueLen < bufferCapacity) {

873 /* Should we lowercase value to return here? Tests expect as-is. */

874 buffer[keyValueLen++] = *nextSeparator++;

875 } else { /* keep advancing so we return correct length in case of overflow */

876 keyValueLen++;

877 nextSeparator++;

878 }

879 }

               result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);

881 return result;

882 }

883 }

884 }

885 return 0;

886 }

887

888 U_CAPI int32_t U_EXPORT2

 uloc_setKeywordValue(const char* keywordName,

890 const char* keywordValue,

                      char* buffer, int32_t bufferCapacity,

892 UErrorCode* status)

893 {

894 /* TODO: sorting. removal. */

895 int32_t keywordNameLen;

896 int32_t keywordValueLen;

897 int32_t bufLen;

898 int32_t needLen = 0;

899 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

     char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];

901 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

902 int32_t rc;

903 char* nextSeparator = NULL;

904 char* nextEqualsign = NULL;

905 char* startSearchHere = NULL;

906 char* keywordStart = NULL;

907 CharString updatedKeysAndValues;

908 int32_t updatedKeysAndValuesLen;

909 UBool handledInputKeyAndValue = FALSE;

910 char keyValuePrefix = '@';

911

     if(U_FAILURE(*status)) {

913 return -1;

914 }

     if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {

916 *status = U_ILLEGAL_ARGUMENT_ERROR;

917 return 0;

918 }

     bufLen = (int32_t)uprv_strlen(buffer);

920 if(bufferCapacity<bufLen) {

921 /* The capacity is less than the length?! Is this NULL terminated? */

922 *status = U_ILLEGAL_ARGUMENT_ERROR;

923 return 0;

924 }

     keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);

     if(U_FAILURE(*status)) {

927 return 0;

928 }

929

930 keywordValueLen = 0;

931 if(keywordValue) {

         while (*keywordValue != 0) {

             if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {

934 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */

935 return 0;

936 }

937 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {

938 /* Should we force lowercase in value to set? */

939 keywordValueBuffer[keywordValueLen++] = *keywordValue++;

940 } else {

941 /* keywordValue too long for internal buffer */

942 *status = U_INTERNAL_PROGRAM_ERROR;

943 return 0;

944 }

945 }

946 }

     keywordValueBuffer[keywordValueLen] = 0; /* terminate */

948

     startSearchHere = (char*)locale_getKeywordsStart(buffer);

     if(startSearchHere == NULL || (startSearchHere[1]==0)) {

         if(keywordValueLen == 0) { /* no keywords = nothing to remove */

952 return bufLen;

953 }

954

         needLen = bufLen+1+keywordNameLen+1+keywordValueLen;

956 if(startSearchHere) { /* had a single @ */

957 needLen--; /* already had the @ */

958 /* startSearchHere points at the @ */

959 } else {

960 startSearchHere=buffer+bufLen;

961 }

962 if(needLen >= bufferCapacity) {

963 *status = U_BUFFER_OVERFLOW_ERROR;

964 return needLen; /* no change */

965 }

966 *startSearchHere++ = '@';

967 uprv_strcpy(startSearchHere, keywordNameBuffer);

968 startSearchHere += keywordNameLen;

969 *startSearchHere++ = '=';

970 uprv_strcpy(startSearchHere, keywordValueBuffer);

971 return needLen;

972 } /* end shortcut - no @ */

973

974 keywordStart = startSearchHere;

975 /* search for keyword */

976 while(keywordStart) {

977 const char* keyValueTail;

978 int32_t keyValueLen;

979

980 keywordStart++; /* skip @ or ; */

         nextEqualsign = uprv_strchr(keywordStart, '=');

982 if (!nextEqualsign) {

983 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */

984 return 0;

985 }

986 /* strip leading & trailing spaces (TC decided to tolerate these) */

         while(*keywordStart == ' ') {

988 keywordStart++;

989 }

990 keyValueTail = nextEqualsign;

         while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {

992 keyValueTail--;

993 }

994 /* now keyValueTail points to first char after the keyName */

995 /* copy & normalize keyName from locale */

996 if (keywordStart == keyValueTail) {

997 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */

998 return 0;

999 }

1000 keyValueLen = 0;

1001 while (keywordStart < keyValueTail) {

             if (!UPRV_ISALPHANUM(*keywordStart)) {

1003 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */

1004 return 0;

1005 }

             if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {

                 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);

1008 } else {

1009 /* keyword name too long for internal buffer */

1010 *status = U_INTERNAL_PROGRAM_ERROR;

1011 return 0;

1012 }

1013 }

         localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */

1015

         nextSeparator = uprv_strchr(nextEqualsign, ';');

1017

1018 /* start processing the value part */

1019 nextEqualsign++; /* skip '=' */

1020 /* First strip leading & trailing spaces (TC decided to tolerate these) */

         while(*nextEqualsign == ' ') {

1022 nextEqualsign++;

1023 }

         keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);

         while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {

1026 keyValueTail--;

1027 }

1028 if (nextEqualsign == keyValueTail) {

1029 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */

1030 return 0;

1031 }

1032

         rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);

         if(rc == 0) {

1035 /* Current entry matches the input keyword. Update the entry */

             if(keywordValueLen > 0) { /* updating a value */

                 updatedKeysAndValues.append(keyValuePrefix, *status);

1038 keyValuePrefix = ';'; /* for any subsequent key-value pair */

                 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);

                 updatedKeysAndValues.append('=', *status);

                 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);

1042 } /* else removing this entry, don't emit anything */

1043 handledInputKeyAndValue = TRUE;

1044 } else {

1045 /* input keyword sorts earlier than current entry, add before current entry */

             if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {

1047 /* insert new entry at this location */

                 updatedKeysAndValues.append(keyValuePrefix, *status);

1049 keyValuePrefix = ';'; /* for any subsequent key-value pair */

                 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);

                 updatedKeysAndValues.append('=', *status);

                 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);

1053 handledInputKeyAndValue = TRUE;

1054 }

1055 /* copy the current entry */

             updatedKeysAndValues.append(keyValuePrefix, *status);

1057 keyValuePrefix = ';'; /* for any subsequent key-value pair */

             updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);

             updatedKeysAndValues.append('=', *status);

             updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);

1061 }

         if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {

1063 /* append new entry at the end, it sorts later than existing entries */

             updatedKeysAndValues.append(keyValuePrefix, *status);

1065 /* skip keyValuePrefix update, no subsequent key-value pair */

             updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);

             updatedKeysAndValues.append('=', *status);

             updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);

1069 handledInputKeyAndValue = TRUE;

1070 }

1071 keywordStart = nextSeparator;

1072 } /* end loop searching */

1073

1074 /* Any error from updatedKeysAndValues.append above would be internal and not due to

1075 * problems with the passed-in locale. So if we did encounter problems with the

1076 * passed-in locale above, those errors took precedence and overrode any error

1077 * status from updatedKeysAndValues.append, and also caused a return of 0. If there

1078 * are errors here they are from updatedKeysAndValues.append; they do cause an

1079 * error return but the passed-in locale is unmodified and the original bufLen is

1080 * returned.

1081 */

     if (!handledInputKeyAndValue || U_FAILURE(*status)) {

1083 /* if input key/value specified removal of a keyword not present in locale, or

1084 * there was an error in CharString.append, leave original locale alone. */

1085 return bufLen;

1086 }

1087

1088 updatedKeysAndValuesLen = updatedKeysAndValues.length();

1089 /* needLen = length of the part before '@' + length of updated key-value part including '@' */

     needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;

1091 if(needLen >= bufferCapacity) {

1092 *status = U_BUFFER_OVERFLOW_ERROR;

1093 return needLen; /* no change */

1094 }

     if (updatedKeysAndValuesLen > 0) {

         uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);

1097 }

1098 buffer[needLen]=0;

1099 return needLen;

1100 }

1101

1102 /* ### ID parsing implementation **************************************************/

1103

 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))

1105

1106 /*returns TRUE if one of the special prefixes is here (s=string)

1107 'x-' or 'i-' */

1108 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))

1109

1110 /* Dot terminates it because of POSIX form where dot precedes the codepage

1111 * except for variant

1112 */

 #define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))

1114

1115 /**

1116 * Lookup 'key' in the array 'list'. The array 'list' should contain

1117 * a NULL entry, followed by more entries, and a second NULL entry.

1118 *

1119 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or

1120 * COUNTRIES_3.

1121 */

 static int16_t _findIndex(const char* const* list, const char* key)

1123 {

     const char* const* anchor = list;

1125 int32_t pass = 0;

1126

1127 /* Make two passes through two NULL-terminated arrays at 'list' */

     while (pass++ < 2) {

1129 while (*list) {

             if (uprv_strcmp(key, *list) == 0) {

                 return (int16_t)(list - anchor);

1132 }

1133 list++;

1134 }

1135 ++list; /* skip final NULL *CWB*/

1136 }

1137 return -1;

1138 }

1139

1140 /* count the length of src while copying it to dest; return strlen(src) */

1141 static inline int32_t

 _copyCount(char *dest, int32_t destCapacity, const char *src) {

1143 const char *anchor;

1144 char c;

1145

1146 anchor=src;

1147 for(;;) {

         if((c=*src)==0) {

             return (int32_t)(src-anchor);

1150 }

         if(destCapacity<=0) {

             return (int32_t)((src-anchor)+uprv_strlen(src));

1153 }

1154 ++src;

1155 *dest++=c;

1156 --destCapacity;

1157 }

1158 }

1159

1160 U_CFUNC const char*

 uloc_getCurrentCountryID(const char* oldID){

     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);

     if (offset >= 0) {

1164 return REPLACEMENT_COUNTRIES[offset];

1165 }

1166 return oldID;

1167 }

1168 U_CFUNC const char*

 uloc_getCurrentLanguageID(const char* oldID){

     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);

     if (offset >= 0) {

1172 return REPLACEMENT_LANGUAGES[offset];

1173 }

1174 return oldID;

1175 }

1176 /*

1177 * the internal functions _getLanguage(), _getCountry(), _getVariant()

1178 * avoid duplicating code to handle the earlier locale ID pieces

1179 * in the functions for the later ones by

1180 * setting the *pEnd pointer to where they stopped parsing

1181 *

1182 * TODO try to use this in Locale

1183 */

1184 U_CFUNC int32_t

 ulocimp_getLanguage(const char *localeID,

                     char *language, int32_t languageCapacity,

1187 const char **pEnd) {

1188 int32_t i=0;

1189 int32_t offset;

     char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */

1191

1192 /* if it starts with i- or x- then copy that prefix */

     if(_isIDPrefix(localeID)) {

1194 if(i<languageCapacity) {

             language[i]=(char)uprv_tolower(*localeID);

1196 }

1197 if(i<languageCapacity) {

             language[i+1]='-';

1199 }

1200 i+=2;

1201 localeID+=2;

1202 }

1203

1204 /* copy the language as far as possible and count its length */

     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {

1206 if(i<languageCapacity) {

             language[i]=(char)uprv_tolower(*localeID);

1208 }

         if(i<3) {

             U_ASSERT(i>=0);

             lang[i]=(char)uprv_tolower(*localeID);

1212 }

1213 i++;

1214 localeID++;

1215 }

1216

     if(i==3) {

1218 /* convert 3 character code to 2 character code if possible *CWB*/

         offset=_findIndex(LANGUAGES_3, lang);

         if(offset>=0) {

             i=_copyCount(language, languageCapacity, LANGUAGES[offset]);

1222 }

1223 }

1224

1225 if(pEnd!=NULL) {

1226 *pEnd=localeID;

1227 }

1228 return i;

1229 }

1230

1231 U_CFUNC int32_t

 ulocimp_getScript(const char *localeID,

                   char *script, int32_t scriptCapacity,

1234 const char **pEnd)

1235 {

1236 int32_t idLen = 0;

1237

1238 if (pEnd != NULL) {

1239 *pEnd = localeID;

1240 }

1241

1242 /* copy the second item as far as possible and count its length */

     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])

             && uprv_isASCIILetter(localeID[idLen])) {

1245 idLen++;

1246 }

1247

1248 /* If it's exactly 4 characters long, then it's a script and not a country. */

     if (idLen == 4) {

1250 int32_t i;

1251 if (pEnd != NULL) {

1252 *pEnd = localeID+idLen;

1253 }

1254 if(idLen > scriptCapacity) {

1255 idLen = scriptCapacity;

1256 }

         if (idLen >= 1) {

             script[0]=(char)uprv_toupper(*(localeID++));

1259 }

         for (i = 1; i < idLen; i++) {

             script[i]=(char)uprv_tolower(*(localeID++));

1262 }

1263 }

1264 else {

1265 idLen = 0;

1266 }

1267 return idLen;

1268 }

1269

1270 U_CFUNC int32_t

 ulocimp_getCountry(const char *localeID,

                    char *country, int32_t countryCapacity,

1273 const char **pEnd)

1274 {

1275 int32_t idLen=0;

     char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };

1277 int32_t offset;

1278

1279 /* copy the country as far as possible and count its length */

     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {

         if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/

             cnty[idLen]=(char)uprv_toupper(localeID[idLen]);

1283 }

1284 idLen++;

1285 }

1286

1287 /* the country should be either length 2 or 3 */

     if (idLen == 2 || idLen == 3) {

1289 UBool gotCountry = FALSE;

1290 /* convert 3 character code to 2 character code if possible *CWB*/

         if(idLen==3) {

             offset=_findIndex(COUNTRIES_3, cnty);

             if(offset>=0) {

                 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);

1295 gotCountry = TRUE;

1296 }

1297 }

1298 if (!gotCountry) {

1299 int32_t i = 0;

             for (i = 0; i < idLen; i++) {

1301 if (i < countryCapacity) {

                     country[i]=(char)uprv_toupper(localeID[i]);

1303 }

1304 }

1305 }

1306 localeID+=idLen;

1307 } else {

1308 idLen = 0;

1309 }

1310

1311 if(pEnd!=NULL) {

1312 *pEnd=localeID;

1313 }

1314

1315 return idLen;

1316 }

1317

1318 /**

1319 * @param needSeparator if true, then add leading '_' if any variants

1320 * are added to 'variant'

1321 */

1322 static int32_t

 _getVariantEx(const char *localeID,

1324 char prev,

               char *variant, int32_t variantCapacity,

1326 UBool needSeparator) {

1327 int32_t i=0;

1328

1329 /* get one or more variant tags and separate them with '_' */

     if(_isIDSeparator(prev)) {

1331 /* get a variant string after a '-' or '_' */

         while(!_isTerminator(*localeID)) {

1333 if (needSeparator) {

1334 if (i<variantCapacity) {

1335 variant[i] = '_';

1336 }

1337 ++i;

1338 needSeparator = FALSE;

1339 }

1340 if(i<variantCapacity) {

                 variant[i]=(char)uprv_toupper(*localeID);

                 if(variant[i]=='-') {

1343 variant[i]='_';

1344 }

1345 }

1346 i++;

1347 localeID++;

1348 }

1349 }

1350

1351 /* if there is no variant tag after a '-' or '_' then look for '@' */

     if(i==0) {

         if(prev=='@') {

1354 /* keep localeID */

         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {

1356 ++localeID; /* point after the '@' */

1357 } else {

1358 return 0;

1359 }

         while(!_isTerminator(*localeID)) {

1361 if (needSeparator) {

1362 if (i<variantCapacity) {

1363 variant[i] = '_';

1364 }

1365 ++i;

1366 needSeparator = FALSE;

1367 }

1368 if(i<variantCapacity) {

                 variant[i]=(char)uprv_toupper(*localeID);

                 if(variant[i]=='-' || variant[i]==',') {

1371 variant[i]='_';

1372 }

1373 }

1374 i++;

1375 localeID++;

1376 }

1377 }

1378

1379 return i;

1380 }

1381

1382 static int32_t

 _getVariant(const char *localeID,

1384 char prev,

             char *variant, int32_t variantCapacity) {

     return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);

1387 }

1388

1389 /* Keyword enumeration */

1390

/* Iteration state stored in the 'context' member of a keyword UEnumeration. */
typedef struct UKeywordsContext {
    /* Start of the keyword list: NUL-terminated strings stored back to back,
     * ended by an empty string. Released with uprv_free() when the
     * enumeration is closed. */
    char* keywords;
    /* Next keyword to hand out; points into 'keywords'. */
    char* current;
} UKeywordsContext;

1395

1396 U_CDECL_BEGIN

1397

1398 static void U_CALLCONV

1399 uloc_kw_closeKeywords(UEnumeration *enumerator) {

     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);

1401 uprv_free(enumerator->context);

1402 uprv_free(enumerator);

1403 }

1404

1405 static int32_t U_CALLCONV

 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {

     char *kw = ((UKeywordsContext *)en->context)->keywords;

1408 int32_t result = 0;

1409 while(*kw) {

1410 result++;

         kw += uprv_strlen(kw)+1;

1412 }

1413 return result;

1414 }

1415

1416 static const char * U_CALLCONV

1417 uloc_kw_nextKeyword(UEnumeration* en,

1418 int32_t* resultLength,

1419 UErrorCode* /*status*/) {

     const char* result = ((UKeywordsContext *)en->context)->current;

1421 int32_t len = 0;

1422 if(*result) {

         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);

         ((UKeywordsContext *)en->context)->current += len+1;

1425 } else {

1426 result = NULL;

1427 }

1428 if (resultLength) {

1429 *resultLength = len;

1430 }

1431 return result;

1432 }

1433

1434 static void U_CALLCONV

1435 uloc_kw_resetKeywords(UEnumeration* en,

1436 UErrorCode* /*status*/) {

     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;

1438 }

1439

1440 U_CDECL_END

1441

1442

1443 static const UEnumeration gKeywordsEnum = {

1444 NULL,

1445 NULL,

1446 uloc_kw_closeKeywords,

1447 uloc_kw_countKeywords,

1448 uenum_unextDefault,

1449 uloc_kw_nextKeyword,

1450 uloc_kw_resetKeywords

1451 };

1452

1453 U_CAPI UEnumeration* U_EXPORT2

 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)

1455 {

1456 LocalMemory<UKeywordsContext> myContext;

1457 LocalMemory<UEnumeration> result;

1458

     if (U_FAILURE(*status)) {

1460 return nullptr;

1461 }

     myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));

     result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));

     if (myContext.isNull() || result.isNull()) {

1465 *status = U_MEMORY_ALLOCATION_ERROR;

1466 return nullptr;

1467 }

     uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));

     myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));

     if (myContext->keywords == nullptr) {

1471 *status = U_MEMORY_ALLOCATION_ERROR;

1472 return nullptr;

1473 }

     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);

     myContext->keywords[keywordListSize] = 0;

1476 myContext->current = myContext->keywords;

     result->context = myContext.orphan();

1478 return result.orphan();

1479 }

1480

1481 U_CAPI UEnumeration* U_EXPORT2

 uloc_openKeywords(const char* localeID,

1483 UErrorCode* status)

1484 {

1485 int32_t i=0;

1486 char keywords[256];

1487 int32_t keywordsCapacity = 256;

1488 char tempBuffer[ULOC_FULLNAME_CAPACITY];

1489 const char* tmpLocaleID;

1490

     if(status==NULL || U_FAILURE(*status)) {

1492 return 0;

1493 }

1494

     if (_hasBCP47Extension(localeID)) {

         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);

1497 } else {

1498 if (localeID==NULL) {

1499 localeID=uloc_getDefault();

1500 }

1501 tmpLocaleID=localeID;

1502 }

1503

1504 /* Skip the language */

     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);

     if(_isIDSeparator(*tmpLocaleID)) {

1507 const char *scriptID;

1508 /* Skip the script if available */

         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);

         if(scriptID != tmpLocaleID+1) {

1511 /* Found optional script */

1512 tmpLocaleID = scriptID;

1513 }

1514 /* Skip the Country */

         if (_isIDSeparator(*tmpLocaleID)) {

             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);

             if(_isIDSeparator(*tmpLocaleID)) {

                 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);

1519 }

1520 }

1521 }

1522

1523 /* keywords are located after '@' */

     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {

         i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);

1526 }

1527

1528 if(i) {

         return uloc_openKeywordList(keywords, i, status);

1530 } else {

1531 return NULL;

1532 }

1533 }

1534

1535

1536 /* bit-flags for 'options' parameter of _canonicalize */

#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both parameters are
 * parenthesized so that compound arguments such as (A | B) work. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special ID "i-default". Deliberately has no terminating NUL; it is
 * compared with uprv_strncmp() using I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)

1544

1545 /**

1546 * Canonicalize the given localeID, to level 1 or to level 2,

1547 * depending on the options. To specify level 1, pass in options=0.

1548 * To specify level 2, pass in options=_ULOC_CANONICALIZE.

1549 *

1550 * This is the code underlying uloc_getName and uloc_canonicalize.

1551 */

1552 static int32_t

 _canonicalize(const char* localeID,

1554 char* result,

1555 int32_t resultCapacity,

1556 uint32_t options,

1557 UErrorCode* err) {

     int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;

1559 char localeBuffer[ULOC_FULLNAME_CAPACITY];

1560 char tempBuffer[ULOC_FULLNAME_CAPACITY];

1561 const char* origLocaleID;

1562 const char* tmpLocaleID;

1563 const char* keywordAssign = NULL;

1564 const char* separatorIndicator = NULL;

1565 char* name;

     char* variant = NULL; /* pointer into name, or NULL */

1567

     if (U_FAILURE(*err)) {

1569 return 0;

1570 }

1571

     if (_hasBCP47Extension(localeID)) {

         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);

1574 } else {

1575 if (localeID==NULL) {

1576 localeID=uloc_getDefault();

1577 }

1578 tmpLocaleID=localeID;

1579 }

1580

1581 origLocaleID=tmpLocaleID;

1582

1583 /* if we are doing a full canonicalization, then put results in

1584 localeBuffer, if necessary; otherwise send them to result. */

1585 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/

         (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {

1587 name = localeBuffer;

         nameCapacity = (int32_t)sizeof(localeBuffer);

1589 } else {

1590 name = result;

1591 nameCapacity = resultCapacity;

1592 }

1593

1594 /* get all pieces, one after another, and separate with '_' */

     len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);

1596

     if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {

         const char *d = uloc_getDefault();

1599

         len = (int32_t)uprv_strlen(d);

1601

1602 if (name != NULL) {

             uprv_memcpy(name, d, len);

1604 }

     } else if(_isIDSeparator(*tmpLocaleID)) {

1606 const char *scriptID;

1607

1608 ++fieldCount;

1609 if(len<nameCapacity) {

1610 name[len]='_';

1611 }

1612 ++len;

1613

         scriptSize=ulocimp_getScript(tmpLocaleID+1,

             (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);

         if(scriptSize > 0) {

1617 /* Found optional script */

1618 tmpLocaleID = scriptID;

1619 ++fieldCount;

1620 len+=scriptSize;

             if (_isIDSeparator(*tmpLocaleID)) {

1622 /* If there is something else, then we add the _ */

1623 if(len<nameCapacity) {

1624 name[len]='_';

1625 }

1626 ++len;

1627 }

1628 }

1629

         if (_isIDSeparator(*tmpLocaleID)) {

1631 const char *cntryID;

             int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,

                 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);

             if (cntrySize > 0) {

1635 /* Found optional country */

1636 tmpLocaleID = cntryID;

1637 len+=cntrySize;

1638 }

             if(_isIDSeparator(*tmpLocaleID)) {

1640 /* If there is something else, then we add the _ if we found country before. */

                 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {

1642 ++fieldCount;

1643 if(len<nameCapacity) {

1644 name[len]='_';

1645 }

1646 ++len;

1647 }

1648

                 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,

                     (len<nameCapacity ? name+len : NULL), nameCapacity-len);

                 if (variantSize > 0) {

                     variant = len<nameCapacity ? name+len : NULL;

1653 len += variantSize;

                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */

1655 }

1656 }

1657 }

1658 }

1659

1660 /* Copy POSIX-style charset specifier, if any [mr.utf8] */

     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {

1662 UBool done = FALSE;

1663 do {

1664 char c = *tmpLocaleID;

1665 switch (c) {

1666 case 0:

1667 case '@':

1668 done = TRUE;

1669 break;

1670 default:

1671 if (len<nameCapacity) {

1672 name[len] = c;

1673 }

1674 ++len;

1675 ++tmpLocaleID;

1676 break;

1677 }

1678 } while (!done);

1679 }

1680

1681 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'

1682 After this, tmpLocaleID either points to '@' or is NULL */

     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {

         keywordAssign = uprv_strchr(tmpLocaleID, '=');

         separatorIndicator = uprv_strchr(tmpLocaleID, ';');

1686 }

1687

1688 /* Copy POSIX-style variant, if any [mr@FOO] */

     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&

1690 tmpLocaleID != NULL && keywordAssign == NULL) {

1691 for (;;) {

1692 char c = *tmpLocaleID;

             if (c == 0) {

1694 break;

1695 }

1696 if (len<nameCapacity) {

1697 name[len] = c;

1698 }

1699 ++len;

1700 ++tmpLocaleID;

1701 }

1702 }

1703

     if (OPTION_SET(options, _ULOC_CANONICALIZE)) {

1705 /* Handle @FOO variant if @ is present and not followed by = */

         if (tmpLocaleID!=NULL && keywordAssign==NULL) {

1707 int32_t posixVariantSize;

1708 /* Add missing '_' if needed */

             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {

1710 do {

1711 if(len<nameCapacity) {

1712 name[len]='_';

1713 }

1714 ++len;

1715 ++fieldCount;

                 } while(fieldCount<2);

1717 }

             posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,

                                              (UBool)(variantSize > 0));

             if (posixVariantSize > 0) {

1721 if (variant == NULL) {

1722 variant = name+len;

1723 }

1724 len += posixVariantSize;

1725 variantSize += posixVariantSize;

1726 }

1727 }

1728

1729 /* Look up the ID in the canonicalization map */

         for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {

             const char* id = CANONICALIZE_MAP[j].id;

             int32_t n = (int32_t)uprv_strlen(id);

             if (len == n && uprv_strncmp(name, id, n) == 0) {

                 if (n == 0 && tmpLocaleID != NULL) {

1735 break; /* Don't remap "" if keywords present */

1736 }

                 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);

1738 break;

1739 }

1740 }

1741 }

1742

     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {

         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&

1745 (!separatorIndicator || separatorIndicator > keywordAssign)) {

1746 if(len<nameCapacity) {

1747 name[len]='@';

1748 }

1749 ++len;

1750 ++fieldCount;

             len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,

                                 NULL, 0, NULL, TRUE, err);

1753 }

1754 }

1755

     if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {

         uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);

1758 }

1759

     return u_terminateChars(result, resultCapacity, len, err);

1761 }

1762

1763 /* ### ID parsing API **************************************************/

1764

1765 U_CAPI int32_t U_EXPORT2

 uloc_getParent(const char*    localeID,

1767 char* parent,

1768 int32_t parentCapacity,

1769 UErrorCode* err)

1770 {

1771 const char *lastUnderscore;

1772 int32_t i;

1773

     if (U_FAILURE(*err))

1775 return 0;

1776

1777 if (localeID == NULL)

1778 localeID = uloc_getDefault();

1779

     lastUnderscore=uprv_strrchr(localeID, '_');

1781 if(lastUnderscore!=NULL) {

         i=(int32_t)(lastUnderscore-localeID);

1783 } else {

1784 i=0;

1785 }

1786

     if(i>0 && parent != localeID) {

         uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));

1789 }

1790

     return u_terminateChars(parent, parentCapacity, i, err);

1792 }

1793

1794 U_CAPI int32_t U_EXPORT2

 uloc_getLanguage(const char*    localeID,

1796 char* language,

1797 int32_t languageCapacity,

1798 UErrorCode* err)

1799 {

1800 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/

1801 int32_t i=0;

1802

     if (err==NULL || U_FAILURE(*err)) {

1804 return 0;

1805 }

1806

1807 if(localeID==NULL) {

1808 localeID=uloc_getDefault();

1809 }

1810

     i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);

     return u_terminateChars(language, languageCapacity, i, err);

1813 }

1814

1815 U_CAPI int32_t U_EXPORT2

 uloc_getScript(const char*    localeID,

1817 char* script,

1818 int32_t scriptCapacity,

1819 UErrorCode* err)

1820 {

1821 int32_t i=0;

1822

     if(err==NULL || U_FAILURE(*err)) {

1824 return 0;

1825 }

1826

1827 if(localeID==NULL) {

1828 localeID=uloc_getDefault();

1829 }

1830

1831 /* skip the language */

     ulocimp_getLanguage(localeID, NULL, 0, &localeID);

     if(_isIDSeparator(*localeID)) {

         i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);

1835 }

     return u_terminateChars(script, scriptCapacity, i, err);

1837 }

1838

1839 U_CAPI int32_t U_EXPORT2

 uloc_getCountry(const char* localeID,

1841 char* country,

1842 int32_t countryCapacity,

1843 UErrorCode* err)

1844 {

1845 int32_t i=0;

1846

     if(err==NULL || U_FAILURE(*err)) {

1848 return 0;

1849 }

1850

1851 if(localeID==NULL) {

1852 localeID=uloc_getDefault();

1853 }

1854

1855 /* Skip the language */

     ulocimp_getLanguage(localeID, NULL, 0, &localeID);

     if(_isIDSeparator(*localeID)) {

1858 const char *scriptID;

1859 /* Skip the script if available */

         ulocimp_getScript(localeID+1, NULL, 0, &scriptID);

         if(scriptID != localeID+1) {

1862 /* Found optional script */

1863 localeID = scriptID;

1864 }

         if(_isIDSeparator(*localeID)) {

             i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);

1867 }

1868 }

     return u_terminateChars(country, countryCapacity, i, err);

1870 }

1871

1872 U_CAPI int32_t U_EXPORT2

 uloc_getVariant(const char* localeID,

1874 char* variant,

1875 int32_t variantCapacity,

1876 UErrorCode* err)

1877 {

1878 char tempBuffer[ULOC_FULLNAME_CAPACITY];

1879 const char* tmpLocaleID;

1880 int32_t i=0;

1881

     if(err==NULL || U_FAILURE(*err)) {

1883 return 0;

1884 }

1885

     if (_hasBCP47Extension(localeID)) {

         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);

1888 } else {

1889 if (localeID==NULL) {

1890 localeID=uloc_getDefault();

1891 }

1892 tmpLocaleID=localeID;

1893 }

1894

1895 /* Skip the language */

     ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);

     if(_isIDSeparator(*tmpLocaleID)) {

1898 const char *scriptID;

1899 /* Skip the script if available */

         ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);

         if(scriptID != tmpLocaleID+1) {

1902 /* Found optional script */

1903 tmpLocaleID = scriptID;

1904 }

1905 /* Skip the Country */

         if (_isIDSeparator(*tmpLocaleID)) {

1907 const char *cntryID;

             ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);

             if (cntryID != tmpLocaleID+1) {

1910 /* Found optional country */

1911 tmpLocaleID = cntryID;

1912 }

             if(_isIDSeparator(*tmpLocaleID)) {

1914 /* If there was no country ID, skip a possible extra IDSeparator */

                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {

1916 tmpLocaleID++;

1917 }

                 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);

1919 }

1920 }

1921 }

1922

1923 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */

1924 /* if we do not have a variant tag yet then try a POSIX variant after '@' */

1925 /*

1926 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {

1927 i=_getVariant(localeID+1, '@', variant, variantCapacity);

1928 }

1929 */

     return u_terminateChars(variant, variantCapacity, i, err);

1931 }

1932

1933 U_CAPI int32_t U_EXPORT2

 uloc_getName(const char* localeID,

1935 char* name,

1936 int32_t nameCapacity,

1937 UErrorCode* err)

1938 {

     return _canonicalize(localeID, name, nameCapacity, 0, err);

1940 }

1941

1942 U_CAPI int32_t U_EXPORT2

 uloc_getBaseName(const char* localeID,

1944 char* name,

1945 int32_t nameCapacity,

1946 UErrorCode* err)

1947 {

     return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);

1949 }

1950

1951 U_CAPI int32_t U_EXPORT2

 uloc_canonicalize(const char* localeID,

1953 char* name,

1954 int32_t nameCapacity,

1955 UErrorCode* err)

1956 {

     return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);

1958 }

1959

1960 U_CAPI const char* U_EXPORT2

 uloc_getISO3Language(const char* localeID)

1962 {

1963 int16_t offset;

1964 char lang[ULOC_LANG_CAPACITY];

1965 UErrorCode err = U_ZERO_ERROR;

1966

1967 if (localeID == NULL)

1968 {

1969 localeID = uloc_getDefault();

1970 }

     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);

     if (U_FAILURE(err))

1973 return "";

     offset = _findIndex(LANGUAGES, lang);

     if (offset < 0)

1976 return "";

1977 return LANGUAGES_3[offset];

1978 }

1979

1980 U_CAPI const char* U_EXPORT2

 uloc_getISO3Country(const char* localeID)

1982 {

1983 int16_t offset;

1984 char cntry[ULOC_LANG_CAPACITY];

1985 UErrorCode err = U_ZERO_ERROR;

1986

1987 if (localeID == NULL)

1988 {

1989 localeID = uloc_getDefault();

1990 }

     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);

     if (U_FAILURE(err))

1993 return "";

     offset = _findIndex(COUNTRIES, cntry);

     if (offset < 0)

1996 return "";

1997

1998 return COUNTRIES_3[offset];

1999 }

2000

2001 U_CAPI uint32_t U_EXPORT2

 uloc_getLCID(const char* localeID)

2003 {

2004 UErrorCode status = U_ZERO_ERROR;

2005 char langID[ULOC_FULLNAME_CAPACITY];

2006 uint32_t lcid = 0;

2007

2008 /* Check for incomplete id. */

     if (!localeID || uprv_strlen(localeID) < 2) {

2010 return 0;

2011 }

2012

2013 // First, attempt Windows platform lookup if available, but fall

2014 // through to catch any special cases (ICU vs Windows name differences).

     lcid = uprv_convertToLCIDPlatform(localeID, &status);

     if (U_FAILURE(status)) {

2017 return 0;

2018 }

     if (lcid > 0) {

2020 // Windows found an LCID, return that

2021 return lcid;

2022 }

2023

     uloc_getLanguage(localeID, langID, sizeof(langID), &status);

     if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {

2026 return 0;

2027 }

2028

     if (uprv_strchr(localeID, '@')) {

2030 // uprv_convertToLCID does not support keywords other than collation.

2031 // Remove all keywords except collation.

2032 int32_t len;

2033 char collVal[ULOC_KEYWORDS_CAPACITY];

2034 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];

2035

         len = uloc_getKeywordValue(localeID, "collation", collVal,

             UPRV_LENGTHOF(collVal) - 1, &status);

2038

         if (U_SUCCESS(status) && len > 0) {

2040 collVal[len] = 0;

2041

             len = uloc_getBaseName(localeID, tmpLocaleID,

                 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);

2044

             if (U_SUCCESS(status) && len > 0) {

2046 tmpLocaleID[len] = 0;

2047

                 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,

                     UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);

2050

                 if (U_SUCCESS(status) && len > 0) {

2052 tmpLocaleID[len] = 0;

                     return uprv_convertToLCID(langID, tmpLocaleID, &status);

2054 }

2055 }

2056 }

2057

2058 // fall through - all keywords are simply ignored

2059 status = U_ZERO_ERROR;

2060 }

2061

     return uprv_convertToLCID(langID, localeID, &status);

2063 }

2064

2065 U_CAPI int32_t U_EXPORT2

 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,

2067 UErrorCode *status)

2068 {

     return uprv_convertToPosix(hostid, locale, localeCapacity, status);

2070 }

2071

2072 /* ### Default locale **************************************************/

2073

2074 U_CAPI const char* U_EXPORT2

2075 uloc_getDefault()

2076 {

2077 return locale_get_default();

2078 }

2079

2080 U_CAPI void U_EXPORT2

 uloc_setDefault(const char*   newDefaultLocale,

2082 UErrorCode* err)

2083 {

     if (U_FAILURE(*err))

2085 return;

2086 /* the error code isn't currently used for anything by this function*/

2087

2088 /* propagate change to C++ */

2089 locale_set_default(newDefaultLocale);

2090 }

2091

2092 /**

2093 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer

2094 * to an array of pointers to arrays of char. All of these pointers are owned

2095 * by ICU-- do not delete them, and do not write through them. The array is

2096 * terminated with a null pointer.

2097 */

2098 U_CAPI const char* const* U_EXPORT2

2099 uloc_getISOLanguages()

2100 {

2101 return LANGUAGES;

2102 }

2103

2104 /**

2105 * Returns a list of all 2-letter country codes defined in ISO 639. This is a

2106 * pointer to an array of pointers to arrays of char. All of these pointers are

2107 * owned by ICU-- do not delete them, and do not write through them. The array is

2108 * terminated with a null pointer.

2109 */

2110 U_CAPI const char* const* U_EXPORT2

2111 uloc_getISOCountries()

2112 {

2113 return COUNTRIES;

2114 }

2115

2116

2117 /* this function to be moved into cstring.c later */

2118 static char gDecimal = 0;

2119

2120 static /* U_CAPI */

2121 double

2122 /* U_EXPORT2 */

 _uloc_strtod(const char *start, char **end) {

2124 char *decimal;

2125 char *myEnd;

2126 char buf[30];

2127 double rv;

2128 if (!gDecimal) {

2129 char rep[5];

2130 /* For machines that decide to change the decimal on you,

2131 and try to be too smart with localization.

2132 This normally should be just a '.'. */

         sprintf(rep, "%+1.1f", 1.0);

2134 gDecimal = rep[2];

2135 }

2136

     if(gDecimal == '.') {

         return uprv_strtod(start, end); /* fall through to OS */

2139 } else {

         uprv_strncpy(buf, start, 29);

         buf[29]=0;

         decimal = uprv_strchr(buf, '.');

2143 if(decimal) {

2144 *decimal = gDecimal;

2145 } else {

             return uprv_strtod(start, end); /* no decimal point */

2147 }

         rv = uprv_strtod(buf, &myEnd);

2149 if(end) {

             *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */

2151 }

2152 return rv;

2153 }

2154 }

2155

2156 typedef struct {

2157 float q;

2158 int32_t dummy; /* to avoid uninitialized memory copy from qsort */

     char locale[ULOC_FULLNAME_CAPACITY+1];

2160 } _acceptLangItem;

2161

2162 static int32_t U_CALLCONV

 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)

2164 {

     const _acceptLangItem *aa = (const _acceptLangItem*)a;

     const _acceptLangItem *bb = (const _acceptLangItem*)b;

2167

2168 int32_t rc = 0;

     if(bb->q < aa->q) {

2170 rc = -1; /* A > B */

     } else if(bb->q > aa->q) {

2172 rc = 1; /* A < B */

2173 } else {

2174 rc = 0; /* A = B */

2175 }

2176

     if(rc==0) {

         rc = uprv_stricmp(aa->locale, bb->locale);

2179 }

2180

2181 #if defined(ULOC_DEBUG)

2182 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",

2183 aa->locale, aa->q,

2184 bb->locale, bb->q,

2185 rc);*/

2186 #endif

2187

2188 return rc;

2189 }

2190

2191 /*

2192 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53

2193 */

2194

2195 U_CAPI int32_t U_EXPORT2

 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,

2197 const char *httpAcceptLanguage,

2198 UEnumeration* availableLocales,

2199 UErrorCode *status)

2200 {

   MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.

     char tmp[ULOC_FULLNAME_CAPACITY +1];

2203 int32_t n = 0;

2204 const char *itemEnd;

2205 const char *paramEnd;

2206 const char *s;

2207 const char *t;

2208 int32_t res;

2209 int32_t i;

     int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);

2211

     if(U_FAILURE(*status)) {

2213 return -1;

2214 }

2215

     for(s=httpAcceptLanguage;s&&*s;) {

         while(isspace(*s)) /* eat space at the beginning */

2218 s++;

         itemEnd=uprv_strchr(s,',');

         paramEnd=uprv_strchr(s,';');

2221 if(!itemEnd) {

2222 itemEnd = httpAcceptLanguage+l; /* end of string */

2223 }

         if(paramEnd && paramEnd<itemEnd) {

2225 /* semicolon (;) is closer than end (,) */

2226 t = paramEnd+1;

             if(*t=='q') {

2228 t++;

2229 }

             while(isspace(*t)) {

2231 t++;

2232 }

             if(*t=='=') {

2234 t++;

2235 }

             while(isspace(*t)) {

2237 t++;

2238 }

             items[n].q = (float)_uloc_strtod(t,NULL);

2240 } else {

2241 /* no semicolon - it's 1.0 */

             items[n].q = 1.0f;

2243 paramEnd = itemEnd;

2244 }

         items[n].dummy=0;

2246 /* eat spaces prior to semi */

         for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)

2248 ;

         int32_t slen = static_cast<int32_t>(((t+1)-s));

2250 if(slen > ULOC_FULLNAME_CAPACITY) {

2251 *status = U_BUFFER_OVERFLOW_ERROR;

           return -1; // too big

2253 }

         uprv_strncpy(items[n].locale, s, slen);

         items[n].locale[slen]=0; // terminate

         int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);

         if(U_FAILURE(*status)) return -1;

         if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {

2259 // canonicalization had an effect- copy back

             uprv_strncpy(items[n].locale, tmp, clen);

             items[n].locale[clen] = 0; // terminate

2262 }

2263 #if defined(ULOC_DEBUG)

2264 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/

2265 #endif

2266 n++;

2267 s = itemEnd;

         while(*s==',') { /* eat duplicate commas */

2269 s++;

2270 }

         if(n>=items.getCapacity()) { // If we need more items

           if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {

2273 *status = U_MEMORY_ALLOCATION_ERROR;

2274 return -1;

2275 }

2276 #if defined(ULOC_DEBUG)

           fprintf(stderr,"malloced at size %d\n", items.getCapacity());

2278 #endif

2279 }

2280 }

     uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);

     if (U_FAILURE(*status)) {

2283 return -1;

2284 }

     LocalMemory<const char*> strs(NULL);

     if (strs.allocateInsteadAndReset(n) == NULL) {

2287 *status = U_MEMORY_ALLOCATION_ERROR;

2288 return -1;

2289 }

     for(i=0;i<n;i++) {

2291 #if defined(ULOC_DEBUG)

2292 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/

2293 #endif

         strs[i]=items[i].locale;

2295 }

     res =  uloc_acceptLanguage(result, resultAvailable, outResult,

                                strs.getAlias(), n, availableLocales, status);

2298 return res;

2299 }

2300

2301

2302 U_CAPI int32_t U_EXPORT2

 uloc_acceptLanguage(char *result, int32_t resultAvailable,

                     UAcceptResult *outResult, const char **acceptList,

2305 int32_t acceptListCount,

2306 UEnumeration* availableLocales,

2307 UErrorCode *status)

2308 {

2309 int32_t i,j;

2310 int32_t len;

2311 int32_t maxLen=0;

     char tmp[ULOC_FULLNAME_CAPACITY+1];

2313 const char *l;

2314 char **fallbackList;

     if(U_FAILURE(*status)) {

2316 return -1;

2317 }

     fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));

2319 if(fallbackList==NULL) {

2320 *status = U_MEMORY_ALLOCATION_ERROR;

2321 return -1;

2322 }

     for(i=0;i<acceptListCount;i++) {

2324 #if defined(ULOC_DEBUG)

         fprintf(stderr,"%02d: %s\n", i, acceptList[i]);

2326 #endif

         while((l=uenum_next(availableLocales, NULL, status)) != NULL) {

2328 #if defined(ULOC_DEBUG)

             fprintf(stderr,"  %s\n", l);

2330 #endif

             len = (int32_t)uprv_strlen(l);

             if(!uprv_strcmp(acceptList[i], l)) {

2333 if(outResult) {

2334 *outResult = ULOC_ACCEPT_VALID;

2335 }

2336 #if defined(ULOC_DEBUG)

                 fprintf(stderr, "MATCH! %s\n", l);

2338 #endif

                 if(len>0) {

                     uprv_strncpy(result, l, uprv_min(len, resultAvailable));

2341 }

                 for(j=0;j<i;j++) {

2343 uprv_free(fallbackList[j]);

2344 }

2345 uprv_free(fallbackList);

                 return u_terminateChars(result, resultAvailable, len, status);

2347 }

2348 if(len>maxLen) {

2349 maxLen = len;

2350 }

2351 }

2352 uenum_reset(availableLocales, status);

2353 /* save off parent info */

         if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {

             fallbackList[i] = uprv_strdup(tmp);

2356 } else {

2357 fallbackList[i]=0;

2358 }

2359 }

2360

     for(maxLen--;maxLen>0;maxLen--) {

         for(i=0;i<acceptListCount;i++) {

             if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {

2364 #if defined(ULOC_DEBUG)

                 fprintf(stderr,"Try: [%s]", fallbackList[i]);

2366 #endif

                 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {

2368 #if defined(ULOC_DEBUG)

                     fprintf(stderr,"  %s\n", l);

2370 #endif

                     len = (int32_t)uprv_strlen(l);

                     if(!uprv_strcmp(fallbackList[i], l)) {

2373 if(outResult) {

2374 *outResult = ULOC_ACCEPT_FALLBACK;

2375 }

2376 #if defined(ULOC_DEBUG)

                         fprintf(stderr, "fallback MATCH! %s\n", l);

2378 #endif

                         if(len>0) {

                             uprv_strncpy(result, l, uprv_min(len, resultAvailable));

2381 }

                         for(j=0;j<acceptListCount;j++) {

2383 uprv_free(fallbackList[j]);

2384 }

2385 uprv_free(fallbackList);

                         return u_terminateChars(result, resultAvailable, len, status);

2387 }

2388 }

2389 uenum_reset(availableLocales, status);

2390

                 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {

2392 uprv_free(fallbackList[i]);

                     fallbackList[i] = uprv_strdup(tmp);

2394 } else {

2395 uprv_free(fallbackList[i]);

2396 fallbackList[i]=0;

2397 }

2398 }

2399 }

2400 if(outResult) {

2401 *outResult = ULOC_ACCEPT_FAILED;

2402 }

2403 }

     for(i=0;i<acceptListCount;i++) {

2405 uprv_free(fallbackList[i]);

2406 }

2407 uprv_free(fallbackList);

2408 return -1;

2409 }

2410

2411 U_CAPI const char* U_EXPORT2

 uloc_toUnicodeLocaleKey(const char* keyword)

2413 {

     const char* bcpKey = ulocimp_toBcpKey(keyword);

     if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {

2416 // unknown keyword, but syntax is fine..

2417 return keyword;

2418 }

2419 return bcpKey;

2420 }

2421

2422 U_CAPI const char* U_EXPORT2

 uloc_toUnicodeLocaleType(const char* keyword, const char* value)

2424 {

     const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);

     if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {

2427 // unknown keyword, but syntax is fine..

2428 return value;

2429 }

2430 return bcpType;

2431 }

2432

2433 static UBool

 isWellFormedLegacyKey(const char* legacyKey)

2435 {

2436 const char* p = legacyKey;

2437 while (*p) {

         if (!UPRV_ISALPHANUM(*p)) {

2439 return FALSE;

2440 }

2441 p++;

2442 }

2443 return TRUE;

2444 }

2445

2446 static UBool

 isWellFormedLegacyType(const char* legacyType)

2448 {

2449 const char* p = legacyType;

2450 int32_t alphaNumLen = 0;

2451 while (*p) {

         if (*p == '_' || *p == '/' || *p == '-') {

             if (alphaNumLen == 0) {

2454 return FALSE;

2455 }

2456 alphaNumLen = 0;

         } else if (UPRV_ISALPHANUM(*p)) {

2458 alphaNumLen++;

2459 } else {

2460 return FALSE;

2461 }

2462 p++;

2463 }

     return (alphaNumLen != 0);

2465 }

2466

2467 U_CAPI const char* U_EXPORT2

 uloc_toLegacyKey(const char* keyword)

2469 {

     const char* legacyKey = ulocimp_toLegacyKey(keyword);

2471 if (legacyKey == NULL) {

2472 // Checks if the specified locale key is well-formed with the legacy locale syntax.

2473 //

2474 // Note:

2475 // LDML/CLDR provides some definition of keyword syntax in

2476 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and

2477 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax

2478 // Keys can only consist of [0-9a-zA-Z].

         if (isWellFormedLegacyKey(keyword)) {

2480 return keyword;

2481 }

2482 }

2483 return legacyKey;

2484 }

2485

2486 U_CAPI const char* U_EXPORT2

 uloc_toLegacyType(const char* keyword, const char* value)

2488 {

     const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);

2490 if (legacyType == NULL) {

2491 // Checks if the specified locale type is well-formed with the legacy locale syntax.

2492 //

2493 // Note:

2494 // LDML/CLDR provides some definition of keyword syntax in

2495 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and

2496 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax

2497 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values

2498 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")

         if (isWellFormedLegacyType(value)) {

2500 return value;

2501 }

2502 }

2503 return legacyType;

2504 }

2505

2506 /*eof*/