2 **********************************************************************
3 * Copyright (C) 1997-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
46 #include <stdio.h> /* for sprintf */
48 /* ### Declarations **************************************************/
50 /* Locale stuff from locid.cpp */
51 U_CFUNC
void locale_set_default(const char *id
);
52 U_CFUNC
const char *locale_get_default(void);
54 locale_getKeywords(const char *localeID
,
56 char *keywords
, int32_t keywordCapacity
,
57 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
61 /* ### Data tables **************************************************/
64 * Table of language codes, both 2- and 3-letter, with preference
65 * given to 2-letter codes where possible. Includes 3-letter codes
66 * that lack a 2-letter equivalent.
68 * This list must be in sorted order. This list is returned directly
69 * to the user by some API.
71 * This list must be kept in sync with LANGUAGES_3, with corresponding
74 * This table should be terminated with a NULL entry, followed by a
75 * second list, and another NULL entry. The first list is visible to
76 * user code when this array is returned by API. The second list
77 * contains codes we support, but do not expose through user API.
81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82 * include the revisions up to 2001/7/27 *CWB*
84 * The 3 character codes are the terminology codes like RFC 3066. This
85 * is compatible with prior ICU codes
87 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88 * table but now at the end of the table because 3 character codes are
89 * duplicates. This avoids bad searches going from 3 to 2 character
92 * The range qaa-qtz is reserved for local use
94 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
95 /* ISO639 table version is 20150505 */
96 static const char * const LANGUAGES
[] = {
97 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
98 "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
99 "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
100 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
101 "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
102 "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
103 "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
104 "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
105 "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
106 "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
107 "ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg",
108 "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
109 "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
110 "cs", "csb", "cu", "cv", "cy",
111 "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
112 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
113 "dyo", "dyu", "dz", "dzg",
114 "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
115 "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
117 "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
118 "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
120 "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
121 "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
122 "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
123 "gur", "guz", "gv", "gwi",
124 "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
125 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
128 "ilo", "inh", "io", "is", "it", "iu", "izh",
129 "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
132 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
133 "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
134 "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
135 "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
136 "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
138 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
139 "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
140 "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
141 "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
142 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
143 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
144 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
145 "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
146 "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
147 "my", "mye", "myv", "mzn",
148 "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
149 "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
150 "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
151 "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
152 "oc", "oj", "om", "or", "os", "osa", "ota",
153 "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
154 "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
155 "pon", "prg", "pro", "ps", "pt",
157 "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
158 "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
160 "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
161 "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
162 "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
163 "sgs", "shi", "shn", "shu", "si", "sid", "sk",
164 "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
165 "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
166 "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
167 "sv", "sw", "swb", "swc", "syc", "syr", "szl",
168 "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
169 "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
170 "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
171 "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
172 "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
173 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
174 "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
176 "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
177 "xal", "xh", "xmf", "xog",
178 "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
179 "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
182 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
186 static const char* const DEPRECATED_LANGUAGES
[]={
187 "in", "iw", "ji", "jw", NULL
, NULL
189 static const char* const REPLACEMENT_LANGUAGES
[]={
190 "id", "he", "yi", "jv", NULL
, NULL
194 * Table of 3-letter language codes.
196 * This is a lookup table used to convert 3-letter language codes to
197 * their 2-letter equivalent, where possible. It must be kept in sync
198 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
199 * same language as LANGUAGES_3[i]. The commented-out lines are
200 * copied from LANGUAGES to make eyeballing this baby easier.
202 * Where a 3-letter language code has no 2-letter equivalent, the
203 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
205 * This table should be terminated with a NULL entry, followed by a
206 * second list, and another NULL entry. The two lists correspond to
207 * the two lists in LANGUAGES.
209 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
210 /* ISO639 table version is 20150505 */
211 static const char * const LANGUAGES_3
[] = {
212 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
213 "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
214 "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
215 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
216 "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
217 "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
218 "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
219 "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
220 "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
221 "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
222 "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
223 "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
224 "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
225 "ces", "csb", "chu", "chv", "cym",
226 "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
227 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
228 "dyo", "dyu", "dzo", "dzg",
229 "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
230 "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
232 "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
233 "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
235 "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
236 "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
237 "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
238 "gur", "guz", "glv", "gwi",
239 "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
240 "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
242 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
243 "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
244 "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
246 "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
247 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
248 "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
249 "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
250 "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
251 "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
253 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
254 "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
255 "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
256 "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
257 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
258 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
259 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
260 "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
261 "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
262 "mya", "mye", "myv", "mzn",
263 "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
264 "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
265 "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
266 "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
267 "oci", "oji", "orm", "ori", "oss", "osa", "ota",
268 "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
269 "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
270 "pon", "prg", "pro", "pus", "por",
272 "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
273 "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
275 "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
276 "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
277 "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
278 "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
279 "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
280 "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
281 "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
282 "swe", "swa", "swb", "swc", "syc", "syr", "szl",
283 "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
284 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
285 "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
286 "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
287 "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
288 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
289 "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
291 "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
292 "xal", "xho", "xmf", "xog",
293 "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
294 "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
297 /* "in", "iw", "ji", "jw", "sh", */
298 "ind", "heb", "yid", "jaw", "srp",
303 * Table of 2-letter country codes.
305 * This list must be in sorted order. This list is returned directly
306 * to the user by some API.
308 * This list must be kept in sync with COUNTRIES_3, with corresponding
311 * This table should be terminated with a NULL entry, followed by a
312 * second list, and another NULL entry. The first list is visible to
313 * user code when this array is returned by API. The second list
314 * contains codes we support, but do not expose through user API.
318 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
319 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
320 * new codes keeping the old ones for compatibility updated to include
321 * 1999/12/03 revisions *CWB*
323 * RO(ROM) is now RO(ROU) according to
324 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
326 static const char * const COUNTRIES
[] = {
327 "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
328 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
329 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
330 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
331 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
332 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
333 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
334 "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
335 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
336 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
337 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
338 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
339 "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
340 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
341 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
342 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
343 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
344 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
345 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
346 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
347 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
348 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
349 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
350 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
351 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
352 "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
353 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
354 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
355 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
356 "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
358 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
362 static const char* const DEPRECATED_COUNTRIES
[] = {
363 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL
, NULL
/* deprecated country list */
365 static const char* const REPLACEMENT_COUNTRIES
[] = {
366 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
367 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL
, NULL
/* replacement country codes */
371 * Table of 3-letter country codes.
373 * This is a lookup table used to convert 3-letter country codes to
374 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
375 * For all valid i, COUNTRIES[i] must refer to the same country as
376 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
377 * to make eyeballing this baby easier.
379 * This table should be terminated with a NULL entry, followed by a
380 * second list, and another NULL entry. The two lists correspond to
381 * the two lists in COUNTRIES.
383 static const char * const COUNTRIES_3
[] = {
384 /* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
385 "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
386 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
387 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
388 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
389 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
390 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
391 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
392 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
393 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
394 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
395 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
396 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
397 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
398 /* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
399 "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
400 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
401 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
402 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
403 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
404 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
405 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
406 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
407 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
408 /* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
409 "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
410 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
411 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
412 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
413 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
414 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
415 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
416 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
417 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
418 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
419 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
420 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
421 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
422 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
423 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
424 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
425 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
426 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
427 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
428 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
429 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
430 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
431 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
432 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
433 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
434 /* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
435 "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
436 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
437 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
438 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
439 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
440 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
441 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
442 /* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
443 "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
445 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
446 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
/*
 * One row of the locale-ID canonicalization table: the ID to recognize,
 * the ID it is rewritten to, and an optional keyword/value pair that
 * accompanies the rewritten ID.
 */
struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword is NULL */
};
typedef struct CanonicalizationMap CanonicalizationMap;
458 * A map to canonicalize locale IDs. This handles a variety of
459 * different semantic kinds of transformations.
461 static const CanonicalizationMap CANONICALIZE_MAP
[] = {
462 { "", "en_US_POSIX", NULL
, NULL
}, /* .NET name */
463 { "c", "en_US_POSIX", NULL
, NULL
}, /* POSIX name */
464 { "posix", "en_US_POSIX", NULL
, NULL
}, /* POSIX name (alias of C) */
465 { "art_LOJBAN", "jbo", NULL
, NULL
}, /* registered name */
466 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL
, NULL
}, /* .NET name */
467 { "az_AZ_LATN", "az_Latn_AZ", NULL
, NULL
}, /* .NET name */
468 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
469 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
470 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
471 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
472 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
473 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
474 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
475 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
476 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
477 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
478 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
479 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
480 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
481 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
482 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
483 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
484 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
485 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
486 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
487 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
488 { "nb_NO_NY", "nn_NO", NULL
, NULL
}, /* "markus said this was ok" :-) */
489 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
490 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
491 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
492 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL
, NULL
}, /* .NET name */
493 { "sr_SP_LATN", "sr_Latn_RS", NULL
, NULL
}, /* .NET name */
494 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL
, NULL
}, /* Linux name */
495 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
496 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL
, NULL
}, /* Linux name */
497 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL
, NULL
}, /* .NET name */
498 { "uz_UZ_LATN", "uz_Latn_UZ", NULL
, NULL
}, /* .NET name */
499 { "zh_CHS", "zh_Hans", NULL
, NULL
}, /* .NET name */
500 { "zh_CHT", "zh_Hant", NULL
, NULL
}, /* .NET name */
501 { "zh_GAN", "gan", NULL
, NULL
}, /* registered name */
502 { "zh_GUOYU", "zh", NULL
, NULL
}, /* registered name */
503 { "zh_HAKKA", "hak", NULL
, NULL
}, /* registered name */
504 { "zh_MIN_NAN", "nan", NULL
, NULL
}, /* registered name */
505 { "zh_WUU", "wuu", NULL
, NULL
}, /* registered name */
506 { "zh_XIANG", "hsn", NULL
, NULL
}, /* registered name */
507 { "zh_YUE", "yue", NULL
, NULL
}, /* registered name */
510 typedef struct VariantMap
{
511 const char *variant
; /* input ID */
512 const char *keyword
; /* keyword, or NULL if none */
513 const char *value
; /* keyword value, or NULL if kw==NULL */
516 static const VariantMap VARIANT_MAP
[] = {
517 { "EURO", "currency", "EUR" },
518 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
519 { "STROKE", "collation", "stroke" } /* Solaris variant */
522 /* ### BCP47 Conversion *******************************************/
/*
 * Test whether a locale ID looks like a BCP47 language tag carrying an
 * extension: the ID is non-NULL, contains no '@', and its shortest subtag
 * (as reported by getShortestSubtagLength) is exactly one character long.
 *
 * The macro uses only its own argument; it does not depend on a variable
 * named localeID being in scope at the call site. The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define _hasBCP47Extension(id) \
    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
525 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
526 #define _ConvertBCP47(finalID, id, buffer, length,err) \
527 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
532 /* Gets the size of the shortest subtag in the given localeID. */
533 static int32_t getShortestSubtagLength(const char *localeID
) {
534 int32_t localeIDLength
= uprv_strlen(localeID
);
535 int32_t length
= localeIDLength
;
536 int32_t tmpLength
= 0;
540 for (i
= 0; i
< localeIDLength
; i
++) {
541 if (localeID
[i
] != '_' && localeID
[i
] != '-') {
548 if (tmpLength
!= 0 && tmpLength
< length
) {
558 /* ### Keywords **************************************************/
/* Size of the fixed buffers that hold a keyword name, terminating NUL
 * included; a name of this length or longer is rejected as too long. */
560 #define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list built while parsing a locale ID's keywords;
 * reaching it is reported as U_INTERNAL_PROGRAM_ERROR. */
561 #define ULOC_MAX_NO_KEYWORDS 25
563 U_CAPI
const char * U_EXPORT2
564 locale_getKeywordsStart(const char *localeID
) {
565 const char *result
= NULL
;
566 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
569 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
571 /* We do this because the @ sign is variant, and the @ sign used on one
572 EBCDIC machine won't be compiled the same way on other EBCDIC based
574 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
575 const uint8_t *charToFind
= ebcdicSigns
;
577 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
588 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
589 * @param keywordName incoming name to be canonicalized
590 * @param status return status (keyword too long)
591 * @return length of the keyword name
593 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
596 int32_t keywordNameLen
= (int32_t)uprv_strlen(keywordName
);
598 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
599 /* keyword name too long for internal buffer */
600 *status
= U_INTERNAL_PROGRAM_ERROR
;
604 /* normalize the keyword name */
605 for(i
= 0; i
< keywordNameLen
; i
++) {
606 buf
[i
] = uprv_tolower(keywordName
[i
]);
610 return keywordNameLen
;
614 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
616 const char *valueStart
;
620 static int32_t U_CALLCONV
621 compareKeywordStructs(const void * /*context*/, const void *left
, const void *right
) {
622 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
623 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
624 return uprv_strcmp(leftString
, rightString
);
628 * Both addKeyword and addValue must already be in canonical form.
629 * Either both addKeyword and addValue are NULL, or neither is NULL.
630 * If they are not NULL they must be zero terminated.
631 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
634 _getKeywords(const char *localeID
,
636 char *keywords
, int32_t keywordCapacity
,
637 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
639 const char* addKeyword
,
640 const char* addValue
,
643 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
645 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
646 int32_t numKeywords
= 0;
647 const char* pos
= localeID
;
648 const char* equalSign
= NULL
;
649 const char* semicolon
= NULL
;
651 int32_t keywordsLen
= 0;
652 int32_t valuesLen
= 0;
654 if(prev
== '@') { /* start of keyword definition */
655 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
657 UBool duplicate
= FALSE
;
658 /* skip leading spaces */
662 if (!*pos
) { /* handle trailing "; " */
665 if(numKeywords
== maxKeywords
) {
666 *status
= U_INTERNAL_PROGRAM_ERROR
;
669 equalSign
= uprv_strchr(pos
, '=');
670 semicolon
= uprv_strchr(pos
, ';');
671 /* lack of '=' [foo@currency] is illegal */
672 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
673 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
674 *status
= U_INVALID_FORMAT_ERROR
;
677 /* need to normalize both keyword and keyword name */
678 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
679 /* keyword name too long for internal buffer */
680 *status
= U_INTERNAL_PROGRAM_ERROR
;
683 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
685 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
689 /* zero-length keyword is an error. */
691 *status
= U_INVALID_FORMAT_ERROR
;
695 keywordList
[numKeywords
].keyword
[n
] = 0;
696 keywordList
[numKeywords
].keywordLen
= n
;
697 /* now grab the value part. First we skip the '=' */
699 /* then we skip leading spaces */
700 while(*equalSign
== ' ') {
704 /* Premature end or zero-length value */
705 if (!*equalSign
|| equalSign
== semicolon
) {
706 *status
= U_INVALID_FORMAT_ERROR
;
710 keywordList
[numKeywords
].valueStart
= equalSign
;
715 while(*(pos
- i
- 1) == ' ') {
718 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
721 i
= (int32_t)uprv_strlen(equalSign
);
722 while(i
&& equalSign
[i
-1] == ' ') {
725 keywordList
[numKeywords
].valueLen
= i
;
727 /* If this is a duplicate keyword, then ignore it */
728 for (j
=0; j
<numKeywords
; ++j
) {
729 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
739 /* Handle addKeyword/addValue. */
740 if (addKeyword
!= NULL
) {
741 UBool duplicate
= FALSE
;
742 U_ASSERT(addValue
!= NULL
);
743 /* Search for duplicate; if found, do nothing. Explicit keyword
744 overrides addKeyword. */
745 for (j
=0; j
<numKeywords
; ++j
) {
746 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
752 if (numKeywords
== maxKeywords
) {
753 *status
= U_INTERNAL_PROGRAM_ERROR
;
756 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
757 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
758 keywordList
[numKeywords
].valueStart
= addValue
;
759 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
763 U_ASSERT(addValue
== NULL
);
766 /* now we have a list of keywords */
767 /* we need to sort it */
768 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
770 /* Now construct the keyword part */
771 for(i
= 0; i
< numKeywords
; i
++) {
772 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
773 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
775 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
777 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
780 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
782 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
783 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
785 keywordsLen
+= keywordList
[i
].valueLen
;
787 if(i
< numKeywords
- 1) {
788 if(keywordsLen
< keywordCapacity
) {
789 keywords
[keywordsLen
] = ';';
795 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
796 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
797 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
799 valuesLen
+= keywordList
[i
].valueLen
+ 1;
803 values
[valuesLen
] = 0;
808 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
815 locale_getKeywords(const char *localeID
,
817 char *keywords
, int32_t keywordCapacity
,
818 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
820 UErrorCode
*status
) {
821 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
822 values
, valuesCapacity
, valLen
, valuesToo
,
826 U_CAPI
int32_t U_EXPORT2
827 uloc_getKeywordValue(const char* localeID
,
828 const char* keywordName
,
829 char* buffer
, int32_t bufferCapacity
,
832 const char* startSearchHere
= NULL
;
833 const char* nextSeparator
= NULL
;
834 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
835 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
839 if(status
&& U_SUCCESS(*status
) && localeID
) {
840 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
841 const char* tmpLocaleID
;
843 if (_hasBCP47Extension(localeID
)) {
844 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
846 tmpLocaleID
=localeID
;
849 startSearchHere
= uprv_strchr(tmpLocaleID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
850 if(startSearchHere
== NULL
) {
851 /* no keywords, return at once */
855 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
856 if(U_FAILURE(*status
)) {
860 /* find the first keyword */
861 while(startSearchHere
) {
863 /* skip leading spaces (allowed?) */
864 while(*startSearchHere
== ' ') {
867 nextSeparator
= uprv_strchr(startSearchHere
, '=');
868 /* need to normalize both keyword and keyword name */
872 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
873 /* keyword name too long for internal buffer */
874 *status
= U_INTERNAL_PROGRAM_ERROR
;
877 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
878 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
880 /* trim trailing spaces */
881 while(startSearchHere
[i
-1] == ' ') {
885 localeKeywordNameBuffer
[i
] = 0;
887 startSearchHere
= uprv_strchr(nextSeparator
, ';');
889 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
891 while(*nextSeparator
== ' ') {
894 /* we actually found the keyword. Copy the value */
895 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
896 while(*(startSearchHere
-1) == ' ') {
899 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
900 result
= u_terminateChars(buffer
, bufferCapacity
, (int32_t)(startSearchHere
- nextSeparator
), status
);
901 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
902 i
= (int32_t)uprv_strlen(nextSeparator
);
903 while(nextSeparator
[i
- 1] == ' ') {
906 uprv_strncpy(buffer
, nextSeparator
, i
);
907 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
909 /* give a bigger buffer, please */
910 *status
= U_BUFFER_OVERFLOW_ERROR
;
911 if(startSearchHere
) {
912 result
= (int32_t)(startSearchHere
- nextSeparator
);
914 result
= (int32_t)uprv_strlen(nextSeparator
);
924 U_CAPI
int32_t U_EXPORT2
925 uloc_setKeywordValue(const char* keywordName
,
926 const char* keywordValue
,
927 char* buffer
, int32_t bufferCapacity
,
930 /* TODO: sorting. removal. */
931 int32_t keywordNameLen
;
932 int32_t keywordValueLen
;
935 int32_t foundValueLen
;
936 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
937 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
938 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
941 char* nextSeparator
= NULL
;
942 char* nextEqualsign
= NULL
;
943 char* startSearchHere
= NULL
;
944 char* keywordStart
= NULL
;
945 char *insertHere
= NULL
;
946 if(U_FAILURE(*status
)) {
949 if(bufferCapacity
>1) {
950 bufLen
= (int32_t)uprv_strlen(buffer
);
952 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
955 if(bufferCapacity
<bufLen
) {
956 /* The capacity is less than the length?! Is this NULL terminated? */
957 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
960 if(keywordValue
&& !*keywordValue
) {
964 keywordValueLen
= (int32_t)uprv_strlen(keywordValue
);
968 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
969 if(U_FAILURE(*status
)) {
972 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
973 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
974 if(!keywordValue
) { /* no keywords = nothing to remove */
978 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
979 if(startSearchHere
) { /* had a single @ */
980 needLen
--; /* already had the @ */
981 /* startSearchHere points at the @ */
983 startSearchHere
=buffer
+bufLen
;
985 if(needLen
>= bufferCapacity
) {
986 *status
= U_BUFFER_OVERFLOW_ERROR
;
987 return needLen
; /* no change */
989 *startSearchHere
= '@';
991 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
992 startSearchHere
+= keywordNameLen
;
993 *startSearchHere
= '=';
995 uprv_strcpy(startSearchHere
, keywordValue
);
996 startSearchHere
+=keywordValueLen
;
998 } /* end shortcut - no @ */
1000 keywordStart
= startSearchHere
;
1001 /* search for keyword */
1002 while(keywordStart
) {
1004 /* skip leading spaces (allowed?) */
1005 while(*keywordStart
== ' ') {
1008 nextEqualsign
= uprv_strchr(keywordStart
, '=');
1009 /* need to normalize both keyword and keyword name */
1010 if(!nextEqualsign
) {
1013 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
1014 /* keyword name too long for internal buffer */
1015 *status
= U_INTERNAL_PROGRAM_ERROR
;
1018 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
1019 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
1021 /* trim trailing spaces */
1022 while(keywordStart
[i
-1] == ' ') {
1025 U_ASSERT(i
>=0 && i
<ULOC_KEYWORD_BUFFER_LEN
);
1026 localeKeywordNameBuffer
[i
] = 0;
1028 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
1029 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
1032 while(*nextEqualsign
== ' ') {
1035 /* we actually found the keyword. Change the value */
1036 if (nextSeparator
) {
1038 foundValueLen
= (int32_t)(nextSeparator
- nextEqualsign
);
1041 foundValueLen
= (int32_t)uprv_strlen(nextEqualsign
);
1043 if(keywordValue
) { /* adding a value - not removing */
1044 if(foundValueLen
== keywordValueLen
) {
1045 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1046 return bufLen
; /* no change in size */
1047 } else if(foundValueLen
> keywordValueLen
) {
1048 int32_t delta
= foundValueLen
- keywordValueLen
;
1049 if(nextSeparator
) { /* RH side */
1050 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1052 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1056 } else { /* FVL < KVL */
1057 int32_t delta
= keywordValueLen
- foundValueLen
;
1058 if((bufLen
+delta
) >= bufferCapacity
) {
1059 *status
= U_BUFFER_OVERFLOW_ERROR
;
1060 return bufLen
+delta
;
1062 if(nextSeparator
) { /* RH side */
1063 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1065 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1070 } else { /* removing a keyword */
1072 /* zero out the ';' or '@' just before startSearchhere */
1073 keywordStart
[-1] = 0;
1074 return (int32_t)((keywordStart
-buffer
)-1); /* (string length without keyword) minus separator */
1076 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1077 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1078 return (int32_t)(bufLen
-((nextSeparator
+1)-keywordStart
));
1081 } else if(rc
<0){ /* end match keyword */
1082 /* could insert at this location. */
1083 insertHere
= keywordStart
;
1085 keywordStart
= nextSeparator
;
1086 } /* end loop searching */
1089 return bufLen
; /* removal of non-extant keyword - no change */
1092 /* we know there is at least one keyword. */
1093 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1094 if(needLen
>= bufferCapacity
) {
1095 *status
= U_BUFFER_OVERFLOW_ERROR
;
1096 return needLen
; /* no change */
1100 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1101 keywordStart
= insertHere
;
1103 keywordStart
= buffer
+bufLen
;
1104 *keywordStart
= ';';
1107 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1108 keywordStart
+= keywordNameLen
;
1109 *keywordStart
= '=';
1111 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1112 keywordStart
+=keywordValueLen
;
1114 *keywordStart
= ';';
1121 /* ### ID parsing implementation **************************************************/
/* TRUE if 'a' is one of the letters that can start a special prefix
 * ('x', 'X', 'i' or 'I').
 * The argument is parenthesized so that compound expressions compare as a
 * unit. NOTE: 'a' is evaluated up to four times; do not pass an expression
 * with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))
/* Returns TRUE if one of the special prefixes is here (s=string):
 * a prefix letter (see _isPrefixLetter) immediately followed by an ID
 * separator (see _isIDSeparator).
 * 's' is parenthesized so that pointer expressions such as (p + 1) index
 * correctly. NOTE: 's' is evaluated more than once. */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))
/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * TRUE for NUL, '.' and '@'. The argument is parenthesized so that compound
 * expressions compare as a unit. NOTE: 'a' is evaluated up to three times. */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1134 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1135 U_ASSERT(str
!= 0 && len
>= 0);
1136 while (len
-- != 0) {
1140 } else if (d
== 0) {
1149 * Lookup 'key' in the array 'list'. The array 'list' should contain
1150 * a NULL entry, followed by more entries, and a second NULL entry.
1152 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1155 static int16_t _findIndex(const char* const* list
, const char* key
)
1157 const char* const* anchor
= list
;
1160 /* Make two passes through two NULL-terminated arrays at 'list' */
1161 while (pass
++ < 2) {
1163 if (uprv_strcmp(key
, *list
) == 0) {
1164 return (int16_t)(list
- anchor
);
1168 ++list
; /* skip final NULL *CWB*/
1173 /* count the length of src while copying it to dest; return strlen(src) */
1174 static inline int32_t
1175 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1182 return (int32_t)(src
-anchor
);
1184 if(destCapacity
<=0) {
1185 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1194 uloc_getCurrentCountryID(const char* oldID
){
1195 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1197 return REPLACEMENT_COUNTRIES
[offset
];
1202 uloc_getCurrentLanguageID(const char* oldID
){
1203 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1205 return REPLACEMENT_LANGUAGES
[offset
];
1210 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1211 * avoid duplicating code to handle the earlier locale ID pieces
1212 * in the functions for the later ones by
1213 * setting the *pEnd pointer to where they stopped parsing
1215 * TODO try to use this in Locale
1218 ulocimp_getLanguage(const char *localeID
,
1219 char *language
, int32_t languageCapacity
,
1220 const char **pEnd
) {
1223 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1225 /* if it starts with i- or x- then copy that prefix */
1226 if(_isIDPrefix(localeID
)) {
1227 if(i
<languageCapacity
) {
1228 language
[i
]=(char)uprv_tolower(*localeID
);
1230 if(i
<languageCapacity
) {
1237 /* copy the language as far as possible and count its length */
1238 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1239 if(i
<languageCapacity
) {
1240 language
[i
]=(char)uprv_tolower(*localeID
);
1244 lang
[i
]=(char)uprv_tolower(*localeID
);
1251 /* convert 3 character code to 2 character code if possible *CWB*/
1252 offset
=_findIndex(LANGUAGES_3
, lang
);
1254 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1265 ulocimp_getScript(const char *localeID
,
1266 char *script
, int32_t scriptCapacity
,
1275 /* copy the second item as far as possible and count its length */
1276 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])
1277 && uprv_isASCIILetter(localeID
[idLen
])) {
1281 /* If it's exactly 4 characters long, then it's a script and not a country. */
1285 *pEnd
= localeID
+idLen
;
1287 if(idLen
> scriptCapacity
) {
1288 idLen
= scriptCapacity
;
1291 script
[0]=(char)uprv_toupper(*(localeID
++));
1293 for (i
= 1; i
< idLen
; i
++) {
1294 script
[i
]=(char)uprv_tolower(*(localeID
++));
1304 ulocimp_getCountry(const char *localeID
,
1305 char *country
, int32_t countryCapacity
,
1309 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1312 /* copy the country as far as possible and count its length */
1313 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1314 if(idLen
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1315 cnty
[idLen
]=(char)uprv_toupper(localeID
[idLen
]);
1320 /* the country should be either length 2 or 3 */
1321 if (idLen
== 2 || idLen
== 3) {
1322 UBool gotCountry
= FALSE
;
1323 /* convert 3 character code to 2 character code if possible *CWB*/
1325 offset
=_findIndex(COUNTRIES_3
, cnty
);
1327 idLen
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1333 for (i
= 0; i
< idLen
; i
++) {
1334 if (i
< countryCapacity
) {
1335 country
[i
]=(char)uprv_toupper(localeID
[i
]);
1352 * @param needSeparator if true, then add leading '_' if any variants
1353 * are added to 'variant'
1356 _getVariantEx(const char *localeID
,
1358 char *variant
, int32_t variantCapacity
,
1359 UBool needSeparator
) {
1362 /* get one or more variant tags and separate them with '_' */
1363 if(_isIDSeparator(prev
)) {
1364 /* get a variant string after a '-' or '_' */
1365 while(!_isTerminator(*localeID
)) {
1366 if (needSeparator
) {
1367 if (i
<variantCapacity
) {
1371 needSeparator
= FALSE
;
1373 if(i
<variantCapacity
) {
1374 variant
[i
]=(char)uprv_toupper(*localeID
);
1375 if(variant
[i
]=='-') {
1384 /* if there is no variant tag after a '-' or '_' then look for '@' */
1388 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1389 ++localeID
; /* point after the '@' */
1393 while(!_isTerminator(*localeID
)) {
1394 if (needSeparator
) {
1395 if (i
<variantCapacity
) {
1399 needSeparator
= FALSE
;
1401 if(i
<variantCapacity
) {
1402 variant
[i
]=(char)uprv_toupper(*localeID
);
1403 if(variant
[i
]=='-' || variant
[i
]==',') {
1416 _getVariant(const char *localeID
,
1418 char *variant
, int32_t variantCapacity
) {
1419 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1423 * Delete ALL instances of a variant from the given list of one or
1424 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1425 * @param variants the source string of one or more variants,
1426 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1427 * terminated; if it is, trailing zero will NOT be maintained.
1428 * @param variantsLen length of variants
1429 * @param toDelete variant to delete, without separators, e.g. "EURO"
1430 * or "PREEURO"; not zero terminated
1431 * @param toDeleteLen length of toDelete
1432 * @return number of characters deleted from variants
1435 _deleteVariant(char* variants
, int32_t variantsLen
,
1436 const char* toDelete
, int32_t toDeleteLen
)
1438 int32_t delta
= 0; /* number of chars deleted */
1441 if (variantsLen
< toDeleteLen
) {
1444 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1445 (variantsLen
== toDeleteLen
||
1446 (flag
=(variants
[toDeleteLen
] == '_'))))
1448 int32_t d
= toDeleteLen
+ (flag
?1:0);
1451 if (variantsLen
> 0) {
1452 uprv_memmove(variants
, variants
+d
, variantsLen
);
1455 char* p
= _strnchr(variants
, variantsLen
, '_');
1460 variantsLen
-= (int32_t)(p
- variants
);
1466 /* Keyword enumeration */
1468 typedef struct UKeywordsContext
{
1473 static void U_CALLCONV
1474 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1475 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1476 uprv_free(enumerator
->context
);
1477 uprv_free(enumerator
);
1480 static int32_t U_CALLCONV
1481 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
* /*status*/) {
1482 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1486 kw
+= uprv_strlen(kw
)+1;
1491 static const char* U_CALLCONV
1492 uloc_kw_nextKeyword(UEnumeration
* en
,
1493 int32_t* resultLength
,
1494 UErrorCode
* /*status*/) {
1495 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1498 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1499 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1504 *resultLength
= len
;
1509 static void U_CALLCONV
1510 uloc_kw_resetKeywords(UEnumeration
* en
,
1511 UErrorCode
* /*status*/) {
1512 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1515 static const UEnumeration gKeywordsEnum
= {
1518 uloc_kw_closeKeywords
,
1519 uloc_kw_countKeywords
,
1521 uloc_kw_nextKeyword
,
1522 uloc_kw_resetKeywords
1525 U_CAPI UEnumeration
* U_EXPORT2
1526 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1528 UKeywordsContext
*myContext
= NULL
;
1529 UEnumeration
*result
= NULL
;
1531 if(U_FAILURE(*status
)) {
1534 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1535 /* Null pointer test */
1536 if (result
== NULL
) {
1537 *status
= U_MEMORY_ALLOCATION_ERROR
;
1540 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1541 myContext
= static_cast<UKeywordsContext
*>(uprv_malloc(sizeof(UKeywordsContext
)));
1542 if (myContext
== NULL
) {
1543 *status
= U_MEMORY_ALLOCATION_ERROR
;
1547 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1548 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1549 myContext
->keywords
[keywordListSize
] = 0;
1550 myContext
->current
= myContext
->keywords
;
1551 result
->context
= myContext
;
1555 U_CAPI UEnumeration
* U_EXPORT2
1556 uloc_openKeywords(const char* localeID
,
1561 int32_t keywordsCapacity
= 256;
1562 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1563 const char* tmpLocaleID
;
1565 if(status
==NULL
|| U_FAILURE(*status
)) {
1569 if (_hasBCP47Extension(localeID
)) {
1570 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
1572 if (localeID
==NULL
) {
1573 localeID
=uloc_getDefault();
1575 tmpLocaleID
=localeID
;
1578 /* Skip the language */
1579 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1580 if(_isIDSeparator(*tmpLocaleID
)) {
1581 const char *scriptID
;
1582 /* Skip the script if available */
1583 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1584 if(scriptID
!= tmpLocaleID
+1) {
1585 /* Found optional script */
1586 tmpLocaleID
= scriptID
;
1588 /* Skip the Country */
1589 if (_isIDSeparator(*tmpLocaleID
)) {
1590 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &tmpLocaleID
);
1591 if(_isIDSeparator(*tmpLocaleID
)) {
1592 _getVariant(tmpLocaleID
+1, *tmpLocaleID
, NULL
, 0);
1597 /* keywords are located after '@' */
1598 if((tmpLocaleID
= locale_getKeywordsStart(tmpLocaleID
)) != NULL
) {
1599 i
=locale_getKeywords(tmpLocaleID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1603 return uloc_openKeywordList(keywords
, i
, status
);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that compound expressions (e.g. A|B) are tested as a
 * unit instead of being re-associated by operator precedence. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The characters of "i-default". Not NUL-terminated (character-list
 * initializer), so it must be compared by length: see I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1620 * Canonicalize the given localeID, to level 1 or to level 2,
1621 * depending on the options. To specify level 1, pass in options=0.
1622 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1624 * This is the code underlying uloc_getName and uloc_canonicalize.
1627 _canonicalize(const char* localeID
,
1629 int32_t resultCapacity
,
1632 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1633 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1634 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1635 const char* origLocaleID
;
1636 const char* tmpLocaleID
;
1637 const char* keywordAssign
= NULL
;
1638 const char* separatorIndicator
= NULL
;
1639 const char* addKeyword
= NULL
;
1640 const char* addValue
= NULL
;
1642 char* variant
= NULL
; /* pointer into name, or NULL */
1644 if (U_FAILURE(*err
)) {
1648 if (_hasBCP47Extension(localeID
)) {
1649 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1651 if (localeID
==NULL
) {
1652 localeID
=uloc_getDefault();
1654 tmpLocaleID
=localeID
;
1657 origLocaleID
=tmpLocaleID
;
1659 /* if we are doing a full canonicalization, then put results in
1660 localeBuffer, if necessary; otherwise send them to result. */
1661 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1662 (result
== NULL
|| resultCapacity
< (int32_t)sizeof(localeBuffer
))) {
1663 name
= localeBuffer
;
1664 nameCapacity
= (int32_t)sizeof(localeBuffer
);
1667 nameCapacity
= resultCapacity
;
1670 /* get all pieces, one after another, and separate with '_' */
1671 len
=ulocimp_getLanguage(tmpLocaleID
, name
, nameCapacity
, &tmpLocaleID
);
1673 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1674 const char *d
= uloc_getDefault();
1676 len
= (int32_t)uprv_strlen(d
);
1679 uprv_strncpy(name
, d
, len
);
1681 } else if(_isIDSeparator(*tmpLocaleID
)) {
1682 const char *scriptID
;
1685 if(len
<nameCapacity
) {
1690 scriptSize
=ulocimp_getScript(tmpLocaleID
+1,
1691 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &scriptID
);
1692 if(scriptSize
> 0) {
1693 /* Found optional script */
1694 tmpLocaleID
= scriptID
;
1697 if (_isIDSeparator(*tmpLocaleID
)) {
1698 /* If there is something else, then we add the _ */
1699 if(len
<nameCapacity
) {
1706 if (_isIDSeparator(*tmpLocaleID
)) {
1707 const char *cntryID
;
1708 int32_t cntrySize
= ulocimp_getCountry(tmpLocaleID
+1,
1709 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &cntryID
);
1710 if (cntrySize
> 0) {
1711 /* Found optional country */
1712 tmpLocaleID
= cntryID
;
1715 if(_isIDSeparator(*tmpLocaleID
)) {
1716 /* If there is something else, then we add the _ if we found country before. */
1717 if (cntrySize
>= 0 && ! _isIDSeparator(*(tmpLocaleID
+1)) ) {
1719 if(len
<nameCapacity
) {
1725 variantSize
= _getVariant(tmpLocaleID
+1, *tmpLocaleID
,
1726 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
);
1727 if (variantSize
> 0) {
1728 variant
= len
<nameCapacity
? name
+len
: NULL
;
1730 tmpLocaleID
+= variantSize
+ 1; /* skip '_' and variant */
1736 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1737 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *tmpLocaleID
== '.') {
1740 char c
= *tmpLocaleID
;
1747 if (len
<nameCapacity
) {
1757 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1758 After this, tmpLocaleID either points to '@' or is NULL */
1759 if ((tmpLocaleID
=locale_getKeywordsStart(tmpLocaleID
))!=NULL
) {
1760 keywordAssign
= uprv_strchr(tmpLocaleID
, '=');
1761 separatorIndicator
= uprv_strchr(tmpLocaleID
, ';');
1764 /* Copy POSIX-style variant, if any [mr@FOO] */
1765 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1766 tmpLocaleID
!= NULL
&& keywordAssign
== NULL
) {
1768 char c
= *tmpLocaleID
;
1772 if (len
<nameCapacity
) {
1780 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1781 /* Handle @FOO variant if @ is present and not followed by = */
1782 if (tmpLocaleID
!=NULL
&& keywordAssign
==NULL
) {
1783 int32_t posixVariantSize
;
1784 /* Add missing '_' if needed */
1785 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1787 if(len
<nameCapacity
) {
1792 } while(fieldCount
<2);
1794 posixVariantSize
= _getVariantEx(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
,
1795 (UBool
)(variantSize
> 0));
1796 if (posixVariantSize
> 0) {
1797 if (variant
== NULL
) {
1800 len
+= posixVariantSize
;
1801 variantSize
+= posixVariantSize
;
1805 /* Handle generic variants first */
1807 for (j
=0; j
<UPRV_LENGTHOF(VARIANT_MAP
); j
++) {
1808 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1809 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1810 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1812 if (variantLen
> 0) {
1813 if (len
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1816 addKeyword
= VARIANT_MAP
[j
].keyword
;
1817 addValue
= VARIANT_MAP
[j
].value
;
1821 if (len
> 0 && len
<= nameCapacity
&& name
[len
-1] == '_') { /* delete trailing '_' */
1826 /* Look up the ID in the canonicalization map */
1827 for (j
=0; j
<UPRV_LENGTHOF(CANONICALIZE_MAP
); j
++) {
1828 const char* id
= CANONICALIZE_MAP
[j
].id
;
1829 int32_t n
= (int32_t)uprv_strlen(id
);
1830 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1831 if (n
== 0 && tmpLocaleID
!= NULL
) {
1832 break; /* Don't remap "" if keywords present */
1834 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1835 if (CANONICALIZE_MAP
[j
].keyword
) {
1836 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1837 addValue
= CANONICALIZE_MAP
[j
].value
;
1844 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1845 if (tmpLocaleID
!=NULL
&& keywordAssign
!=NULL
&&
1846 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1847 if(len
<nameCapacity
) {
1852 len
+= _getKeywords(tmpLocaleID
+1, '@', (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
,
1853 NULL
, 0, NULL
, TRUE
, addKeyword
, addValue
, err
);
1854 } else if (addKeyword
!= NULL
) {
1855 U_ASSERT(addValue
!= NULL
&& len
< nameCapacity
);
1856 /* inelegant but works -- later make _getKeywords do this? */
1857 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1858 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1859 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1860 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1864 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1865 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1868 return u_terminateChars(result
, resultCapacity
, len
, err
);
1871 /* ### ID parsing API **************************************************/
1873 U_CAPI
int32_t U_EXPORT2
1874 uloc_getParent(const char* localeID
,
1876 int32_t parentCapacity
,
1879 const char *lastUnderscore
;
1882 if (U_FAILURE(*err
))
1885 if (localeID
== NULL
)
1886 localeID
= uloc_getDefault();
1888 lastUnderscore
=uprv_strrchr(localeID
, '_');
1889 if(lastUnderscore
!=NULL
) {
1890 i
=(int32_t)(lastUnderscore
-localeID
);
1895 if(i
>0 && parent
!= localeID
) {
1896 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1898 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1901 U_CAPI
int32_t U_EXPORT2
1902 uloc_getLanguage(const char* localeID
,
1904 int32_t languageCapacity
,
1907 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1910 if (err
==NULL
|| U_FAILURE(*err
)) {
1914 if(localeID
==NULL
) {
1915 localeID
=uloc_getDefault();
1918 i
=ulocimp_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1919 return u_terminateChars(language
, languageCapacity
, i
, err
);
1922 U_CAPI
int32_t U_EXPORT2
1923 uloc_getScript(const char* localeID
,
1925 int32_t scriptCapacity
,
1930 if(err
==NULL
|| U_FAILURE(*err
)) {
1934 if(localeID
==NULL
) {
1935 localeID
=uloc_getDefault();
1938 /* skip the language */
1939 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1940 if(_isIDSeparator(*localeID
)) {
1941 i
=ulocimp_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1943 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1946 U_CAPI
int32_t U_EXPORT2
1947 uloc_getCountry(const char* localeID
,
1949 int32_t countryCapacity
,
1954 if(err
==NULL
|| U_FAILURE(*err
)) {
1958 if(localeID
==NULL
) {
1959 localeID
=uloc_getDefault();
1962 /* Skip the language */
1963 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1964 if(_isIDSeparator(*localeID
)) {
1965 const char *scriptID
;
1966 /* Skip the script if available */
1967 ulocimp_getScript(localeID
+1, NULL
, 0, &scriptID
);
1968 if(scriptID
!= localeID
+1) {
1969 /* Found optional script */
1970 localeID
= scriptID
;
1972 if(_isIDSeparator(*localeID
)) {
1973 i
=ulocimp_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1976 return u_terminateChars(country
, countryCapacity
, i
, err
);
1979 U_CAPI
int32_t U_EXPORT2
1980 uloc_getVariant(const char* localeID
,
1982 int32_t variantCapacity
,
1985 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1986 const char* tmpLocaleID
;
1989 if(err
==NULL
|| U_FAILURE(*err
)) {
1993 if (_hasBCP47Extension(localeID
)) {
1994 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1996 if (localeID
==NULL
) {
1997 localeID
=uloc_getDefault();
1999 tmpLocaleID
=localeID
;
2002 /* Skip the language */
2003 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
2004 if(_isIDSeparator(*tmpLocaleID
)) {
2005 const char *scriptID
;
2006 /* Skip the script if available */
2007 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
2008 if(scriptID
!= tmpLocaleID
+1) {
2009 /* Found optional script */
2010 tmpLocaleID
= scriptID
;
2012 /* Skip the Country */
2013 if (_isIDSeparator(*tmpLocaleID
)) {
2014 const char *cntryID
;
2015 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &cntryID
);
2016 if (cntryID
!= tmpLocaleID
+1) {
2017 /* Found optional country */
2018 tmpLocaleID
= cntryID
;
2020 if(_isIDSeparator(*tmpLocaleID
)) {
2021 /* If there was no country ID, skip a possible extra IDSeparator */
2022 if (tmpLocaleID
!= cntryID
&& _isIDSeparator(tmpLocaleID
[1])) {
2025 i
=_getVariant(tmpLocaleID
+1, *tmpLocaleID
, variant
, variantCapacity
);
2030 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2031 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2033 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2034 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2037 return u_terminateChars(variant
, variantCapacity
, i
, err
);
2040 U_CAPI
int32_t U_EXPORT2
2041 uloc_getName(const char* localeID
,
2043 int32_t nameCapacity
,
2046 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
2049 U_CAPI
int32_t U_EXPORT2
2050 uloc_getBaseName(const char* localeID
,
2052 int32_t nameCapacity
,
2055 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
2058 U_CAPI
int32_t U_EXPORT2
2059 uloc_canonicalize(const char* localeID
,
2061 int32_t nameCapacity
,
2064 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
2067 U_CAPI
const char* U_EXPORT2
2068 uloc_getISO3Language(const char* localeID
)
2071 char lang
[ULOC_LANG_CAPACITY
];
2072 UErrorCode err
= U_ZERO_ERROR
;
2074 if (localeID
== NULL
)
2076 localeID
= uloc_getDefault();
2078 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
2081 offset
= _findIndex(LANGUAGES
, lang
);
2084 return LANGUAGES_3
[offset
];
2087 U_CAPI
const char* U_EXPORT2
2088 uloc_getISO3Country(const char* localeID
)
2091 char cntry
[ULOC_LANG_CAPACITY
];
2092 UErrorCode err
= U_ZERO_ERROR
;
2094 if (localeID
== NULL
)
2096 localeID
= uloc_getDefault();
2098 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
2101 offset
= _findIndex(COUNTRIES
, cntry
);
2105 return COUNTRIES_3
[offset
];
2108 U_CAPI
uint32_t U_EXPORT2
2109 uloc_getLCID(const char* localeID
)
2111 UErrorCode status
= U_ZERO_ERROR
;
2112 char langID
[ULOC_FULLNAME_CAPACITY
];
2114 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2115 if (U_FAILURE(status
)) {
2119 if (uprv_strchr(localeID
, '@')) {
2120 // uprv_convertToLCID does not support keywords other than collation.
2121 // Remove all keywords except collation.
2123 char collVal
[ULOC_KEYWORDS_CAPACITY
];
2124 char tmpLocaleID
[ULOC_FULLNAME_CAPACITY
];
2126 len
= uloc_getKeywordValue(localeID
, "collation", collVal
,
2127 UPRV_LENGTHOF(collVal
) - 1, &status
);
2129 if (U_SUCCESS(status
) && len
> 0) {
2132 len
= uloc_getBaseName(localeID
, tmpLocaleID
,
2133 UPRV_LENGTHOF(tmpLocaleID
) - 1, &status
);
2135 if (U_SUCCESS(status
) && len
> 0) {
2136 tmpLocaleID
[len
] = 0;
2138 len
= uloc_setKeywordValue("collation", collVal
, tmpLocaleID
,
2139 UPRV_LENGTHOF(tmpLocaleID
) - len
- 1, &status
);
2141 if (U_SUCCESS(status
) && len
> 0) {
2142 tmpLocaleID
[len
] = 0;
2143 return uprv_convertToLCID(langID
, tmpLocaleID
, &status
);
2148 // fall through - all keywords are simply ignored
2149 status
= U_ZERO_ERROR
;
2152 return uprv_convertToLCID(langID
, localeID
, &status
);
2155 U_CAPI
int32_t U_EXPORT2
2156 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2159 return uprv_convertToPosix(hostid
, locale
, localeCapacity
, status
);
2162 /* ### Default locale **************************************************/
2164 U_CAPI
const char* U_EXPORT2
2167 return locale_get_default();
// uloc_setDefault: hands newDefaultLocale to locale_set_default.
// NOTE(review): the declaration of err and the statement guarded by the
// failure test are not visible in this partial extraction.
2170 U_CAPI
void U_EXPORT2
2171 uloc_setDefault(const char* newDefaultLocale
,
// An error already recorded by the caller is tested before any work is done.
2174 if (U_FAILURE(*err
))
2176 /* the error code isn't currently used for anything by this function*/
2178 /* propagate change to C++ */
2179 locale_set_default(newDefaultLocale
);
2183 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2184 * to an array of pointers to arrays of char. All of these pointers are owned
2185 * by ICU-- do not delete them, and do not write through them. The array is
2186 * terminated with a null pointer.
// uloc_getISOLanguages: exported, takes no arguments and returns a pointer to
// const pointers to const char.
// NOTE(review): the function body is not visible in this partial extraction.
2188 U_CAPI
const char* const* U_EXPORT2
2189 uloc_getISOLanguages()
2195 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2196 * pointer to an array of pointers to arrays of char. All of these pointers are
2197 * owned by ICU-- do not delete them, and do not write through them. The array is
2198 * terminated with a null pointer.
// uloc_getISOCountries: exported, takes no arguments and returns a pointer to
// const pointers to const char.
// NOTE(review): the function body is not visible in this partial extraction.
2200 U_CAPI
const char* const* U_EXPORT2
2201 uloc_getISOCountries()
// _uloc_strtod: string to double conversion that copes with a C runtime whose
// decimal separator is not a period. The separator in use is probed once with
// sprintf and cached in gDecimal.
// NOTE(review): partial extraction - local declarations (buf, rep, decimal,
// myEnd, rv), braces and some statements are not visible.
2207 /* this function to be moved into cstring.c later */
// Cached decimal separator; zero until it has been probed.
2208 static char gDecimal
= 0;
2213 _uloc_strtod(const char *start
, char **end
) {
2220 /* For machines that decide to change the decimal on you,
2221 and try to be too smart with localization.
2222 This normally should be just a '.'. */
// Format a known value so the separator the runtime emits can be inspected.
// NOTE(review): sprintf is unbounded; the size of rep is not visible here -
// confirm it holds the formatted text, or prefer a bounded formatter.
2223 sprintf(rep
, "%+1.1f", 1.0);
// Usual case: the runtime already uses a period, so parse the input directly.
2227 if(gDecimal
== '.') {
2228 return uprv_strtod(start
, end
); /* fall through to OS */
// Otherwise work on a private copy limited to 29 characters of the input.
// NOTE(review): uprv_strncpy does not terminate on truncation; the statement
// that terminates buf is not visible here - verify.
2230 uprv_strncpy(buf
, start
, 29);
// Find the period in the copy and swap in the separator the runtime expects.
2232 decimal
= uprv_strchr(buf
, '.');
2234 *decimal
= gDecimal
;
2236 return uprv_strtod(start
, end
); /* no decimal point */
2238 rv
= uprv_strtod(buf
, &myEnd
);
// Translate the end position from the private copy back into the caller string.
// NOTE(review): uloc_acceptLanguageFromHTTP calls this function with a NULL
// end; the guard around this store is not visible here - verify it exists.
2240 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
// Padding member of a record type whose remaining members are not visible in
// this partial extraction.
// NOTE(review): presumably part of _acceptLangItem, the element type sorted by
// uloc_acceptLanguageCompare - confirm against the full file.
2248 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
// uloc_acceptLanguageCompare: comparator used when sorting _acceptLangItem
// records. The first argument (context) is unnamed and unused. Items are
// compared on their q member; the locale strings are compared without regard
// to case by uprv_stricmp.
// NOTE(review): partial extraction - the first q test, the assignment in the
// second branch, the condition guarding the string comparison and the return
// are not visible.
2252 static int32_t U_CALLCONV
2253 uloc_acceptLanguageCompare(const void * /*context*/, const void *a
, const void *b
)
// Recover the typed records from the untyped sort arguments.
2255 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2256 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
// A negative result places a before b.
2260 rc
= -1; /* A > B */
2261 } else if(bb
->q
> aa
->q
) {
// Case-insensitive comparison of the two locale names.
2268 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
// NOTE(review): the debug comment opened below is not closed within the
// visible lines of this extraction.
2271 #if defined(ULOC_DEBUG)
2272 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2282 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
// uloc_acceptLanguageFromHTTP: parses an HTTP Accept-Language style list,
// collects each language range with its q value into an array of
// _acceptLangItem, sorts the array with uloc_acceptLanguageCompare and passes
// the sorted locale names to uloc_acceptLanguage.
// Parameters visible here: result and resultAvailable (output buffer and its
// size), outResult, httpAcceptLanguage (the header text), availableLocales.
// NOTE(review): partial extraction - the status parameter, several locals
// (j, n, s, t, i, res, strs, jSize), braces and several statements are not
// visible; comments cover the visible statements only.
2285 U_CAPI
int32_t U_EXPORT2
2286 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2287 const char *httpAcceptLanguage
,
2288 UEnumeration
* availableLocales
,
// Fixed array of 30 items; the code below moves to heap storage once the
// item count reaches the current capacity.
2292 _acceptLangItem smallBuffer
[30];
// Receives the canonical form of each parsed locale name.
2294 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2296 const char *itemEnd
;
2297 const char *paramEnd
;
// NOTE(review): the length is taken in the declaration, before the failure
// test on status, while the parse loop below tolerates a NULL start pointer;
// a NULL httpAcceptLanguage would already reach uprv_strlen here - verify.
2302 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2304 char *tempstr
; /* Use for null pointer check */
// Current capacity of the item array, starting at the size of smallBuffer.
2307 jSize
= UPRV_LENGTHOF(smallBuffer
);
2308 if(U_FAILURE(*status
)) {
// One iteration per list entry.
2312 for(s
=httpAcceptLanguage
;s
&&*s
;) {
// NOTE(review): isspace receives a plain char here and in the loops below;
// the C library requires a value representable as unsigned char (or EOF), so
// bytes above 0x7F are a hazard where char is signed - consider a cast.
2313 while(isspace(*s
)) /* eat space at the beginning */
// Locate the end of this entry and the start of its parameter, if any.
2315 itemEnd
=uprv_strchr(s
,',');
2316 paramEnd
=uprv_strchr(s
,';');
2318 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2320 if(paramEnd
&& paramEnd
<itemEnd
) {
2321 /* semicolon (;) is closer than end (,) */
// Skip blanks inside the parameter (two passes are visible; what is skipped
// between them is not) before the number is parsed.
2326 while(isspace(*t
)) {
2332 while(isspace(*t
)) {
// Parse the q value; no end pointer is requested.
2335 j
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2337 /* no semicolon - it's 1.0 */
2342 /* eat spaces prior to semi */
// Walk t backwards over blanks that precede the parameter.
// NOTE(review): the loop condition compares paramEnd with s, which does not
// change while t moves, so the scan is not bounded by t itself - verify it
// cannot step before s when the entry consists of blanks only.
2343 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2345 /* Check for null pointer from uprv_strndup */
// Copy the locale name, from s up to and including t, into heap storage.
2346 tempstr
= uprv_strndup(s
,(int32_t)((t
+1)-s
));
2347 if (tempstr
== NULL
) {
2348 *status
= U_MEMORY_ALLOCATION_ERROR
;
2351 j
[n
].locale
= tempstr
;
// Canonicalize the name; when the canonical form differs, the stored copy is
// replaced by a duplicate of the canonical text.
// NOTE(review): plain strcmp is used here while the rest of the file uses the
// uprv_ string wrappers; and no NULL check on the uprv_strdup result is
// visible in this excerpt - verify.
2352 uloc_canonicalize(j
[n
].locale
,tmp
,UPRV_LENGTHOF(tmp
),status
);
2353 if(strcmp(j
[n
].locale
,tmp
)) {
2354 uprv_free(j
[n
].locale
);
2355 j
[n
].locale
=uprv_strdup(tmp
);
2357 #if defined(ULOC_DEBUG)
2358 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2362 while(*s
==',') { /* eat duplicate commas */
// Growth of the item array: the first growth copies out of the stack buffer
// into a new heap block of twice the capacity.
2366 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2367 j
= static_cast<_acceptLangItem
*>(uprv_malloc(sizeof(j
[0])*(jSize
*2)));
2369 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2371 #if defined(ULOC_DEBUG)
2372 fprintf(stderr
,"malloced at size %d\n", jSize
);
// Later growths resize the heap block to twice the capacity.
// NOTE(review): the result of uprv_realloc overwrites j directly, so on
// failure the previous block and the locale strings it refers to can no
// longer be released - consider a temporary pointer.
2375 j
= static_cast<_acceptLangItem
*>(uprv_realloc(j
, sizeof(j
[0])*jSize
*2));
2376 #if defined(ULOC_DEBUG)
2377 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
2382 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Order the n collected items with the comparator defined earlier in the file.
2387 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2388 if(U_FAILURE(*status
)) {
// The stack buffer must never be handed to the deallocator.
2389 if(j
!= smallBuffer
) {
2390 #if defined(ULOC_DEBUG)
2391 fprintf(stderr
,"freeing j %p\n", j
);
// Array of plain string pointers in sorted order for uloc_acceptLanguage.
2397 strs
= static_cast<char **>(uprv_malloc((size_t)(sizeof(strs
[0])*n
)));
2398 /* Check for null pointer */
// NOTE(review): unlike the other cleanup sites, no comparison of j against
// smallBuffer is visible before this free - verify that the stack buffer can
// never reach uprv_free, and that the locale strings are released as well.
2400 uprv_free(j
); /* Free to avoid memory leak */
2401 *status
= U_MEMORY_ALLOCATION_ERROR
;
2405 #if defined(ULOC_DEBUG)
2406 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2408 strs
[i
]=j
[i
].locale
;
// Delegate the matching against availableLocales.
2410 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2411 (const char**)strs
, n
, availableLocales
, status
);
// Final cleanup, again skipping the stack buffer.
2416 if(j
!= smallBuffer
) {
2417 #if defined(ULOC_DEBUG)
2418 fprintf(stderr
,"freeing j %p\n", j
);
// uloc_acceptLanguage: picks a locale from availableLocales for the ordered
// preference list acceptList.
// Pass 1 looks for an exact string match of each accepted locale and records
// the parent of each accepted locale in fallbackList.
// Pass 2 walks the fallback names, longest first, replacing each name by its
// own parent after it has been tried.
// On a match the name is copied into result and terminated through
// u_terminateChars; outResult receives ULOC_ACCEPT_VALID, ULOC_ACCEPT_FALLBACK
// or ULOC_ACCEPT_FAILED.
// NOTE(review): partial extraction - the status parameter, several locals
// (i, j, len, maxLen, l), braces, guards and the final return are not visible.
2426 U_CAPI
int32_t U_EXPORT2
2427 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2428 UAcceptResult
*outResult
, const char **acceptList
,
2429 int32_t acceptListCount
,
2430 UEnumeration
* availableLocales
,
// Receives the parent locale computed by uloc_getParent.
2436 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
// One heap string per accepted locale holding the fallback still to be tried.
2438 char **fallbackList
;
2439 if(U_FAILURE(*status
)) {
2442 fallbackList
= static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
)));
2443 if(fallbackList
==NULL
) {
2444 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Pass 1: exact matches, in preference order.
2447 for(i
=0;i
<acceptListCount
;i
++) {
2448 #if defined(ULOC_DEBUG)
2449 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
// Scan every available locale for this candidate.
2451 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2452 #if defined(ULOC_DEBUG)
2453 fprintf(stderr
," %s\n", l
);
2455 len
= (int32_t)uprv_strlen(l
);
2456 if(!uprv_strcmp(acceptList
[i
], l
)) {
// NOTE(review): a NULL test on outResult is not visible before this store.
2458 *outResult
= ULOC_ACCEPT_VALID
;
2460 #if defined(ULOC_DEBUG)
2461 fprintf(stderr
, "MATCH! %s\n", l
);
// Copy at most resultAvailable bytes; termination is left to u_terminateChars.
// NOTE(review): the guard around this copy is not visible - verify that a
// negative resultAvailable cannot reach uprv_strncpy.
2464 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
// Release the fallback strings before returning.
2467 uprv_free(fallbackList
[j
]);
2469 uprv_free(fallbackList
);
2470 return u_terminateChars(result
, resultAvailable
, len
, status
);
// Rewind the enumeration so the next candidate scans from the start.
2476 uenum_reset(availableLocales
, status
);
2477 /* save off parent info */
// A non-zero length from uloc_getParent means a parent locale exists.
// NOTE(review): no NULL check on the uprv_strdup result is visible, and the
// branch taken when no parent exists is not visible either.
2478 if(uloc_getParent(acceptList
[i
], tmp
, UPRV_LENGTHOF(tmp
), status
)!=0) {
2479 fallbackList
[i
] = uprv_strdup(tmp
);
// Pass 2: try fallbacks by decreasing name length.
2485 for(maxLen
--;maxLen
>0;maxLen
--) {
2486 for(i
=0;i
<acceptListCount
;i
++) {
// Only fallbacks whose length equals the current maxLen are tried this round.
2487 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2488 #if defined(ULOC_DEBUG)
2489 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2491 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2492 #if defined(ULOC_DEBUG)
2493 fprintf(stderr
," %s\n", l
);
2495 len
= (int32_t)uprv_strlen(l
);
2496 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2498 *outResult
= ULOC_ACCEPT_FALLBACK
;
2500 #if defined(ULOC_DEBUG)
2501 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2504 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
// Release every fallback string, then the list itself, before returning.
2506 for(j
=0;j
<acceptListCount
;j
++) {
2507 uprv_free(fallbackList
[j
]);
2509 uprv_free(fallbackList
);
2510 return u_terminateChars(result
, resultAvailable
, len
, status
);
2513 uenum_reset(availableLocales
, status
);
// No match for this fallback: replace it by its own parent when one exists,
// otherwise release it.
// NOTE(review): the statement that clears the slot after the second free is
// not visible; the final cleanup loop frees every slot again - verify.
2515 if(uloc_getParent(fallbackList
[i
], tmp
, UPRV_LENGTHOF(tmp
), status
)!=0) {
2516 uprv_free(fallbackList
[i
]);
2517 fallbackList
[i
] = uprv_strdup(tmp
);
2519 uprv_free(fallbackList
[i
]);
// Nothing matched in either pass.
2525 *outResult
= ULOC_ACCEPT_FAILED
;
2528 for(i
=0;i
<acceptListCount
;i
++) {
2529 uprv_free(fallbackList
[i
]);
2531 uprv_free(fallbackList
);
// uloc_toUnicodeLocaleKey: asks ulocimp_toBcpKey for the key that corresponds
// to keyword; when that yields NULL, the keyword itself is tested with
// ultag_isUnicodeLocaleKey.
// NOTE(review): the return statements are not visible in this partial
// extraction.
2535 U_CAPI
const char* U_EXPORT2
2536 uloc_toUnicodeLocaleKey(const char* keyword
)
2538 const char* bcpKey
= ulocimp_toBcpKey(keyword
);
// The length argument -1 presumably asks the callee to measure the string -
// confirm against ultag_isUnicodeLocaleKey.
2539 if (bcpKey
== NULL
&& ultag_isUnicodeLocaleKey(keyword
, -1)) {
2540 // unknown keyword, but syntax is fine..
// uloc_toUnicodeLocaleType: asks ulocimp_toBcpType for the type that
// corresponds to the keyword and value pair; when that yields NULL, the value
// itself is tested with ultag_isUnicodeLocaleType.
// NOTE(review): the return statements are not visible in this partial
// extraction.
2546 U_CAPI
const char* U_EXPORT2
2547 uloc_toUnicodeLocaleType(const char* keyword
, const char* value
)
// The two trailing NULL arguments are optional outputs that are not requested
// here - presumably; confirm against ulocimp_toBcpType.
2549 const char* bcpType
= ulocimp_toBcpType(keyword
, value
, NULL
, NULL
);
2550 if (bcpType
== NULL
&& ultag_isUnicodeLocaleType(value
, -1)) {
2551 // unknown keyword or type, but the value syntax is fine..
// True when c lies in the range of the ASCII digits 0 through 9.
// The argument is evaluated twice, so it must be free of side effects.
2557 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
// True when c is accepted by uprv_isASCIILetter or is an ASCII digit.
2558 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
// isWellFormedLegacyKey: walks legacyKey through the cursor p and tests
// characters with UPRV_ISALPHANUM.
// NOTE(review): the return type, the loop and the return statements are not
// visible in this partial extraction.
2561 isWellFormedLegacyKey(const char* legacyKey
)
2563 const char* p
= legacyKey
;
// Branch taken for a character that is neither an ASCII letter nor a digit.
2565 if (!UPRV_ISALPHANUM(*p
)) {
// isWellFormedLegacyType: walks legacyType through the cursor p, treating
// underscore, slash and hyphen as separators and counting the alphanumeric
// characters seen in alphaNumLen.
// NOTE(review): the return type, the loop and the branch bodies are not
// visible in this partial extraction.
2574 isWellFormedLegacyType(const char* legacyType
)
2576 const char* p
= legacyType
;
// Alphanumeric run length; how it is updated is not visible here.
2577 int32_t alphaNumLen
= 0;
2579 if (*p
== '_' || *p
== '/' || *p
== '-') {
// A separator seen while the count is zero is handled specially.
2580 if (alphaNumLen
== 0) {
2584 } else if (UPRV_ISALPHANUM(*p
)) {
// The result is true only when the count is non-zero at the end.
2591 return (alphaNumLen
!= 0);
// uloc_toLegacyKey: asks ulocimp_toLegacyKey for the legacy key that
// corresponds to keyword; when that yields NULL, the keyword itself is checked
// with isWellFormedLegacyKey.
// NOTE(review): the return statements are not visible in this partial
// extraction.
2594 U_CAPI
const char* U_EXPORT2
2595 uloc_toLegacyKey(const char* keyword
)
2597 const char* legacyKey
= ulocimp_toLegacyKey(keyword
);
2598 if (legacyKey
== NULL
) {
2599 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2602 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2603 // However, a key should not contain '=' obviously. For now, all existing
2604 // keys are using ASCII alphabetic letters only. We won't add any new key
2605 // that is not compatible with the BCP 47 syntax. Therefore, we assume
2606 // a valid key consists of [0-9a-zA-Z], no symbols.
2607 if (isWellFormedLegacyKey(keyword
)) {
// uloc_toLegacyType: asks ulocimp_toLegacyType for the legacy type that
// corresponds to the keyword and value pair; when that yields NULL, the value
// itself is checked with isWellFormedLegacyType.
// NOTE(review): the return statements and the end of the body are not visible
// in this partial extraction.
2614 U_CAPI
const char* U_EXPORT2
2615 uloc_toLegacyType(const char* keyword
, const char* value
)
2617 const char* legacyType
= ulocimp_toLegacyType(keyword
, value
, NULL
, NULL
);
2618 if (legacyType
== NULL
) {
2619 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2622 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2623 // However, a type should not contain '=' obviously. For now, all existing
2624 // types are using ASCII alphabetic letters with a few symbol letters. We won't
2625 // add any new type that is not compatible with the BCP 47 syntax except timezone
2626 // IDs. For now, we assume a valid type starts with [0-9a-zA-Z], but may contain
2627 // '-' '_' '/' in the middle.
2628 if (isWellFormedLegacyType(value
)) {