]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
2ca993e8 3* Copyright (C) 1997-2016, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11* Date Name Description
12* 04/01/97 aliu Creation.
13* 08/21/98 stephen JDK 1.2 sync
14* 12/08/98 rtg New Locale implementation and C API
15* 03/15/99 damiba overhaul.
16* 04/06/99 stephen changed setDefault() to realloc and copy
17* 06/14/99 stephen Changed calls to ures_open for new params
18* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955
A
19* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20* brought canonicalization code into line with spec
b75a7d8f
A
21*****************************************************************************/
22
23/*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29*/
30
b75a7d8f
A
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34
374ca955 35#include "putilimp.h"
b75a7d8f 36#include "ustr_imp.h"
374ca955 37#include "ulocimp.h"
b75a7d8f
A
38#include "umutex.h"
39#include "cstring.h"
40#include "cmemory.h"
374ca955
A
41#include "locmap.h"
42#include "uarrsort.h"
43#include "uenumimp.h"
44#include "uassert.h"
b75a7d8f 45
374ca955
A
46#include <stdio.h> /* for sprintf */
47
48/* ### Declarations **************************************************/
b75a7d8f
A
49
50/* Locale stuff from locid.cpp */
51U_CFUNC void locale_set_default(const char *id);
52U_CFUNC const char *locale_get_default(void);
374ca955
A
53U_CFUNC int32_t
54locale_getKeywords(const char *localeID,
55 char prev,
56 char *keywords, int32_t keywordCapacity,
57 char *values, int32_t valuesCapacity, int32_t *valLen,
58 UBool valuesToo,
59 UErrorCode *status);
60
374ca955
A
61/* ### Data tables **************************************************/
62
63/**
64 * Table of language codes, both 2- and 3-letter, with preference
65 * given to 2-letter codes where possible. Includes 3-letter codes
66 * that lack a 2-letter equivalent.
67 *
68 * This list must be in sorted order. This list is returned directly
69 * to the user by some API.
70 *
71 * This list must be kept in sync with LANGUAGES_3, with corresponding
72 * entries matched.
73 *
74 * This table should be terminated with a NULL entry, followed by a
75 * second list, and another NULL entry. The first list is visible to
76 * user code when this array is returned by API. The second list
77 * contains codes we support, but do not expose through user API.
78 *
79 * Notes
80 *
81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82 * include the revisions up to 2001/7/27 *CWB*
83 *
84 * The 3 character codes are the terminology codes like RFC 3066. This
85 * is compatible with prior ICU codes
86 *
87 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88 * table but now at the end of the table because 3 character codes are
89 * duplicates. This avoids bad searches going from 3 to 2 character
90 * codes.
91 *
92 * The range qaa-qtz is reserved for local use
93 */
51004dcb 94/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
2ca993e8 95/* ISO639 table version is 20150505 */
374ca955 96static const char * const LANGUAGES[] = {
2ca993e8
A
97 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
98 "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
99 "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
100 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
101 "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
102 "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
103 "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
104 "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
105 "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
106 "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
107 "ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg",
108 "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
109 "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
110 "cs", "csb", "cu", "cv", "cy",
111 "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
112 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
113 "dyo", "dyu", "dz", "dzg",
114 "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
115 "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
116 "ext",
117 "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
118 "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
119 "frs", "fur", "fy",
120 "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
121 "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
122 "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
123 "gur", "guz", "gv", "gwi",
124 "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
125 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
126 "hup", "hy", "hz",
127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
128 "ilo", "inh", "io", "is", "it", "iu", "izh",
129 "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
130 "jv",
131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
132 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
133 "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
134 "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
135 "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
136 "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
137 "kv", "kw", "ky",
138 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
139 "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
140 "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
141 "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
142 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
143 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
51004dcb 144 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
2ca993e8
A
145 "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
146 "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
147 "my", "mye", "myv", "mzn",
148 "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
149 "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
150 "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
151 "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
152 "oc", "oj", "om", "or", "os", "osa", "ota",
153 "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
154 "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
155 "pon", "prg", "pro", "ps", "pt",
156 "qu", "quc", "qug",
157 "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
158 "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
159 "rw", "rwk",
160 "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
161 "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
162 "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
163 "sgs", "shi", "shn", "shu", "si", "sid", "sk",
164 "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
165 "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
166 "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
167 "sv", "sw", "swb", "swc", "syc", "syr", "szl",
168 "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
169 "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
170 "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
171 "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
172 "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
51004dcb 173 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
2ca993e8
A
174 "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
175 "vot", "vro", "vun",
176 "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
177 "xal", "xh", "xmf", "xog",
178 "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
179 "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
57a6839d 180 "zun", "zxx", "zza",
b75a7d8f
A
181NULL,
182 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
183NULL
184};
51004dcb 185
73c04bcf
A
186static const char* const DEPRECATED_LANGUAGES[]={
187 "in", "iw", "ji", "jw", NULL, NULL
188};
189static const char* const REPLACEMENT_LANGUAGES[]={
190 "id", "he", "yi", "jv", NULL, NULL
191};
b75a7d8f 192
374ca955
A
193/**
194 * Table of 3-letter language codes.
195 *
196 * This is a lookup table used to convert 3-letter language codes to
197 * their 2-letter equivalent, where possible. It must be kept in sync
198 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
199 * same language as LANGUAGES_3[i]. The commented-out lines are
200 * copied from LANGUAGES to make eyeballing this baby easier.
201 *
202 * Where a 3-letter language code has no 2-letter equivalent, the
203 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
204 *
205 * This table should be terminated with a NULL entry, followed by a
206 * second list, and another NULL entry. The two lists correspond to
207 * the two lists in LANGUAGES.
208 */
51004dcb 209/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
2ca993e8 210/* ISO639 table version is 20150505 */
374ca955 211static const char * const LANGUAGES_3[] = {
2ca993e8
A
212 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
213 "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
214 "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
215 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
216 "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
217 "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
218 "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
219 "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
220 "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
221 "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
222 "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
223 "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
224 "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
225 "ces", "csb", "chu", "chv", "cym",
226 "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
227 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
228 "dyo", "dyu", "dzo", "dzg",
229 "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
230 "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
231 "ext",
232 "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
233 "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
234 "frs", "fur", "fry",
235 "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
236 "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
237 "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
238 "gur", "guz", "glv", "gwi",
239 "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
240 "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
241 "hup", "hye", "her",
242 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
243 "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
244 "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
245 "jav",
246 "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
247 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
248 "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
249 "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
250 "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
251 "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
252 "kom", "cor", "kir",
253 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
254 "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
255 "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
256 "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
257 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
258 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
51004dcb 259 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
2ca993e8
A
260 "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
261 "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
262 "mya", "mye", "myv", "mzn",
263 "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
264 "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
265 "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
266 "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
267 "oci", "oji", "orm", "ori", "oss", "osa", "ota",
268 "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
269 "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
270 "pon", "prg", "pro", "pus", "por",
271 "que", "quc", "qug",
272 "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
273 "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
274 "kin", "rwk",
275 "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
276 "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
277 "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
278 "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
279 "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
280 "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
281 "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
282 "swe", "swa", "swb", "swc", "syc", "syr", "szl",
283 "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
284 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
285 "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
286 "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
287 "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
51004dcb 288 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
2ca993e8
A
289 "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
290 "vot", "vro", "vun",
291 "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
292 "xal", "xho", "xmf", "xog",
293 "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
294 "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
57a6839d 295 "zun", "zxx", "zza",
b75a7d8f
A
296NULL,
297/* "in", "iw", "ji", "jw", "sh", */
298 "ind", "heb", "yid", "jaw", "srp",
299NULL
300};
301
374ca955
A
302/**
303 * Table of 2-letter country codes.
304 *
305 * This list must be in sorted order. This list is returned directly
306 * to the user by some API.
307 *
308 * This list must be kept in sync with COUNTRIES_3, with corresponding
309 * entries matched.
310 *
311 * This table should be terminated with a NULL entry, followed by a
312 * second list, and another NULL entry. The first list is visible to
313 * user code when this array is returned by API. The second list
314 * contains codes we support, but do not expose through user API.
315 *
316 * Notes:
317 *
318 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
319 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
320 * new codes keeping the old ones for compatibility updated to include
321 * 1999/12/03 revisions *CWB*
322 *
323 * RO(ROM) is now RO(ROU) according to
324 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
325 */
326static const char * const COUNTRIES[] = {
2ca993e8 327 "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
73c04bcf 328 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
b75a7d8f 329 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
51004dcb 330 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
b75a7d8f 331 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
2ca993e8
A
332 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
333 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
334 "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
b75a7d8f 335 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
73c04bcf 336 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
b75a7d8f
A
337 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
338 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
2ca993e8 339 "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
73c04bcf 340 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
b75a7d8f
A
341 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
342 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
46f4442e 343 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
b75a7d8f
A
344 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
345 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
346 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
347 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
348 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
46f4442e 349 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
b75a7d8f 350 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
51004dcb 351 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
2ca993e8 352 "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
b75a7d8f
A
353 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
354 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
355 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
2ca993e8 356 "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
b75a7d8f 357NULL,
51004dcb 358 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
b75a7d8f
A
359NULL
360};
361
51004dcb
A
362static const char* const DEPRECATED_COUNTRIES[] = {
363 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
73c04bcf
A
364};
365static const char* const REPLACEMENT_COUNTRIES[] = {
51004dcb
A
366/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
367 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
73c04bcf
A
368};
369
374ca955
A
370/**
371 * Table of 3-letter country codes.
372 *
373 * This is a lookup table used to convert 3-letter country codes to
374 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
375 * For all valid i, COUNTRIES[i] must refer to the same country as
376 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
377 * to make eyeballing this baby easier.
378 *
379 * This table should be terminated with a NULL entry, followed by a
380 * second list, and another NULL entry. The two lists correspond to
381 * the two lists in COUNTRIES.
382 */
383static const char * const COUNTRIES_3[] = {
2ca993e8
A
384/* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
385 "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
73c04bcf
A
386/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
387 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
b75a7d8f
A
388/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
389 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
51004dcb
A
390/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
391 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
b75a7d8f
A
392/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
393 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
2ca993e8
A
394/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
395 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
396/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
397 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
398/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
399 "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
b75a7d8f
A
400/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
401 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
46f4442e 402/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
73c04bcf 403 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
b75a7d8f
A
404/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
405 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
406/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
407 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
2ca993e8
A
408/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
409 "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
46f4442e 410/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
73c04bcf 411 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
b75a7d8f
A
412/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
413 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
414/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
415 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
46f4442e
A
416/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
417 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
b75a7d8f
A
418/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
419 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
420/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
421 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
422/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
423 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
424/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
425 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
426/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
427 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
46f4442e
A
428/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
429 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
b75a7d8f
A
430/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
431 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
51004dcb
A
432/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
433 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
2ca993e8
A
434/* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
435 "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
b75a7d8f
A
436/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
437 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
438/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
439 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
440/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
441 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
2ca993e8
A
442/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
443 "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
b75a7d8f 444NULL,
51004dcb
A
445/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
446 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
b75a7d8f
A
447NULL
448};
449
374ca955
A
/**
 * One row of the locale-ID canonicalization table.
 *
 * An input ID equal to `id` is rewritten to `canonicalID`; when `keyword`
 * is non-NULL the pair keyword=value accompanies the canonical ID.
 */
typedef struct CanonicalizationMap {
    /** Input locale ID to be matched. */
    const char *id;
    /** Canonicalized output ID that replaces `id`. */
    const char *canonicalID;
    /** Keyword to attach to the result, or NULL if there is none. */
    const char *keyword;
    /** Value for `keyword`; NULL whenever `keyword` is NULL. */
    const char *value;
} CanonicalizationMap;
456
457/**
458 * A map to canonicalize locale IDs. This handles a variety of
459 * different semantic kinds of transformations.
460 */
461static const CanonicalizationMap CANONICALIZE_MAP[] = {
462 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
729e4ab9 463 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
73c04bcf 464 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
374ca955
A
465 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
466 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
467 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
468 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
46f4442e 469 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
374ca955
A
470 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
471 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
472 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
473 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
374ca955
A
474 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
475 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
46f4442e 476 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
374ca955
A
477 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
478 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
479 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
480 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
481 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
482 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
483 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
484 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
46f4442e 485 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
374ca955 486 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
46f4442e 487 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
374ca955
A
488 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
489 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
490 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
491 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
46f4442e
A
492 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
493 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
494 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
495 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
73c04bcf 496 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
374ca955
A
497 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
498 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
499 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
46f4442e 500 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
4388f060 501 { "zh_GAN", "gan", NULL, NULL }, /* registered name */
374ca955 502 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
4388f060
A
503 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
504 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
505 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
506 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
507 { "zh_YUE", "yue", NULL, NULL }, /* registered name */
46f4442e
A
508};
509
/**
 * One row of the variant-to-keyword table: a variant name together with
 * the keyword/value pair associated with it.
 */
typedef struct VariantMap {
    /** Variant name to be matched (input). */
    const char *variant;
    /** Keyword associated with the variant, or NULL if there is none. */
    const char *keyword;
    /** Value for `keyword`; NULL whenever `keyword` is NULL. */
    const char *value;
} VariantMap;
515
516static const VariantMap VARIANT_MAP[] = {
517 { "EURO", "currency", "EUR" },
518 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
519 { "STROKE", "collation", "stroke" } /* Solaris variant */
374ca955
A
520};
521
729e4ab9
A
522/* ### BCP47 Conversion *******************************************/
/*
 * Test whether the locale id looks like a BCP47 tag with a singleton
 * (one-character) subtag: the id must be non-NULL, must not contain '@',
 * and its shortest subtag (per getShortestSubtagLength) must have length 1.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects. The test is performed on the macro argument itself; it does not
 * depend on any variable named localeID being in scope at the call site.
 */
#define _hasBCP47Extension(id) \
    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to a Unicode locale id via uloc_forLanguageTag,
 * writing into buffer (capacity length). finalID is set to buffer when the
 * conversion yields a positive length without error, and to the unmodified
 * id otherwise.
 *
 * NOTE: this expands to a bare if/else statement (no do { } while (0)
 * wrapper), so do not use it as the unbraced body of another if/else.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*(err))) { \
            finalID=id; \
        } else { \
            finalID=buffer; \
        }
532/* Gets the size of the shortest subtag in the given localeID. */
533static int32_t getShortestSubtagLength(const char *localeID) {
534 int32_t localeIDLength = uprv_strlen(localeID);
535 int32_t length = localeIDLength;
536 int32_t tmpLength = 0;
537 int32_t i;
538 UBool reset = TRUE;
539
540 for (i = 0; i < localeIDLength; i++) {
541 if (localeID[i] != '_' && localeID[i] != '-') {
542 if (reset) {
543 tmpLength = 0;
544 reset = FALSE;
545 }
546 tmpLength++;
547 } else {
548 if (tmpLength != 0 && tmpLength < length) {
549 length = tmpLength;
550 }
551 reset = TRUE;
552 }
553 }
554
555 return length;
556}
557
374ca955
A
558/* ### Keywords **************************************************/
559
/* Size in chars, including the terminating NUL, of a keyword-name buffer. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keyword entries held while parsing one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
562
729e4ab9 563U_CAPI const char * U_EXPORT2
374ca955 564locale_getKeywordsStart(const char *localeID) {
374ca955 565 const char *result = NULL;
374ca955
A
566 if((result = uprv_strchr(localeID, '@')) != NULL) {
567 return result;
73c04bcf
A
568 }
569#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
570 else {
571 /* We do this because the @ sign is variant, and the @ sign used on one
572 EBCDIC machine won't be compiled the same way on other EBCDIC based
573 machines. */
574 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
374ca955
A
575 const uint8_t *charToFind = ebcdicSigns;
576 while(*charToFind) {
577 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
578 return result;
579 }
580 charToFind++;
581 }
582 }
73c04bcf 583#endif
374ca955
A
584 return NULL;
585}
586
587/**
588 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
589 * @param keywordName incoming name to be canonicalized
590 * @param status return status (keyword too long)
591 * @return length of the keyword name
592 */
593static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
594{
595 int32_t i;
73c04bcf 596 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
374ca955
A
597
598 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
599 /* keyword name too long for internal buffer */
600 *status = U_INTERNAL_PROGRAM_ERROR;
601 return 0;
602 }
603
604 /* normalize the keyword name */
605 for(i = 0; i < keywordNameLen; i++) {
606 buf[i] = uprv_tolower(keywordName[i]);
607 }
608 buf[i] = 0;
609
610 return keywordNameLen;
611}
612
613typedef struct {
614 char keyword[ULOC_KEYWORD_BUFFER_LEN];
615 int32_t keywordLen;
616 const char *valueStart;
617 int32_t valueLen;
618} KeywordStruct;
619
620static int32_t U_CALLCONV
4388f060 621compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
374ca955
A
622 const char* leftString = ((const KeywordStruct *)left)->keyword;
623 const char* rightString = ((const KeywordStruct *)right)->keyword;
624 return uprv_strcmp(leftString, rightString);
625}
626
627/**
628 * Both addKeyword and addValue must already be in canonical form.
629 * Either both addKeyword and addValue are NULL, or neither is NULL.
630 * If they are not NULL they must be zero terminated.
631 * If addKeyword is not NULL, it must have a length small enough to fit in KeywordStruct.keyword.
632 */
633static int32_t
634_getKeywords(const char *localeID,
635 char prev,
636 char *keywords, int32_t keywordCapacity,
637 char *values, int32_t valuesCapacity, int32_t *valLen,
638 UBool valuesToo,
639 const char* addKeyword,
640 const char* addValue,
641 UErrorCode *status)
642{
643 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
644
645 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
646 int32_t numKeywords = 0;
647 const char* pos = localeID;
648 const char* equalSign = NULL;
649 const char* semicolon = NULL;
650 int32_t i = 0, j, n;
651 int32_t keywordsLen = 0;
652 int32_t valuesLen = 0;
653
654 if(prev == '@') { /* start of keyword definition */
655 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
656 do {
657 UBool duplicate = FALSE;
658 /* skip leading spaces */
659 while(*pos == ' ') {
660 pos++;
661 }
662 if (!*pos) { /* handle trailing "; " */
663 break;
664 }
665 if(numKeywords == maxKeywords) {
666 *status = U_INTERNAL_PROGRAM_ERROR;
667 return 0;
668 }
669 equalSign = uprv_strchr(pos, '=');
670 semicolon = uprv_strchr(pos, ';');
671 /* lack of '=' [foo@currency] is illegal */
672 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
673 if(!equalSign || (semicolon && semicolon<equalSign)) {
674 *status = U_INVALID_FORMAT_ERROR;
675 return 0;
676 }
677 /* need to normalize both keyword and keyword name */
678 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
679 /* keyword name too long for internal buffer */
680 *status = U_INTERNAL_PROGRAM_ERROR;
681 return 0;
682 }
683 for(i = 0, n = 0; i < equalSign - pos; ++i) {
684 if (pos[i] != ' ') {
685 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
686 }
687 }
57a6839d
A
688
689 /* zero-length keyword is an error. */
690 if (n == 0) {
691 *status = U_INVALID_FORMAT_ERROR;
692 return 0;
693 }
694
374ca955
A
695 keywordList[numKeywords].keyword[n] = 0;
696 keywordList[numKeywords].keywordLen = n;
697 /* now grab the value part. First we skip the '=' */
698 equalSign++;
699 /* then we leading spaces */
700 while(*equalSign == ' ') {
701 equalSign++;
702 }
57a6839d
A
703
704 /* Premature end or zero-length value */
2ca993e8 705 if (!*equalSign || equalSign == semicolon) {
57a6839d
A
706 *status = U_INVALID_FORMAT_ERROR;
707 return 0;
708 }
709
374ca955 710 keywordList[numKeywords].valueStart = equalSign;
57a6839d 711
374ca955
A
712 pos = semicolon;
713 i = 0;
714 if(pos) {
715 while(*(pos - i - 1) == ' ') {
716 i++;
717 }
73c04bcf 718 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
719 pos++;
720 } else {
73c04bcf 721 i = (int32_t)uprv_strlen(equalSign);
4388f060 722 while(i && equalSign[i-1] == ' ') {
374ca955
A
723 i--;
724 }
725 keywordList[numKeywords].valueLen = i;
726 }
727 /* If this is a duplicate keyword, then ignore it */
728 for (j=0; j<numKeywords; ++j) {
729 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
730 duplicate = TRUE;
731 break;
732 }
733 }
734 if (!duplicate) {
735 ++numKeywords;
736 }
737 } while(pos);
738
739 /* Handle addKeyword/addValue. */
740 if (addKeyword != NULL) {
741 UBool duplicate = FALSE;
742 U_ASSERT(addValue != NULL);
743 /* Search for duplicate; if found, do nothing. Explicit keyword
744 overrides addKeyword. */
745 for (j=0; j<numKeywords; ++j) {
746 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
747 duplicate = TRUE;
748 break;
749 }
750 }
751 if (!duplicate) {
752 if (numKeywords == maxKeywords) {
753 *status = U_INTERNAL_PROGRAM_ERROR;
754 return 0;
755 }
756 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
73c04bcf 757 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
374ca955 758 keywordList[numKeywords].valueStart = addValue;
73c04bcf 759 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
374ca955
A
760 ++numKeywords;
761 }
762 } else {
763 U_ASSERT(addValue == NULL);
764 }
765
766 /* now we have a list of keywords */
767 /* we need to sort it */
768 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
769
770 /* Now construct the keyword part */
771 for(i = 0; i < numKeywords; i++) {
772 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
773 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
774 if(valuesToo) {
775 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
776 } else {
777 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
778 }
779 }
780 keywordsLen += keywordList[i].keywordLen + 1;
781 if(valuesToo) {
782 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
783 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
784 }
785 keywordsLen += keywordList[i].valueLen;
786
787 if(i < numKeywords - 1) {
788 if(keywordsLen < keywordCapacity) {
789 keywords[keywordsLen] = ';';
790 }
791 keywordsLen++;
792 }
793 }
794 if(values) {
795 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
796 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
797 values[valuesLen + keywordList[i].valueLen] = 0;
798 }
799 valuesLen += keywordList[i].valueLen + 1;
800 }
801 }
802 if(values) {
803 values[valuesLen] = 0;
804 if(valLen) {
805 *valLen = valuesLen;
806 }
807 }
808 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
809 } else {
810 return 0;
811 }
812}
813
814U_CFUNC int32_t
815locale_getKeywords(const char *localeID,
816 char prev,
817 char *keywords, int32_t keywordCapacity,
818 char *values, int32_t valuesCapacity, int32_t *valLen,
819 UBool valuesToo,
820 UErrorCode *status) {
821 return _getKeywords(localeID, prev, keywords, keywordCapacity,
822 values, valuesCapacity, valLen, valuesToo,
823 NULL, NULL, status);
824}
825
826U_CAPI int32_t U_EXPORT2
827uloc_getKeywordValue(const char* localeID,
828 const char* keywordName,
829 char* buffer, int32_t bufferCapacity,
830 UErrorCode* status)
831{
729e4ab9 832 const char* startSearchHere = NULL;
374ca955 833 const char* nextSeparator = NULL;
374ca955
A
834 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
835 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
836 int32_t i = 0;
837 int32_t result = 0;
838
839 if(status && U_SUCCESS(*status) && localeID) {
729e4ab9
A
840 char tempBuffer[ULOC_FULLNAME_CAPACITY];
841 const char* tmpLocaleID;
842
843 if (_hasBCP47Extension(localeID)) {
844 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
845 } else {
846 tmpLocaleID=localeID;
847 }
374ca955 848
729e4ab9 849 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
374ca955
A
850 if(startSearchHere == NULL) {
851 /* no keywords, return at once */
852 return 0;
853 }
854
73c04bcf 855 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
374ca955
A
856 if(U_FAILURE(*status)) {
857 return 0;
858 }
859
860 /* find the first keyword */
861 while(startSearchHere) {
862 startSearchHere++;
863 /* skip leading spaces (allowed?) */
864 while(*startSearchHere == ' ') {
865 startSearchHere++;
866 }
867 nextSeparator = uprv_strchr(startSearchHere, '=');
868 /* need to normalize both keyword and keyword name */
869 if(!nextSeparator) {
870 break;
871 }
872 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
873 /* keyword name too long for internal buffer */
874 *status = U_INTERNAL_PROGRAM_ERROR;
875 return 0;
876 }
877 for(i = 0; i < nextSeparator - startSearchHere; i++) {
878 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
879 }
880 /* trim trailing spaces */
881 while(startSearchHere[i-1] == ' ') {
882 i--;
4388f060 883 U_ASSERT(i>=0);
374ca955
A
884 }
885 localeKeywordNameBuffer[i] = 0;
886
887 startSearchHere = uprv_strchr(nextSeparator, ';');
888
889 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
890 nextSeparator++;
891 while(*nextSeparator == ' ') {
892 nextSeparator++;
893 }
894 /* we actually found the keyword. Copy the value */
895 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
896 while(*(startSearchHere-1) == ' ') {
897 startSearchHere--;
898 }
899 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
73c04bcf 900 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
374ca955 901 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
73c04bcf 902 i = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
903 while(nextSeparator[i - 1] == ' ') {
904 i--;
905 }
906 uprv_strncpy(buffer, nextSeparator, i);
907 result = u_terminateChars(buffer, bufferCapacity, i, status);
908 } else {
909 /* give a bigger buffer, please */
910 *status = U_BUFFER_OVERFLOW_ERROR;
911 if(startSearchHere) {
73c04bcf 912 result = (int32_t)(startSearchHere - nextSeparator);
374ca955 913 } else {
73c04bcf 914 result = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
915 }
916 }
917 return result;
918 }
919 }
920 }
921 return 0;
922}
923
924U_CAPI int32_t U_EXPORT2
925uloc_setKeywordValue(const char* keywordName,
926 const char* keywordValue,
927 char* buffer, int32_t bufferCapacity,
928 UErrorCode* status)
929{
930 /* TODO: sorting. removal. */
931 int32_t keywordNameLen;
932 int32_t keywordValueLen;
933 int32_t bufLen;
934 int32_t needLen = 0;
935 int32_t foundValueLen;
936 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
937 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
938 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
939 int32_t i = 0;
940 int32_t rc;
941 char* nextSeparator = NULL;
942 char* nextEqualsign = NULL;
943 char* startSearchHere = NULL;
944 char* keywordStart = NULL;
945 char *insertHere = NULL;
946 if(U_FAILURE(*status)) {
947 return -1;
948 }
73c04bcf
A
949 if(bufferCapacity>1) {
950 bufLen = (int32_t)uprv_strlen(buffer);
951 } else {
952 *status = U_ILLEGAL_ARGUMENT_ERROR;
953 return 0;
954 }
955 if(bufferCapacity<bufLen) {
956 /* The capacity is less than the length?! Is this NULL terminated? */
957 *status = U_ILLEGAL_ARGUMENT_ERROR;
958 return 0;
959 }
374ca955
A
960 if(keywordValue && !*keywordValue) {
961 keywordValue = NULL;
962 }
963 if(keywordValue) {
73c04bcf 964 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
374ca955
A
965 } else {
966 keywordValueLen = 0;
967 }
968 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
969 if(U_FAILURE(*status)) {
970 return 0;
971 }
972 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955
A
973 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
974 if(!keywordValue) { /* no keywords = nothing to remove */
975 return bufLen;
976 }
977
978 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
979 if(startSearchHere) { /* had a single @ */
980 needLen--; /* already had the @ */
981 /* startSearchHere points at the @ */
982 } else {
983 startSearchHere=buffer+bufLen;
984 }
985 if(needLen >= bufferCapacity) {
986 *status = U_BUFFER_OVERFLOW_ERROR;
987 return needLen; /* no change */
988 }
989 *startSearchHere = '@';
990 startSearchHere++;
991 uprv_strcpy(startSearchHere, keywordNameBuffer);
992 startSearchHere += keywordNameLen;
993 *startSearchHere = '=';
994 startSearchHere++;
995 uprv_strcpy(startSearchHere, keywordValue);
996 startSearchHere+=keywordValueLen;
997 return needLen;
998 } /* end shortcut - no @ */
999
1000 keywordStart = startSearchHere;
1001 /* search for keyword */
1002 while(keywordStart) {
1003 keywordStart++;
1004 /* skip leading spaces (allowed?) */
1005 while(*keywordStart == ' ') {
1006 keywordStart++;
1007 }
1008 nextEqualsign = uprv_strchr(keywordStart, '=');
1009 /* need to normalize both keyword and keyword name */
1010 if(!nextEqualsign) {
1011 break;
1012 }
1013 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1014 /* keyword name too long for internal buffer */
1015 *status = U_INTERNAL_PROGRAM_ERROR;
1016 return 0;
1017 }
1018 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1019 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1020 }
1021 /* trim trailing spaces */
1022 while(keywordStart[i-1] == ' ') {
1023 i--;
1024 }
51004dcb 1025 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
374ca955
A
1026 localeKeywordNameBuffer[i] = 0;
1027
1028 nextSeparator = uprv_strchr(nextEqualsign, ';');
1029 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1030 if(rc == 0) {
1031 nextEqualsign++;
1032 while(*nextEqualsign == ' ') {
1033 nextEqualsign++;
1034 }
1035 /* we actually found the keyword. Change the value */
1036 if (nextSeparator) {
1037 keywordAtEnd = 0;
73c04bcf 1038 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
374ca955
A
1039 } else {
1040 keywordAtEnd = 1;
73c04bcf 1041 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
374ca955
A
1042 }
1043 if(keywordValue) { /* adding a value - not removing */
1044 if(foundValueLen == keywordValueLen) {
1045 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1046 return bufLen; /* no change in size */
1047 } else if(foundValueLen > keywordValueLen) {
1048 int32_t delta = foundValueLen - keywordValueLen;
1049 if(nextSeparator) { /* RH side */
1050 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1051 }
1052 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1053 bufLen -= delta;
1054 buffer[bufLen]=0;
1055 return bufLen;
1056 } else { /* FVL < KVL */
1057 int32_t delta = keywordValueLen - foundValueLen;
1058 if((bufLen+delta) >= bufferCapacity) {
1059 *status = U_BUFFER_OVERFLOW_ERROR;
1060 return bufLen+delta;
1061 }
1062 if(nextSeparator) { /* RH side */
1063 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1064 }
1065 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1066 bufLen += delta;
1067 buffer[bufLen]=0;
1068 return bufLen;
1069 }
1070 } else { /* removing a keyword */
1071 if(keywordAtEnd) {
1072 /* zero out the ';' or '@' just before startSearchhere */
1073 keywordStart[-1] = 0;
73c04bcf 1074 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
374ca955
A
1075 } else {
1076 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1077 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
73c04bcf 1078 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
374ca955
A
1079 }
1080 }
1081 } else if(rc<0){ /* end match keyword */
1082 /* could insert at this location. */
1083 insertHere = keywordStart;
1084 }
1085 keywordStart = nextSeparator;
1086 } /* end loop searching */
1087
1088 if(!keywordValue) {
1089 return bufLen; /* removal of non-extant keyword - no change */
1090 }
1091
1092 /* we know there is at least one keyword. */
1093 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1094 if(needLen >= bufferCapacity) {
1095 *status = U_BUFFER_OVERFLOW_ERROR;
1096 return needLen; /* no change */
1097 }
1098
1099 if(insertHere) {
1100 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1101 keywordStart = insertHere;
1102 } else {
1103 keywordStart = buffer+bufLen;
1104 *keywordStart = ';';
1105 keywordStart++;
1106 }
1107 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1108 keywordStart += keywordNameLen;
1109 *keywordStart = '=';
1110 keywordStart++;
1111 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1112 keywordStart+=keywordValueLen;
1113 if(insertHere) {
1114 *keywordStart = ';';
1115 keywordStart++;
1116 }
1117 buffer[needLen]=0;
1118 return needLen;
1119}
b75a7d8f 1120
374ca955 1121/* ### ID parsing implementation **************************************************/
b75a7d8f 1122
/* TRUE if 'a' is 'x' or 'i' in either case, i.e. a letter that can start a
 * special "x-" / "i-" prefix. Note: the argument is evaluated several times,
 * so it must not have side effects.
 */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1133
374ca955
A
1134static char* _strnchr(const char* str, int32_t len, char c) {
1135 U_ASSERT(str != 0 && len >= 0);
1136 while (len-- != 0) {
1137 char d = *str;
1138 if (d == c) {
1139 return (char*) str;
1140 } else if (d == 0) {
1141 break;
1142 }
1143 ++str;
1144 }
1145 return NULL;
1146}
1147
1148/**
1149 * Lookup 'key' in the array 'list'. The array 'list' should contain
1150 * a NULL entry, followed by more entries, and a second NULL entry.
1151 *
1152 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1153 * COUNTRIES_3.
1154 */
b75a7d8f
A
1155static int16_t _findIndex(const char* const* list, const char* key)
1156{
1157 const char* const* anchor = list;
374ca955
A
1158 int32_t pass = 0;
1159
1160 /* Make two passes through two NULL-terminated arrays at 'list' */
1161 while (pass++ < 2) {
1162 while (*list) {
1163 if (uprv_strcmp(key, *list) == 0) {
1164 return (int16_t)(list - anchor);
1165 }
1166 list++;
b75a7d8f 1167 }
374ca955 1168 ++list; /* skip final NULL *CWB*/
b75a7d8f
A
1169 }
1170 return -1;
1171}
1172
1173/* count the length of src while copying it to dest; return strlen(src) */
4388f060 1174static inline int32_t
b75a7d8f
A
1175_copyCount(char *dest, int32_t destCapacity, const char *src) {
1176 const char *anchor;
1177 char c;
1178
1179 anchor=src;
1180 for(;;) {
1181 if((c=*src)==0) {
1182 return (int32_t)(src-anchor);
1183 }
1184 if(destCapacity<=0) {
1185 return (int32_t)((src-anchor)+uprv_strlen(src));
1186 }
1187 ++src;
1188 *dest++=c;
1189 --destCapacity;
1190 }
1191}
1192
729e4ab9 1193U_CFUNC const char*
73c04bcf
A
1194uloc_getCurrentCountryID(const char* oldID){
1195 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1196 if (offset >= 0) {
1197 return REPLACEMENT_COUNTRIES[offset];
1198 }
1199 return oldID;
1200}
729e4ab9 1201U_CFUNC const char*
73c04bcf
A
1202uloc_getCurrentLanguageID(const char* oldID){
1203 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1204 if (offset >= 0) {
1205 return REPLACEMENT_LANGUAGES[offset];
1206 }
1207 return oldID;
1208}
b75a7d8f
A
1209/*
1210 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1211 * avoid duplicating code to handle the earlier locale ID pieces
1212 * in the functions for the later ones by
1213 * setting the *pEnd pointer to where they stopped parsing
1214 *
1215 * TODO try to use this in Locale
1216 */
729e4ab9
A
1217U_CFUNC int32_t
1218ulocimp_getLanguage(const char *localeID,
1219 char *language, int32_t languageCapacity,
1220 const char **pEnd) {
b75a7d8f
A
1221 int32_t i=0;
1222 int32_t offset;
1223 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1224
1225 /* if it starts with i- or x- then copy that prefix */
1226 if(_isIDPrefix(localeID)) {
1227 if(i<languageCapacity) {
1228 language[i]=(char)uprv_tolower(*localeID);
1229 }
1230 if(i<languageCapacity) {
1231 language[i+1]='-';
1232 }
1233 i+=2;
1234 localeID+=2;
1235 }
1236
1237 /* copy the language as far as possible and count its length */
1238 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1239 if(i<languageCapacity) {
1240 language[i]=(char)uprv_tolower(*localeID);
1241 }
1242 if(i<3) {
4388f060 1243 U_ASSERT(i>=0);
b75a7d8f
A
1244 lang[i]=(char)uprv_tolower(*localeID);
1245 }
1246 i++;
1247 localeID++;
1248 }
1249
1250 if(i==3) {
1251 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1252 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1253 if(offset>=0) {
374ca955 1254 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1255 }
1256 }
1257
1258 if(pEnd!=NULL) {
1259 *pEnd=localeID;
1260 }
1261 return i;
1262}
1263
729e4ab9
A
1264U_CFUNC int32_t
1265ulocimp_getScript(const char *localeID,
1266 char *script, int32_t scriptCapacity,
1267 const char **pEnd)
b75a7d8f 1268{
374ca955 1269 int32_t idLen = 0;
b75a7d8f 1270
374ca955
A
1271 if (pEnd != NULL) {
1272 *pEnd = localeID;
b75a7d8f 1273 }
374ca955
A
1274
1275 /* copy the second item as far as possible and count its length */
4388f060
A
1276 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1277 && uprv_isASCIILetter(localeID[idLen])) {
374ca955 1278 idLen++;
b75a7d8f
A
1279 }
1280
374ca955
A
1281 /* If it's exactly 4 characters long, then it's a script and not a country. */
1282 if (idLen == 4) {
1283 int32_t i;
1284 if (pEnd != NULL) {
1285 *pEnd = localeID+idLen;
1286 }
1287 if(idLen > scriptCapacity) {
1288 idLen = scriptCapacity;
1289 }
1290 if (idLen >= 1) {
1291 script[0]=(char)uprv_toupper(*(localeID++));
1292 }
1293 for (i = 1; i < idLen; i++) {
1294 script[i]=(char)uprv_tolower(*(localeID++));
1295 }
1296 }
1297 else {
1298 idLen = 0;
1299 }
1300 return idLen;
b75a7d8f
A
1301}
1302
729e4ab9
A
1303U_CFUNC int32_t
1304ulocimp_getCountry(const char *localeID,
1305 char *country, int32_t countryCapacity,
1306 const char **pEnd)
374ca955 1307{
729e4ab9 1308 int32_t idLen=0;
374ca955 1309 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1310 int32_t offset;
1311
1312 /* copy the country as far as possible and count its length */
729e4ab9
A
1313 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1314 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1315 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
b75a7d8f 1316 }
729e4ab9 1317 idLen++;
b75a7d8f
A
1318 }
1319
729e4ab9
A
1320 /* the country should be either length 2 or 3 */
1321 if (idLen == 2 || idLen == 3) {
1322 UBool gotCountry = FALSE;
1323 /* convert 3 character code to 2 character code if possible *CWB*/
1324 if(idLen==3) {
1325 offset=_findIndex(COUNTRIES_3, cnty);
1326 if(offset>=0) {
1327 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1328 gotCountry = TRUE;
1329 }
1330 }
1331 if (!gotCountry) {
1332 int32_t i = 0;
1333 for (i = 0; i < idLen; i++) {
1334 if (i < countryCapacity) {
1335 country[i]=(char)uprv_toupper(localeID[i]);
1336 }
1337 }
b75a7d8f 1338 }
729e4ab9
A
1339 localeID+=idLen;
1340 } else {
1341 idLen = 0;
b75a7d8f
A
1342 }
1343
1344 if(pEnd!=NULL) {
1345 *pEnd=localeID;
1346 }
729e4ab9
A
1347
1348 return idLen;
b75a7d8f
A
1349}
1350
374ca955
A
1351/**
1352 * @param needSeparator if true, then add leading '_' if any variants
1353 * are added to 'variant'
1354 */
1355static int32_t
1356_getVariantEx(const char *localeID,
1357 char prev,
1358 char *variant, int32_t variantCapacity,
1359 UBool needSeparator) {
b75a7d8f
A
1360 int32_t i=0;
1361
1362 /* get one or more variant tags and separate them with '_' */
1363 if(_isIDSeparator(prev)) {
1364 /* get a variant string after a '-' or '_' */
1365 while(!_isTerminator(*localeID)) {
374ca955
A
1366 if (needSeparator) {
1367 if (i<variantCapacity) {
1368 variant[i] = '_';
1369 }
1370 ++i;
1371 needSeparator = FALSE;
1372 }
b75a7d8f
A
1373 if(i<variantCapacity) {
1374 variant[i]=(char)uprv_toupper(*localeID);
1375 if(variant[i]=='-') {
1376 variant[i]='_';
1377 }
1378 }
1379 i++;
1380 localeID++;
1381 }
1382 }
1383
1384 /* if there is no variant tag after a '-' or '_' then look for '@' */
1385 if(i==0) {
1386 if(prev=='@') {
1387 /* keep localeID */
374ca955 1388 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1389 ++localeID; /* point after the '@' */
1390 } else {
1391 return 0;
1392 }
1393 while(!_isTerminator(*localeID)) {
374ca955
A
1394 if (needSeparator) {
1395 if (i<variantCapacity) {
1396 variant[i] = '_';
1397 }
1398 ++i;
1399 needSeparator = FALSE;
1400 }
b75a7d8f
A
1401 if(i<variantCapacity) {
1402 variant[i]=(char)uprv_toupper(*localeID);
1403 if(variant[i]=='-' || variant[i]==',') {
1404 variant[i]='_';
1405 }
1406 }
1407 i++;
1408 localeID++;
1409 }
1410 }
374ca955 1411
b75a7d8f
A
1412 return i;
1413}
1414
374ca955
A
1415static int32_t
1416_getVariant(const char *localeID,
1417 char prev,
1418 char *variant, int32_t variantCapacity) {
1419 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1420}
1421
1422/**
1423 * Delete ALL instances of a variant from the given list of one or
1424 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1425 * @param variants the source string of one or more variants,
1426 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1427 * terminated; if it is, trailing zero will NOT be maintained.
1428 * @param variantsLen length of variants
1429 * @param toDelete variant to delete, without separators, e.g. "EURO"
1430 * or "PREEURO"; not zero terminated
1431 * @param toDeleteLen length of toDelete
1432 * @return number of characters deleted from variants
1433 */
1434static int32_t
1435_deleteVariant(char* variants, int32_t variantsLen,
46f4442e
A
1436 const char* toDelete, int32_t toDeleteLen)
1437{
374ca955
A
1438 int32_t delta = 0; /* number of chars deleted */
1439 for (;;) {
1440 UBool flag = FALSE;
1441 if (variantsLen < toDeleteLen) {
1442 return delta;
1443 }
1444 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1445 (variantsLen == toDeleteLen ||
46f4442e
A
1446 (flag=(variants[toDeleteLen] == '_'))))
1447 {
374ca955
A
1448 int32_t d = toDeleteLen + (flag?1:0);
1449 variantsLen -= d;
1450 delta += d;
46f4442e
A
1451 if (variantsLen > 0) {
1452 uprv_memmove(variants, variants+d, variantsLen);
1453 }
374ca955
A
1454 } else {
1455 char* p = _strnchr(variants, variantsLen, '_');
1456 if (p == NULL) {
1457 return delta;
1458 }
1459 ++p;
73c04bcf 1460 variantsLen -= (int32_t)(p - variants);
374ca955
A
1461 variants = p;
1462 }
1463 }
1464}
1465
1466/* Keyword enumeration */
1467
/*
 * Per-enumeration state for the keyword UEnumeration.
 */
typedef struct UKeywordsContext {
    char *keywords;     /* heap copy of the keyword list: NUL-separated names
                           ending with an empty string; released with
                           uprv_free() by uloc_kw_closeKeywords() */
    char *current;      /* cursor into 'keywords': next name to hand out */
} UKeywordsContext;
1472
1473static void U_CALLCONV
1474uloc_kw_closeKeywords(UEnumeration *enumerator) {
1475 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1476 uprv_free(enumerator->context);
1477 uprv_free(enumerator);
1478}
1479
1480static int32_t U_CALLCONV
4388f060 1481uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
374ca955
A
1482 char *kw = ((UKeywordsContext *)en->context)->keywords;
1483 int32_t result = 0;
1484 while(*kw) {
1485 result++;
1486 kw += uprv_strlen(kw)+1;
1487 }
1488 return result;
1489}
1490
1491static const char* U_CALLCONV
1492uloc_kw_nextKeyword(UEnumeration* en,
1493 int32_t* resultLength,
4388f060 1494 UErrorCode* /*status*/) {
374ca955
A
1495 const char* result = ((UKeywordsContext *)en->context)->current;
1496 int32_t len = 0;
1497 if(*result) {
73c04bcf 1498 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1499 ((UKeywordsContext *)en->context)->current += len+1;
1500 } else {
1501 result = NULL;
1502 }
1503 if (resultLength) {
1504 *resultLength = len;
1505 }
1506 return result;
1507}
1508
1509static void U_CALLCONV
1510uloc_kw_resetKeywords(UEnumeration* en,
4388f060 1511 UErrorCode* /*status*/) {
374ca955
A
1512 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1513}
1514
1515static const UEnumeration gKeywordsEnum = {
1516 NULL,
1517 NULL,
1518 uloc_kw_closeKeywords,
1519 uloc_kw_countKeywords,
1520 uenum_unextDefault,
1521 uloc_kw_nextKeyword,
1522 uloc_kw_resetKeywords
1523};
1524
1525U_CAPI UEnumeration* U_EXPORT2
1526uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1527{
46f4442e
A
1528 UKeywordsContext *myContext = NULL;
1529 UEnumeration *result = NULL;
b75a7d8f 1530
46f4442e
A
1531 if(U_FAILURE(*status)) {
1532 return NULL;
1533 }
1534 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1535 /* Null pointer test */
1536 if (result == NULL) {
1537 *status = U_MEMORY_ALLOCATION_ERROR;
1538 return NULL;
1539 }
1540 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
51004dcb 1541 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
46f4442e
A
1542 if (myContext == NULL) {
1543 *status = U_MEMORY_ALLOCATION_ERROR;
1544 uprv_free(result);
1545 return NULL;
1546 }
1547 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1548 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1549 myContext->keywords[keywordListSize] = 0;
1550 myContext->current = myContext->keywords;
1551 result->context = myContext;
1552 return result;
374ca955
A
1553}
1554
1555U_CAPI UEnumeration* U_EXPORT2
1556uloc_openKeywords(const char* localeID,
1557 UErrorCode* status)
1558{
1559 int32_t i=0;
1560 char keywords[256];
1561 int32_t keywordsCapacity = 256;
729e4ab9
A
1562 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1563 const char* tmpLocaleID;
1564
374ca955 1565 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1566 return 0;
1567 }
1568
729e4ab9
A
1569 if (_hasBCP47Extension(localeID)) {
1570 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1571 } else {
1572 if (localeID==NULL) {
1573 localeID=uloc_getDefault();
1574 }
1575 tmpLocaleID=localeID;
b75a7d8f
A
1576 }
1577
374ca955 1578 /* Skip the language */
729e4ab9
A
1579 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1580 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1581 const char *scriptID;
1582 /* Skip the script if available */
729e4ab9
A
1583 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1584 if(scriptID != tmpLocaleID+1) {
374ca955 1585 /* Found optional script */
729e4ab9 1586 tmpLocaleID = scriptID;
374ca955
A
1587 }
1588 /* Skip the Country */
729e4ab9
A
1589 if (_isIDSeparator(*tmpLocaleID)) {
1590 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1591 if(_isIDSeparator(*tmpLocaleID)) {
1592 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
374ca955 1593 }
b75a7d8f
A
1594 }
1595 }
1596
374ca955 1597 /* keywords are located after '@' */
729e4ab9
A
1598 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1599 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
374ca955
A
1600 }
1601
1602 if(i) {
1603 return uloc_openKeywordList(keywords, i, status);
1604 } else {
1605 return NULL;
b75a7d8f 1606 }
b75a7d8f
A
1607}
1608
b75a7d8f 1609
374ca955
A
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if 'options' has at least one bit of 'mask' set. Both arguments are
 * parenthesized so that compound expressions (e.g. a|b) expand correctly.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The characters of "i-default" WITHOUT a terminating NUL, so that
 * I_DEFAULT_LENGTH is exactly the number of characters (9).
 */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
73c04bcf 1618
374ca955
A
1619/**
1620 * Canonicalize the given localeID, to level 1 or to level 2,
1621 * depending on the options. To specify level 1, pass in options=0.
1622 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1623 *
1624 * This is the code underlying uloc_getName and uloc_canonicalize.
1625 */
1626static int32_t
1627_canonicalize(const char* localeID,
1628 char* result,
1629 int32_t resultCapacity,
1630 uint32_t options,
1631 UErrorCode* err) {
1632 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1633 char localeBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9 1634 char tempBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e 1635 const char* origLocaleID;
729e4ab9 1636 const char* tmpLocaleID;
374ca955
A
1637 const char* keywordAssign = NULL;
1638 const char* separatorIndicator = NULL;
1639 const char* addKeyword = NULL;
1640 const char* addValue = NULL;
1641 char* name;
1642 char* variant = NULL; /* pointer into name, or NULL */
374ca955
A
1643
1644 if (U_FAILURE(*err)) {
b75a7d8f
A
1645 return 0;
1646 }
1647
729e4ab9
A
1648 if (_hasBCP47Extension(localeID)) {
1649 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1650 } else {
1651 if (localeID==NULL) {
1652 localeID=uloc_getDefault();
1653 }
1654 tmpLocaleID=localeID;
b75a7d8f 1655 }
729e4ab9
A
1656
1657 origLocaleID=tmpLocaleID;
b75a7d8f 1658
374ca955
A
1659 /* if we are doing a full canonicalization, then put results in
1660 localeBuffer, if necessary; otherwise send them to result. */
729e4ab9 1661 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
4388f060 1662 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
374ca955 1663 name = localeBuffer;
4388f060 1664 nameCapacity = (int32_t)sizeof(localeBuffer);
374ca955
A
1665 } else {
1666 name = result;
1667 nameCapacity = resultCapacity;
1668 }
1669
b75a7d8f 1670 /* get all pieces, one after another, and separate with '_' */
729e4ab9 1671 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
73c04bcf
A
1672
1673 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1674 const char *d = uloc_getDefault();
1675
729e4ab9 1676 len = (int32_t)uprv_strlen(d);
73c04bcf
A
1677
1678 if (name != NULL) {
1679 uprv_strncpy(name, d, len);
1680 }
729e4ab9 1681 } else if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1682 const char *scriptID;
1683
b75a7d8f 1684 ++fieldCount;
374ca955
A
1685 if(len<nameCapacity) {
1686 name[len]='_';
b75a7d8f 1687 }
374ca955
A
1688 ++len;
1689
4388f060
A
1690 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1691 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
374ca955
A
1692 if(scriptSize > 0) {
1693 /* Found optional script */
729e4ab9 1694 tmpLocaleID = scriptID;
b75a7d8f 1695 ++fieldCount;
374ca955 1696 len+=scriptSize;
729e4ab9 1697 if (_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1698 /* If there is something else, then we add the _ */
1699 if(len<nameCapacity) {
1700 name[len]='_';
1701 }
1702 ++len;
1703 }
1704 }
1705
729e4ab9
A
1706 if (_isIDSeparator(*tmpLocaleID)) {
1707 const char *cntryID;
4388f060
A
1708 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1709 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
729e4ab9
A
1710 if (cntrySize > 0) {
1711 /* Found optional country */
1712 tmpLocaleID = cntryID;
1713 len+=cntrySize;
1714 }
1715 if(_isIDSeparator(*tmpLocaleID)) {
51004dcb
A
1716 /* If there is something else, then we add the _ if we found country before. */
1717 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
729e4ab9
A
1718 ++fieldCount;
1719 if(len<nameCapacity) {
1720 name[len]='_';
1721 }
1722 ++len;
374ca955 1723 }
729e4ab9 1724
4388f060
A
1725 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1726 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
374ca955 1727 if (variantSize > 0) {
4388f060 1728 variant = len<nameCapacity ? name+len : NULL;
374ca955 1729 len += variantSize;
729e4ab9 1730 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
374ca955 1731 }
b75a7d8f 1732 }
b75a7d8f
A
1733 }
1734 }
1735
374ca955 1736 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
729e4ab9 1737 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
374ca955 1738 UBool done = FALSE;
b75a7d8f 1739 do {
729e4ab9 1740 char c = *tmpLocaleID;
374ca955
A
1741 switch (c) {
1742 case 0:
1743 case '@':
1744 done = TRUE;
1745 break;
1746 default:
1747 if (len<nameCapacity) {
1748 name[len] = c;
1749 }
1750 ++len;
729e4ab9 1751 ++tmpLocaleID;
374ca955
A
1752 break;
1753 }
1754 } while (!done);
1755 }
1756
1757 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
729e4ab9
A
1758 After this, tmpLocaleID either points to '@' or is NULL */
1759 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1760 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1761 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
374ca955
A
1762 }
1763
1764 /* Copy POSIX-style variant, if any [mr@FOO] */
1765 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
729e4ab9 1766 tmpLocaleID != NULL && keywordAssign == NULL) {
374ca955 1767 for (;;) {
729e4ab9 1768 char c = *tmpLocaleID;
374ca955
A
1769 if (c == 0) {
1770 break;
1771 }
1772 if (len<nameCapacity) {
1773 name[len] = c;
1774 }
1775 ++len;
729e4ab9 1776 ++tmpLocaleID;
374ca955
A
1777 }
1778 }
1779
1780 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1781 /* Handle @FOO variant if @ is present and not followed by = */
729e4ab9 1782 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
374ca955
A
1783 int32_t posixVariantSize;
1784 /* Add missing '_' if needed */
1785 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1786 do {
1787 if(len<nameCapacity) {
1788 name[len]='_';
1789 }
1790 ++len;
1791 ++fieldCount;
1792 } while(fieldCount<2);
1793 }
729e4ab9 1794 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
374ca955
A
1795 (UBool)(variantSize > 0));
1796 if (posixVariantSize > 0) {
1797 if (variant == NULL) {
1798 variant = name+len;
1799 }
1800 len += posixVariantSize;
1801 variantSize += posixVariantSize;
b75a7d8f 1802 }
374ca955
A
1803 }
1804
46f4442e
A
1805 /* Handle generic variants first */
1806 if (variant) {
2ca993e8 1807 for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
46f4442e
A
1808 const char* variantToCompare = VARIANT_MAP[j].variant;
1809 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1810 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1811 len -= variantLen;
1812 if (variantLen > 0) {
b25be066 1813 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1814 --len;
1815 }
1816 addKeyword = VARIANT_MAP[j].keyword;
1817 addValue = VARIANT_MAP[j].value;
1818 break;
1819 }
1820 }
b25be066 1821 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1822 --len;
1823 }
374ca955
A
1824 }
1825
1826 /* Look up the ID in the canonicalization map */
2ca993e8 1827 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
374ca955 1828 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1829 int32_t n = (int32_t)uprv_strlen(id);
374ca955 1830 if (len == n && uprv_strncmp(name, id, n) == 0) {
729e4ab9 1831 if (n == 0 && tmpLocaleID != NULL) {
374ca955
A
1832 break; /* Don't remap "" if keywords present */
1833 }
1834 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
46f4442e
A
1835 if (CANONICALIZE_MAP[j].keyword) {
1836 addKeyword = CANONICALIZE_MAP[j].keyword;
1837 addValue = CANONICALIZE_MAP[j].value;
1838 }
374ca955
A
1839 break;
1840 }
1841 }
374ca955
A
1842 }
1843
1844 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
729e4ab9 1845 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
374ca955
A
1846 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1847 if(len<nameCapacity) {
1848 name[len]='@';
1849 }
1850 ++len;
b75a7d8f 1851 ++fieldCount;
4388f060
A
1852 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1853 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
374ca955 1854 } else if (addKeyword != NULL) {
51004dcb 1855 U_ASSERT(addValue != NULL && len < nameCapacity);
374ca955
A
1856 /* inelegant but works -- later make _getKeywords do this? */
1857 len += _copyCount(name+len, nameCapacity-len, "@");
1858 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1859 len += _copyCount(name+len, nameCapacity-len, "=");
1860 len += _copyCount(name+len, nameCapacity-len, addValue);
1861 }
1862 }
1863
46f4442e 1864 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955
A
1865 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1866 }
1867
1868 return u_terminateChars(result, resultCapacity, len, err);
1869}
1870
1871/* ### ID parsing API **************************************************/
1872
1873U_CAPI int32_t U_EXPORT2
1874uloc_getParent(const char* localeID,
1875 char* parent,
1876 int32_t parentCapacity,
1877 UErrorCode* err)
1878{
1879 const char *lastUnderscore;
1880 int32_t i;
1881
1882 if (U_FAILURE(*err))
1883 return 0;
1884
1885 if (localeID == NULL)
1886 localeID = uloc_getDefault();
1887
1888 lastUnderscore=uprv_strrchr(localeID, '_');
1889 if(lastUnderscore!=NULL) {
1890 i=(int32_t)(lastUnderscore-localeID);
1891 } else {
1892 i=0;
b75a7d8f 1893 }
374ca955 1894
73c04bcf 1895 if(i>0 && parent != localeID) {
374ca955
A
1896 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1897 }
1898 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1899}
374ca955
A
1900
1901U_CAPI int32_t U_EXPORT2
1902uloc_getLanguage(const char* localeID,
1903 char* language,
1904 int32_t languageCapacity,
1905 UErrorCode* err)
1906{
1907 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1908 int32_t i=0;
1909
1910 if (err==NULL || U_FAILURE(*err)) {
1911 return 0;
1912 }
1913
1914 if(localeID==NULL) {
1915 localeID=uloc_getDefault();
1916 }
1917
729e4ab9 1918 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
374ca955
A
1919 return u_terminateChars(language, languageCapacity, i, err);
1920}
1921
1922U_CAPI int32_t U_EXPORT2
1923uloc_getScript(const char* localeID,
1924 char* script,
1925 int32_t scriptCapacity,
1926 UErrorCode* err)
1927{
1928 int32_t i=0;
1929
1930 if(err==NULL || U_FAILURE(*err)) {
1931 return 0;
1932 }
1933
1934 if(localeID==NULL) {
1935 localeID=uloc_getDefault();
1936 }
1937
1938 /* skip the language */
729e4ab9 1939 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955 1940 if(_isIDSeparator(*localeID)) {
729e4ab9 1941 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
374ca955
A
1942 }
1943 return u_terminateChars(script, scriptCapacity, i, err);
1944}
1945
1946U_CAPI int32_t U_EXPORT2
1947uloc_getCountry(const char* localeID,
1948 char* country,
1949 int32_t countryCapacity,
1950 UErrorCode* err)
1951{
1952 int32_t i=0;
1953
1954 if(err==NULL || U_FAILURE(*err)) {
1955 return 0;
1956 }
1957
1958 if(localeID==NULL) {
1959 localeID=uloc_getDefault();
1960 }
1961
1962 /* Skip the language */
729e4ab9 1963 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955
A
1964 if(_isIDSeparator(*localeID)) {
1965 const char *scriptID;
1966 /* Skip the script if available */
729e4ab9 1967 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
374ca955
A
1968 if(scriptID != localeID+1) {
1969 /* Found optional script */
1970 localeID = scriptID;
1971 }
1972 if(_isIDSeparator(*localeID)) {
729e4ab9 1973 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
374ca955
A
1974 }
1975 }
1976 return u_terminateChars(country, countryCapacity, i, err);
1977}
1978
1979U_CAPI int32_t U_EXPORT2
1980uloc_getVariant(const char* localeID,
1981 char* variant,
1982 int32_t variantCapacity,
1983 UErrorCode* err)
1984{
729e4ab9
A
1985 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1986 const char* tmpLocaleID;
374ca955 1987 int32_t i=0;
374ca955
A
1988
1989 if(err==NULL || U_FAILURE(*err)) {
1990 return 0;
1991 }
1992
729e4ab9
A
1993 if (_hasBCP47Extension(localeID)) {
1994 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1995 } else {
1996 if (localeID==NULL) {
1997 localeID=uloc_getDefault();
1998 }
1999 tmpLocaleID=localeID;
374ca955
A
2000 }
2001
2002 /* Skip the language */
729e4ab9
A
2003 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2004 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
2005 const char *scriptID;
2006 /* Skip the script if available */
729e4ab9
A
2007 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2008 if(scriptID != tmpLocaleID+1) {
374ca955 2009 /* Found optional script */
729e4ab9 2010 tmpLocaleID = scriptID;
374ca955
A
2011 }
2012 /* Skip the Country */
729e4ab9
A
2013 if (_isIDSeparator(*tmpLocaleID)) {
2014 const char *cntryID;
2015 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2016 if (cntryID != tmpLocaleID+1) {
2017 /* Found optional country */
2018 tmpLocaleID = cntryID;
2019 }
2020 if(_isIDSeparator(*tmpLocaleID)) {
2021 /* If there was no country ID, skip a possible extra IDSeparator */
2022 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2023 tmpLocaleID++;
2024 }
2025 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
374ca955
A
2026 }
2027 }
2028 }
2029
2030 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2031 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2032/*
2033 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2034 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2035 }
2036*/
2037 return u_terminateChars(variant, variantCapacity, i, err);
2038}
2039
2040U_CAPI int32_t U_EXPORT2
2041uloc_getName(const char* localeID,
2042 char* name,
2043 int32_t nameCapacity,
2044 UErrorCode* err)
2045{
2046 return _canonicalize(localeID, name, nameCapacity, 0, err);
2047}
2048
2049U_CAPI int32_t U_EXPORT2
2050uloc_getBaseName(const char* localeID,
2051 char* name,
2052 int32_t nameCapacity,
2053 UErrorCode* err)
2054{
2055 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2056}
2057
2058U_CAPI int32_t U_EXPORT2
2059uloc_canonicalize(const char* localeID,
2060 char* name,
2061 int32_t nameCapacity,
2062 UErrorCode* err)
2063{
2064 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2065}
2066
b75a7d8f
A
2067U_CAPI const char* U_EXPORT2
2068uloc_getISO3Language(const char* localeID)
2069{
374ca955
A
2070 int16_t offset;
2071 char lang[ULOC_LANG_CAPACITY];
2072 UErrorCode err = U_ZERO_ERROR;
2073
2074 if (localeID == NULL)
2075 {
2076 localeID = uloc_getDefault();
2077 }
2078 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2079 if (U_FAILURE(err))
2080 return "";
2081 offset = _findIndex(LANGUAGES, lang);
2082 if (offset < 0)
2083 return "";
2084 return LANGUAGES_3[offset];
b75a7d8f
A
2085}
2086
2087U_CAPI const char* U_EXPORT2
2088uloc_getISO3Country(const char* localeID)
2089{
2090 int16_t offset;
374ca955 2091 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f
A
2092 UErrorCode err = U_ZERO_ERROR;
2093
2094 if (localeID == NULL)
2095 {
2096 localeID = uloc_getDefault();
2097 }
374ca955 2098 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
2099 if (U_FAILURE(err))
2100 return "";
374ca955 2101 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
2102 if (offset < 0)
2103 return "";
2104
374ca955 2105 return COUNTRIES_3[offset];
b75a7d8f
A
2106}
2107
2108U_CAPI uint32_t U_EXPORT2
2109uloc_getLCID(const char* localeID)
2110{
374ca955
A
2111 UErrorCode status = U_ZERO_ERROR;
2112 char langID[ULOC_FULLNAME_CAPACITY];
2113
2114 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2115 if (U_FAILURE(status)) {
2116 return 0;
b75a7d8f 2117 }
374ca955 2118
57a6839d
A
2119 if (uprv_strchr(localeID, '@')) {
2120 // uprv_convertToLCID does not support keywords other than collation.
2121 // Remove all keywords except collation.
2122 int32_t len;
2123 char collVal[ULOC_KEYWORDS_CAPACITY];
2124 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2125
2126 len = uloc_getKeywordValue(localeID, "collation", collVal,
2ca993e8 2127 UPRV_LENGTHOF(collVal) - 1, &status);
57a6839d
A
2128
2129 if (U_SUCCESS(status) && len > 0) {
2130 collVal[len] = 0;
2131
2132 len = uloc_getBaseName(localeID, tmpLocaleID,
2ca993e8 2133 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
57a6839d 2134
2ca993e8 2135 if (U_SUCCESS(status) && len > 0) {
57a6839d
A
2136 tmpLocaleID[len] = 0;
2137
2138 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2ca993e8 2139 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
57a6839d 2140
2ca993e8 2141 if (U_SUCCESS(status) && len > 0) {
57a6839d
A
2142 tmpLocaleID[len] = 0;
2143 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2144 }
2145 }
2146 }
2147
2148 // fall through - all keywords are simply ignored
2149 status = U_ZERO_ERROR;
2150 }
2151
374ca955
A
2152 return uprv_convertToLCID(langID, localeID, &status);
2153}
2154
73c04bcf
A
2155U_CAPI int32_t U_EXPORT2
2156uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2157 UErrorCode *status)
2158{
57a6839d 2159 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
73c04bcf
A
2160}
2161
374ca955
A
2162/* ### Default locale **************************************************/
2163
2164U_CAPI const char* U_EXPORT2
2165uloc_getDefault()
2166{
2167 return locale_get_default();
2168}
2169
2170U_CAPI void U_EXPORT2
2171uloc_setDefault(const char* newDefaultLocale,
2172 UErrorCode* err)
2173{
2174 if (U_FAILURE(*err))
2175 return;
2176 /* the error code isn't currently used for anything by this function*/
b75a7d8f 2177
374ca955
A
2178 /* propagate change to C++ */
2179 locale_set_default(newDefaultLocale);
b75a7d8f
A
2180}
2181
729e4ab9 2182/**
51004dcb 2183 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
729e4ab9
A
2184 * to an array of pointers to arrays of char. All of these pointers are owned
2185 * by ICU-- do not delete them, and do not write through them. The array is
2186 * terminated with a null pointer.
2187 */
2188U_CAPI const char* const* U_EXPORT2
2189uloc_getISOLanguages()
2190{
2191 return LANGUAGES;
2192}
374ca955 2193
729e4ab9
A
2194/**
2195 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2196 * pointer to an array of pointers to arrays of char. All of these pointers are
2197 * owned by ICU-- do not delete them, and do not write through them. The array is
2198 * terminated with a null pointer.
b75a7d8f 2199 */
729e4ab9
A
2200U_CAPI const char* const* U_EXPORT2
2201uloc_getISOCountries()
b75a7d8f 2202{
729e4ab9
A
2203 return COUNTRIES;
2204}
73c04bcf 2205
b75a7d8f 2206
729e4ab9
A
2207/* this function to be moved into cstring.c later */
2208static char gDecimal = 0;
b75a7d8f 2209
729e4ab9
A
2210static /* U_CAPI */
2211double
2212/* U_EXPORT2 */
2213_uloc_strtod(const char *start, char **end) {
2214 char *decimal;
2215 char *myEnd;
2216 char buf[30];
2217 double rv;
2218 if (!gDecimal) {
2219 char rep[5];
2220 /* For machines that decide to change the decimal on you,
2221 and try to be too smart with localization.
2222 This normally should be just a '.'. */
2223 sprintf(rep, "%+1.1f", 1.0);
2224 gDecimal = rep[2];
b75a7d8f 2225 }
b75a7d8f 2226
729e4ab9
A
2227 if(gDecimal == '.') {
2228 return uprv_strtod(start, end); /* fall through to OS */
b75a7d8f 2229 } else {
729e4ab9
A
2230 uprv_strncpy(buf, start, 29);
2231 buf[29]=0;
2232 decimal = uprv_strchr(buf, '.');
2233 if(decimal) {
2234 *decimal = gDecimal;
46f4442e 2235 } else {
729e4ab9 2236 return uprv_strtod(start, end); /* no decimal point */
46f4442e 2237 }
729e4ab9
A
2238 rv = uprv_strtod(buf, &myEnd);
2239 if(end) {
2240 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
b75a7d8f 2241 }
729e4ab9 2242 return rv;
374ca955 2243 }
374ca955
A
2244}
2245
729e4ab9
A
/* One parsed entry of an Accept-Language list. */
typedef struct {
    /* quality weight of the entry (1.0 when the header gives none) */
    float q;
    /* always set to 0; present to avoid uninitialized memory copy from qsort */
    int32_t dummy;
    /* heap-allocated locale ID of the entry */
    char *locale;
} _acceptLangItem;
b75a7d8f 2251
729e4ab9 2252static int32_t U_CALLCONV
4388f060 2253uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
729e4ab9
A
2254{
2255 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2256 const _acceptLangItem *bb = (const _acceptLangItem*)b;
b75a7d8f 2257
729e4ab9
A
2258 int32_t rc = 0;
2259 if(bb->q < aa->q) {
2260 rc = -1; /* A > B */
2261 } else if(bb->q > aa->q) {
2262 rc = 1; /* A < B */
2263 } else {
2264 rc = 0; /* A = B */
b75a7d8f
A
2265 }
2266
729e4ab9
A
2267 if(rc==0) {
2268 rc = uprv_stricmp(aa->locale, bb->locale);
b75a7d8f
A
2269 }
2270
729e4ab9
A
2271#if defined(ULOC_DEBUG)
2272 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2273 aa->locale, aa->q,
2274 bb->locale, bb->q,
2275 rc);*/
2276#endif
374ca955 2277
729e4ab9 2278 return rc;
374ca955
A
2279}
2280
729e4ab9
A
2281/*
2282mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2283*/
374ca955 2284
b75a7d8f 2285U_CAPI int32_t U_EXPORT2
729e4ab9
A
2286uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2287 const char *httpAcceptLanguage,
2288 UEnumeration* availableLocales,
2289 UErrorCode *status)
374ca955 2290{
729e4ab9
A
2291 _acceptLangItem *j;
2292 _acceptLangItem smallBuffer[30];
2293 char **strs;
2294 char tmp[ULOC_FULLNAME_CAPACITY +1];
2295 int32_t n = 0;
2296 const char *itemEnd;
2297 const char *paramEnd;
2298 const char *s;
2299 const char *t;
2300 int32_t res;
2301 int32_t i;
2302 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2303 int32_t jSize;
2304 char *tempstr; /* Use for null pointer check */
b75a7d8f 2305
729e4ab9 2306 j = smallBuffer;
2ca993e8 2307 jSize = UPRV_LENGTHOF(smallBuffer);
729e4ab9
A
2308 if(U_FAILURE(*status)) {
2309 return -1;
b75a7d8f
A
2310 }
2311
729e4ab9
A
2312 for(s=httpAcceptLanguage;s&&*s;) {
2313 while(isspace(*s)) /* eat space at the beginning */
2314 s++;
2315 itemEnd=uprv_strchr(s,',');
2316 paramEnd=uprv_strchr(s,';');
2317 if(!itemEnd) {
2318 itemEnd = httpAcceptLanguage+l; /* end of string */
b75a7d8f 2319 }
729e4ab9
A
2320 if(paramEnd && paramEnd<itemEnd) {
2321 /* semicolon (;) is closer than end (,) */
2322 t = paramEnd+1;
2323 if(*t=='q') {
2324 t++;
2325 }
2326 while(isspace(*t)) {
2327 t++;
2328 }
2329 if(*t=='=') {
2330 t++;
2331 }
2332 while(isspace(*t)) {
2333 t++;
2334 }
2335 j[n].q = (float)_uloc_strtod(t,NULL);
2336 } else {
2337 /* no semicolon - it's 1.0 */
2338 j[n].q = 1.0f;
2339 paramEnd = itemEnd;
374ca955 2340 }
46f4442e 2341 j[n].dummy=0;
374ca955
A
2342 /* eat spaces prior to semi */
2343 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2344 ;
46f4442e
A
2345 /* Check for null pointer from uprv_strndup */
2346 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2347 if (tempstr == NULL) {
2348 *status = U_MEMORY_ALLOCATION_ERROR;
2349 return -1;
2350 }
2351 j[n].locale = tempstr;
2ca993e8 2352 uloc_canonicalize(j[n].locale,tmp,UPRV_LENGTHOF(tmp),status);
374ca955
A
2353 if(strcmp(j[n].locale,tmp)) {
2354 uprv_free(j[n].locale);
2355 j[n].locale=uprv_strdup(tmp);
2356 }
2357#if defined(ULOC_DEBUG)
2358 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2359#endif
2360 n++;
2361 s = itemEnd;
2362 while(*s==',') { /* eat duplicate commas */
2363 s++;
2364 }
2365 if(n>=jSize) {
46f4442e 2366 if(j==smallBuffer) { /* overflowed the small buffer. */
51004dcb 2367 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
46f4442e
A
2368 if(j!=NULL) {
2369 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2370 }
374ca955 2371#if defined(ULOC_DEBUG)
46f4442e 2372 fprintf(stderr,"malloced at size %d\n", jSize);
374ca955 2373#endif
46f4442e 2374 } else {
51004dcb 2375 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
374ca955 2376#if defined(ULOC_DEBUG)
46f4442e 2377 fprintf(stderr,"re-alloced at size %d\n", jSize);
374ca955 2378#endif
46f4442e
A
2379 }
2380 jSize *= 2;
2381 if(j==NULL) {
2382 *status = U_MEMORY_ALLOCATION_ERROR;
2383 return -1;
2384 }
374ca955
A
2385 }
2386 }
2387 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2388 if(U_FAILURE(*status)) {
46f4442e 2389 if(j != smallBuffer) {
374ca955 2390#if defined(ULOC_DEBUG)
46f4442e 2391 fprintf(stderr,"freeing j %p\n", j);
374ca955 2392#endif
46f4442e
A
2393 uprv_free(j);
2394 }
2395 return -1;
374ca955 2396 }
51004dcb 2397 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
46f4442e
A
2398 /* Check for null pointer */
2399 if (strs == NULL) {
2400 uprv_free(j); /* Free to avoid memory leak */
2401 *status = U_MEMORY_ALLOCATION_ERROR;
2402 return -1;
2403 }
374ca955
A
2404 for(i=0;i<n;i++) {
2405#if defined(ULOC_DEBUG)
2406 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2407#endif
2408 strs[i]=j[i].locale;
2409 }
2410 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2411 (const char**)strs, n, availableLocales, status);
2412 for(i=0;i<n;i++) {
2413 uprv_free(strs[i]);
2414 }
2415 uprv_free(strs);
2416 if(j != smallBuffer) {
2417#if defined(ULOC_DEBUG)
46f4442e 2418 fprintf(stderr,"freeing j %p\n", j);
374ca955 2419#endif
46f4442e 2420 uprv_free(j);
374ca955
A
2421 }
2422 return res;
2423}
2424
2425
2426U_CAPI int32_t U_EXPORT2
2427uloc_acceptLanguage(char *result, int32_t resultAvailable,
2428 UAcceptResult *outResult, const char **acceptList,
2429 int32_t acceptListCount,
2430 UEnumeration* availableLocales,
2431 UErrorCode *status)
2432{
2433 int32_t i,j;
2434 int32_t len;
2435 int32_t maxLen=0;
2436 char tmp[ULOC_FULLNAME_CAPACITY+1];
2437 const char *l;
2438 char **fallbackList;
2439 if(U_FAILURE(*status)) {
2440 return -1;
2441 }
51004dcb 2442 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
374ca955 2443 if(fallbackList==NULL) {
46f4442e
A
2444 *status = U_MEMORY_ALLOCATION_ERROR;
2445 return -1;
374ca955
A
2446 }
2447 for(i=0;i<acceptListCount;i++) {
2448#if defined(ULOC_DEBUG)
2449 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2450#endif
2451 while((l=uenum_next(availableLocales, NULL, status))) {
2452#if defined(ULOC_DEBUG)
2453 fprintf(stderr," %s\n", l);
2454#endif
73c04bcf 2455 len = (int32_t)uprv_strlen(l);
374ca955
A
2456 if(!uprv_strcmp(acceptList[i], l)) {
2457 if(outResult) {
2458 *outResult = ULOC_ACCEPT_VALID;
2459 }
2460#if defined(ULOC_DEBUG)
2461 fprintf(stderr, "MATCH! %s\n", l);
2462#endif
2463 if(len>0) {
2464 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2465 }
2466 for(j=0;j<i;j++) {
2467 uprv_free(fallbackList[j]);
2468 }
2469 uprv_free(fallbackList);
2470 return u_terminateChars(result, resultAvailable, len, status);
2471 }
2472 if(len>maxLen) {
2473 maxLen = len;
2474 }
2475 }
2476 uenum_reset(availableLocales, status);
2477 /* save off parent info */
2ca993e8 2478 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
374ca955
A
2479 fallbackList[i] = uprv_strdup(tmp);
2480 } else {
2481 fallbackList[i]=0;
2482 }
2483 }
2484
2485 for(maxLen--;maxLen>0;maxLen--) {
2486 for(i=0;i<acceptListCount;i++) {
2487 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2488#if defined(ULOC_DEBUG)
2489 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2490#endif
2491 while((l=uenum_next(availableLocales, NULL, status))) {
2492#if defined(ULOC_DEBUG)
2493 fprintf(stderr," %s\n", l);
2494#endif
73c04bcf 2495 len = (int32_t)uprv_strlen(l);
374ca955
A
2496 if(!uprv_strcmp(fallbackList[i], l)) {
2497 if(outResult) {
2498 *outResult = ULOC_ACCEPT_FALLBACK;
2499 }
2500#if defined(ULOC_DEBUG)
2501 fprintf(stderr, "fallback MATCH! %s\n", l);
2502#endif
2503 if(len>0) {
2504 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2505 }
73c04bcf
A
2506 for(j=0;j<acceptListCount;j++) {
2507 uprv_free(fallbackList[j]);
374ca955
A
2508 }
2509 uprv_free(fallbackList);
73c04bcf 2510 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
2511 }
2512 }
2513 uenum_reset(availableLocales, status);
2514
2ca993e8 2515 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
374ca955
A
2516 uprv_free(fallbackList[i]);
2517 fallbackList[i] = uprv_strdup(tmp);
2518 } else {
2519 uprv_free(fallbackList[i]);
2520 fallbackList[i]=0;
2521 }
2522 }
2523 }
2524 if(outResult) {
2525 *outResult = ULOC_ACCEPT_FAILED;
2526 }
2527 }
2528 for(i=0;i<acceptListCount;i++) {
2529 uprv_free(fallbackList[i]);
2530 }
2531 uprv_free(fallbackList);
2532 return -1;
b75a7d8f 2533}
374ca955 2534
b331163b
A
2535U_CAPI const char* U_EXPORT2
2536uloc_toUnicodeLocaleKey(const char* keyword)
2537{
2538 const char* bcpKey = ulocimp_toBcpKey(keyword);
2539 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2540 // unknown keyword, but syntax is fine..
2541 return keyword;
2542 }
2543 return bcpKey;
2544}
2545
2546U_CAPI const char* U_EXPORT2
2547uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2548{
2549 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2550 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2551 // unknown keyword, but syntax is fine..
2552 return value;
2553 }
2554 return bcpType;
2555}
2556
/* Character classification helpers for the legacy key/type syntax checks.
   Note that both macros evaluate their argument more than once. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
2559
2560static UBool
2561isWellFormedLegacyKey(const char* legacyKey)
2562{
2563 const char* p = legacyKey;
2564 while (*p) {
2565 if (!UPRV_ISALPHANUM(*p)) {
2566 return FALSE;
2567 }
2568 p++;
2569 }
2570 return TRUE;
2571}
2572
2573static UBool
2574isWellFormedLegacyType(const char* legacyType)
2575{
2576 const char* p = legacyType;
2577 int32_t alphaNumLen = 0;
2578 while (*p) {
2579 if (*p == '_' || *p == '/' || *p == '-') {
2580 if (alphaNumLen == 0) {
2581 return FALSE;
2582 }
2583 alphaNumLen = 0;
2584 } else if (UPRV_ISALPHANUM(*p)) {
2585 alphaNumLen++;
2586 } else {
2587 return FALSE;
2588 }
2589 p++;
2590 }
2591 return (alphaNumLen != 0);
2592}
2593
2594U_CAPI const char* U_EXPORT2
2595uloc_toLegacyKey(const char* keyword)
2596{
2597 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2598 if (legacyKey == NULL) {
2599 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2600 //
2601 // Note:
2602 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2603 // However, a key should not contain '=' obviously. For now, all existing
2604 // keys are using ASCII alphabetic letters only. We won't add any new key
2605 // that is not compatible with the BCP 47 syntax. Therefore, we assume
2606 // a valid key consist from [0-9a-zA-Z], no symbols.
2607 if (isWellFormedLegacyKey(keyword)) {
2608 return keyword;
2609 }
2610 }
2611 return legacyKey;
2612}
2613
2614U_CAPI const char* U_EXPORT2
2615uloc_toLegacyType(const char* keyword, const char* value)
2616{
2617 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2618 if (legacyType == NULL) {
2619 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2620 //
2621 // Note:
2622 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2623 // However, a type should not contain '=' obviously. For now, all existing
2624 // types are using ASCII alphabetic letters with a few symbol letters. We won't
2625 // add any new type that is not compatible with the BCP 47 syntax except timezone
2626 // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain
2627 // '-' '_' '/' in the middle.
2628 if (isWellFormedLegacyType(value)) {
2629 return value;
2630 }
2631 }
2632 return legacyType;
2633}
2634
374ca955 2635/*eof*/