]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uloc.cpp
d473899a7df99d55df16e6a12c357ec62d7967d5
[apple/icu.git] / icuSources / common / uloc.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File ULOC.CPP
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 04/01/97 aliu Creation.
15 * 08/21/98 stephen JDK 1.2 sync
16 * 12/08/98 rtg New Locale implementation and C API
17 * 03/15/99 damiba overhaul.
18 * 04/06/99 stephen changed setDefault() to realloc and copy
19 * 06/14/99 stephen Changed calls to ures_open for new params
20 * 07/21/99 stephen Modified setDefault() to propagate to C++
21 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22 * brought canonicalization code into line with spec
23 *****************************************************************************/
24
25 /*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31 */
32
33 #include "unicode/utypes.h"
34 #include "unicode/ustring.h"
35 #include "unicode/uloc.h"
36
37 #include "putilimp.h"
38 #include "ustr_imp.h"
39 #include "ulocimp.h"
40 #include "umutex.h"
41 #include "cstring.h"
42 #include "cmemory.h"
43 #include "locmap.h"
44 #include "uarrsort.h"
45 #include "uenumimp.h"
46 #include "uassert.h"
47 #include "charstr.h"
48
49 #include <stdio.h> /* for sprintf */
50
51 U_NAMESPACE_USE
52
53 /* ### Declarations **************************************************/
54
55 /* Locale stuff from locid.cpp */
56 U_CFUNC void locale_set_default(const char *id);
57 U_CFUNC const char *locale_get_default(void);
58 U_CFUNC int32_t
59 locale_getKeywords(const char *localeID,
60 char prev,
61 char *keywords, int32_t keywordCapacity,
62 char *values, int32_t valuesCapacity, int32_t *valLen,
63 UBool valuesToo,
64 UErrorCode *status);
65
66 /* ### Data tables **************************************************/
67
68 /**
69 * Table of language codes, both 2- and 3-letter, with preference
70 * given to 2-letter codes where possible. Includes 3-letter codes
71 * that lack a 2-letter equivalent.
72 *
73 * This list must be in sorted order. This list is returned directly
74 * to the user by some API.
75 *
76 * This list must be kept in sync with LANGUAGES_3, with corresponding
77 * entries matched.
78 *
79 * This table should be terminated with a NULL entry, followed by a
80 * second list, and another NULL entry. The first list is visible to
81 * user code when this array is returned by API. The second list
82 * contains codes we support, but do not expose through user API.
83 *
84 * Notes
85 *
86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87 * include the revisions up to 2001/7/27 *CWB*
88 *
89 * The 3 character codes are the terminology codes like RFC 3066. This
90 * is compatible with prior ICU codes
91 *
 * "in", "iw", "ji", "jw" and "sh" have been withdrawn. They are still in
 * the table, but have been moved to the second list at the end of the
 * table because their 3-character codes are duplicates. This avoids bad
 * searches going from 3-character to 2-character codes.
96 *
97 * The range qaa-qtz is reserved for local use
98 */
99 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
100 /* ISO639 table version is 20150505 */
101 /* Subsequent hand addition of selected languages */
102 static const char * const LANGUAGES[] = {
103 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
104 "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
105 "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
106 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
107 "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
108 "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
109 "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
110 "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
111 "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
112 "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
113 "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
114 "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
115 "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
116 "cs", "csb", "cu", "cv", "cy",
117 "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
118 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
119 "dyo", "dyu", "dz", "dzg",
120 "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
121 "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
122 "ext",
123 "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
124 "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
125 "frs", "fur", "fy",
126 "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
127 "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
128 "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
129 "gur", "guz", "gv", "gwi",
130 "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
131 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
132 "hup", "hy", "hz",
133 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
134 "ilo", "inh", "io", "is", "it", "iu", "izh",
135 "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
136 "jv",
137 "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
138 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
139 "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
140 "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
141 "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
142 "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
143 "kv", "kw", "ky",
144 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
145 "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
146 "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
147 "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
148 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
149 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
150 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
151 "ml", "mn", "mnc", "mni", "mo",
152 "moh", "mos", "mr", "mrj",
153 "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
154 "my", "mye", "myv", "mzn",
155 "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
156 "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
157 "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
158 "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
159 "oc", "oj", "om", "or", "os", "osa", "ota",
160 "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
161 "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
162 "pon", "prg", "pro", "ps", "pt",
163 "qu", "quc", "qug",
164 "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
165 "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
166 "rw", "rwk",
167 "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
168 "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
169 "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
170 "sgs", "shi", "shn", "shu", "si", "sid", "sk",
171 "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
172 "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
173 "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
174 "sv", "sw", "swb", "swc", "syc", "syr", "szl",
175 "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
176 "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
177 "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
178 "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
179 "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
180 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
181 "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
182 "vot", "vro", "vun",
183 "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
184 "xal", "xh", "xmf", "xog",
185 "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
186 "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
187 "zun", "zxx", "zza",
188 NULL,
189 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
190 NULL
191 };
192
193 static const char* const DEPRECATED_LANGUAGES[]={
194 "in", "iw", "ji", "jw", NULL, NULL
195 };
196 static const char* const REPLACEMENT_LANGUAGES[]={
197 "id", "he", "yi", "jv", NULL, NULL
198 };
199
200 /**
201 * Table of 3-letter language codes.
202 *
203 * This is a lookup table used to convert 3-letter language codes to
204 * their 2-letter equivalent, where possible. It must be kept in sync
205 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i]. Note that this generated table does
 * not interleave commented-out copies of the LANGUAGES rows, so compare
 * the two tables row by row when editing by hand.
208 *
209 * Where a 3-letter language code has no 2-letter equivalent, the
210 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
211 *
212 * This table should be terminated with a NULL entry, followed by a
213 * second list, and another NULL entry. The two lists correspond to
214 * the two lists in LANGUAGES.
215 */
216 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
217 /* ISO639 table version is 20150505 */
218 /* Subsequent hand addition of selected languages */
219 static const char * const LANGUAGES_3[] = {
220 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
221 "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
222 "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
223 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
224 "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
225 "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
226 "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
227 "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
228 "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
229 "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
230 "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
231 "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
232 "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
233 "ces", "csb", "chu", "chv", "cym",
234 "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
235 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
236 "dyo", "dyu", "dzo", "dzg",
237 "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
238 "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
239 "ext",
240 "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
241 "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
242 "frs", "fur", "fry",
243 "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
244 "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
245 "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
246 "gur", "guz", "glv", "gwi",
247 "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
248 "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
249 "hup", "hye", "her",
250 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
251 "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
252 "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
253 "jav",
254 "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
255 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
256 "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
257 "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
258 "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
259 "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
260 "kom", "cor", "kir",
261 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
262 "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
263 "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
264 "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
265 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
266 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
267 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
268 "mal", "mon", "mnc", "mni", "mol",
269 "moh", "mos", "mar", "mrj",
270 "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
271 "mya", "mye", "myv", "mzn",
272 "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
273 "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
274 "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
275 "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
276 "oci", "oji", "orm", "ori", "oss", "osa", "ota",
277 "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
278 "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
279 "pon", "prg", "pro", "pus", "por",
280 "que", "quc", "qug",
281 "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
282 "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
283 "kin", "rwk",
284 "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
285 "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
286 "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
287 "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
288 "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
289 "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
290 "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
291 "swe", "swa", "swb", "swc", "syc", "syr", "szl",
292 "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
293 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
294 "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
295 "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
296 "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
297 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
298 "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
299 "vot", "vro", "vun",
300 "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
301 "xal", "xho", "xmf", "xog",
302 "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
303 "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
304 "zun", "zxx", "zza",
305 NULL,
306 /* "in", "iw", "ji", "jw", "sh", */
307 "ind", "heb", "yid", "jaw", "srp",
308 NULL
309 };
310
311 /**
312 * Table of 2-letter country codes.
313 *
314 * This list must be in sorted order. This list is returned directly
315 * to the user by some API.
316 *
317 * This list must be kept in sync with COUNTRIES_3, with corresponding
318 * entries matched.
319 *
320 * This table should be terminated with a NULL entry, followed by a
321 * second list, and another NULL entry. The first list is visible to
322 * user code when this array is returned by API. The second list
323 * contains codes we support, but do not expose through user API.
324 *
325 * Notes:
326 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html
 * New codes were added, keeping the old ones for compatibility; updated
 * to include the 1999/12/03 revisions *CWB*
331 *
332 * RO(ROM) is now RO(ROU) according to
333 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
334 */
335 static const char * const COUNTRIES[] = {
336 "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
337 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
338 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
339 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
340 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
341 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
342 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
343 "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
344 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
345 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
346 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
347 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
348 "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
349 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
350 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
351 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
352 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
353 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
354 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
355 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
356 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
357 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
358 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
359 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
360 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
361 "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
362 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
363 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
364 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
365 "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
366 NULL,
367 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
368 NULL
369 };
370
371 static const char* const DEPRECATED_COUNTRIES[] = {
372 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
373 };
374 static const char* const REPLACEMENT_COUNTRIES[] = {
375 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
376 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
377 };
378
379 /**
380 * Table of 3-letter country codes.
381 *
382 * This is a lookup table used to convert 3-letter country codes to
383 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
384 * For all valid i, COUNTRIES[i] must refer to the same country as
385 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
386 * to make eyeballing this baby easier.
387 *
388 * This table should be terminated with a NULL entry, followed by a
389 * second list, and another NULL entry. The two lists correspond to
390 * the two lists in COUNTRIES.
391 */
392 static const char * const COUNTRIES_3[] = {
393 /* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
394 "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
395 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
396 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
397 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
398 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
399 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
400 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
401 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
402 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
403 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
404 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
405 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
406 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
407 /* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
408 "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
409 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
410 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
411 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
412 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
413 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
414 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
415 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
416 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
417 /* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
418 "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
419 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
420 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
421 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
422 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
423 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
424 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
425 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
426 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
427 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
428 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
429 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
430 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
431 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
432 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
433 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
434 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
435 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
436 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
437 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
438 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
439 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
440 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
441 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
442 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
443 /* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
444 "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
445 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
446 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
447 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
448 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
449 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
450 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
451 /* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
452 "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
453 NULL,
454 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
455 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
456 NULL
457 };
458
459 typedef struct CanonicalizationMap {
460 const char *id; /* input ID */
461 const char *canonicalID; /* canonicalized output ID */
462 } CanonicalizationMap;
463
464 /**
465 * A map to canonicalize locale IDs. This handles a variety of
466 * different semantic kinds of transformations.
467 */
468 static const CanonicalizationMap CANONICALIZE_MAP[] = {
469 { "", "en_US_POSIX" }, /* .NET name */ // open ICU 64 deleted, we restore
470 { "c", "en_US_POSIX" }, /* POSIX name */ // open ICU 64 deleted, we restore
471 { "posix", "en_US_POSIX" }, /* POSIX name (alias of C) */ // open ICU 64 deleted, we restore
472 { "art_LOJBAN", "jbo" }, /* registered name */
473 { "hy__AREVELA", "hy" }, /* Registered IANA variant */
474 { "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
475 { "zh_GAN", "gan" }, /* registered name */
476 { "zh_GUOYU", "zh" }, /* registered name */
477 { "zh_HAKKA", "hak" }, /* registered name */
478 { "zh_MIN_NAN", "nan" }, /* registered name */
479 { "zh_WUU", "wuu" }, /* registered name */
480 { "zh_XIANG", "hsn" }, /* registered name */
481 { "zh_YUE", "yue" }, /* registered name */
482 };
483
484 /* ### BCP47 Conversion *******************************************/
485 /* Test if the locale id has BCP47 u extension and does not have '@' */
486 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
487 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
488 #define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
489 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
490 U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
491 finalID=id; \
492 if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
493 } else { \
494 finalID=buffer; \
495 } \
496 } UPRV_BLOCK_MACRO_END
497 /* Gets the size of the shortest subtag in the given localeID. */
498 static int32_t getShortestSubtagLength(const char *localeID) {
499 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
500 int32_t length = localeIDLength;
501 int32_t tmpLength = 0;
502 int32_t i;
503 UBool reset = TRUE;
504
505 for (i = 0; i < localeIDLength; i++) {
506 if (localeID[i] != '_' && localeID[i] != '-') {
507 if (reset) {
508 tmpLength = 0;
509 reset = FALSE;
510 }
511 tmpLength++;
512 } else {
513 if (tmpLength != 0 && tmpLength < length) {
514 length = tmpLength;
515 }
516 reset = TRUE;
517 }
518 }
519
520 return length;
521 }
522
523 /* ### Keywords **************************************************/
524 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
525 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
526 /* Punctuation/symbols allowed in legacy key values */
527 #define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
528
529 #define ULOC_KEYWORD_BUFFER_LEN 25
530 #define ULOC_MAX_NO_KEYWORDS 25
531
532 U_CAPI const char * U_EXPORT2
533 locale_getKeywordsStart(const char *localeID) {
534 const char *result = NULL;
535 if((result = uprv_strchr(localeID, '@')) != NULL) {
536 return result;
537 }
538 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
539 else {
540 /* We do this because the @ sign is variant, and the @ sign used on one
541 EBCDIC machine won't be compiled the same way on other EBCDIC based
542 machines. */
543 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
544 const uint8_t *charToFind = ebcdicSigns;
545 while(*charToFind) {
546 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
547 return result;
548 }
549 charToFind++;
550 }
551 }
552 #endif
553 return NULL;
554 }
555
556 /**
557 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
558 * @param keywordName incoming name to be canonicalized
559 * @param status return status (keyword too long)
560 * @return length of the keyword name
561 */
562 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
563 {
564 int32_t keywordNameLen = 0;
565
566 for (; *keywordName != 0; keywordName++) {
567 if (!UPRV_ISALPHANUM(*keywordName)) {
568 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
569 return 0;
570 }
571 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
572 buf[keywordNameLen++] = uprv_tolower(*keywordName);
573 } else {
574 /* keyword name too long for internal buffer */
575 *status = U_INTERNAL_PROGRAM_ERROR;
576 return 0;
577 }
578 }
579 if (keywordNameLen == 0) {
580 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
581 return 0;
582 }
583 buf[keywordNameLen] = 0; /* terminate */
584
585 return keywordNameLen;
586 }
587
588 typedef struct {
589 char keyword[ULOC_KEYWORD_BUFFER_LEN];
590 int32_t keywordLen;
591 const char *valueStart;
592 int32_t valueLen;
593 } KeywordStruct;
594
595 static int32_t U_CALLCONV
596 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
597 const char* leftString = ((const KeywordStruct *)left)->keyword;
598 const char* rightString = ((const KeywordStruct *)right)->keyword;
599 return uprv_strcmp(leftString, rightString);
600 }
601
602 static int32_t
603 _getKeywords(const char *localeID,
604 char prev,
605 char *keywords, int32_t keywordCapacity,
606 char *values, int32_t valuesCapacity, int32_t *valLen,
607 UBool valuesToo,
608 UErrorCode *status)
609 {
610 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
611
612 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
613 int32_t numKeywords = 0;
614 const char* pos = localeID;
615 const char* equalSign = NULL;
616 const char* semicolon = NULL;
617 int32_t i = 0, j, n;
618 int32_t keywordsLen = 0;
619 int32_t valuesLen = 0;
620
621 if(prev == '@') { /* start of keyword definition */
622 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
623 do {
624 UBool duplicate = FALSE;
625 /* skip leading spaces */
626 while(*pos == ' ') {
627 pos++;
628 }
629 if (!*pos) { /* handle trailing "; " */
630 break;
631 }
632 if(numKeywords == maxKeywords) {
633 *status = U_INTERNAL_PROGRAM_ERROR;
634 return 0;
635 }
636 equalSign = uprv_strchr(pos, '=');
637 semicolon = uprv_strchr(pos, ';');
638 /* lack of '=' [foo@currency] is illegal */
639 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
640 if(!equalSign || (semicolon && semicolon<equalSign)) {
641 *status = U_INVALID_FORMAT_ERROR;
642 return 0;
643 }
644 /* need to normalize both keyword and keyword name */
645 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
646 /* keyword name too long for internal buffer */
647 *status = U_INTERNAL_PROGRAM_ERROR;
648 return 0;
649 }
650 for(i = 0, n = 0; i < equalSign - pos; ++i) {
651 if (pos[i] != ' ') {
652 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
653 }
654 }
655
656 /* zero-length keyword is an error. */
657 if (n == 0) {
658 *status = U_INVALID_FORMAT_ERROR;
659 return 0;
660 }
661
662 keywordList[numKeywords].keyword[n] = 0;
663 keywordList[numKeywords].keywordLen = n;
664 /* now grab the value part. First we skip the '=' */
665 equalSign++;
666 /* then we leading spaces */
667 while(*equalSign == ' ') {
668 equalSign++;
669 }
670
671 /* Premature end or zero-length value */
672 if (!*equalSign || equalSign == semicolon) {
673 *status = U_INVALID_FORMAT_ERROR;
674 return 0;
675 }
676
677 keywordList[numKeywords].valueStart = equalSign;
678
679 pos = semicolon;
680 i = 0;
681 if(pos) {
682 while(*(pos - i - 1) == ' ') {
683 i++;
684 }
685 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
686 pos++;
687 } else {
688 i = (int32_t)uprv_strlen(equalSign);
689 while(i && equalSign[i-1] == ' ') {
690 i--;
691 }
692 keywordList[numKeywords].valueLen = i;
693 }
694 /* If this is a duplicate keyword, then ignore it */
695 for (j=0; j<numKeywords; ++j) {
696 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
697 duplicate = TRUE;
698 break;
699 }
700 }
701 if (!duplicate) {
702 ++numKeywords;
703 }
704 } while(pos);
705
706 /* now we have a list of keywords */
707 /* we need to sort it */
708 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
709
710 /* Now construct the keyword part */
711 for(i = 0; i < numKeywords; i++) {
712 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
713 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
714 if(valuesToo) {
715 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
716 } else {
717 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
718 }
719 }
720 keywordsLen += keywordList[i].keywordLen + 1;
721 if(valuesToo) {
722 if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
723 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
724 }
725 keywordsLen += keywordList[i].valueLen;
726
727 if(i < numKeywords - 1) {
728 if(keywordsLen < keywordCapacity) {
729 keywords[keywordsLen] = ';';
730 }
731 keywordsLen++;
732 }
733 }
734 if(values) {
735 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
736 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
737 values[valuesLen + keywordList[i].valueLen] = 0;
738 }
739 valuesLen += keywordList[i].valueLen + 1;
740 }
741 }
742 if(values) {
743 values[valuesLen] = 0;
744 if(valLen) {
745 *valLen = valuesLen;
746 }
747 }
748 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
749 } else {
750 return 0;
751 }
752 }
753
754 U_CFUNC int32_t
755 locale_getKeywords(const char *localeID,
756 char prev,
757 char *keywords, int32_t keywordCapacity,
758 char *values, int32_t valuesCapacity, int32_t *valLen,
759 UBool valuesToo,
760 UErrorCode *status) {
761 return _getKeywords(localeID, prev, keywords, keywordCapacity,
762 values, valuesCapacity, valLen, valuesToo,
763 status);
764 }
765
766 U_CAPI int32_t U_EXPORT2
767 uloc_getKeywordValue(const char* localeID,
768 const char* keywordName,
769 char* buffer, int32_t bufferCapacity,
770 UErrorCode* status)
771 {
772 if (buffer != nullptr) {
773 buffer[0] = '\0';
774 }
775 const char* startSearchHere = NULL;
776 const char* nextSeparator = NULL;
777 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
778 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
779 int32_t result = 0;
780
781 if(status && U_SUCCESS(*status) && localeID) {
782 char tempBuffer[ULOC_FULLNAME_CAPACITY];
783 const char* tmpLocaleID;
784
785 if (keywordName == NULL || keywordName[0] == 0) {
786 *status = U_ILLEGAL_ARGUMENT_ERROR;
787 return 0;
788 }
789
790 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
791 if(U_FAILURE(*status)) {
792 return 0;
793 }
794
795 if (_hasBCP47Extension(localeID)) {
796 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
797 } else {
798 tmpLocaleID=localeID;
799 }
800
801 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
802 if(startSearchHere == NULL) {
803 /* no keywords, return at once */
804 return 0;
805 }
806
807 /* find the first keyword */
808 while(startSearchHere) {
809 const char* keyValueTail;
810 int32_t keyValueLen;
811
812 startSearchHere++; /* skip @ or ; */
813 nextSeparator = uprv_strchr(startSearchHere, '=');
814 if(!nextSeparator) {
815 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
816 return 0;
817 }
818 /* strip leading & trailing spaces (TC decided to tolerate these) */
819 while(*startSearchHere == ' ') {
820 startSearchHere++;
821 }
822 keyValueTail = nextSeparator;
823 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
824 keyValueTail--;
825 }
826 /* now keyValueTail points to first char after the keyName */
827 /* copy & normalize keyName from locale */
828 if (startSearchHere == keyValueTail) {
829 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
830 return 0;
831 }
832 keyValueLen = 0;
833 while (startSearchHere < keyValueTail) {
834 if (!UPRV_ISALPHANUM(*startSearchHere)) {
835 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
836 return 0;
837 }
838 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
839 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
840 } else {
841 /* keyword name too long for internal buffer */
842 *status = U_INTERNAL_PROGRAM_ERROR;
843 return 0;
844 }
845 }
846 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
847
848 startSearchHere = uprv_strchr(nextSeparator, ';');
849
850 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
851 /* current entry matches the keyword. */
852 nextSeparator++; /* skip '=' */
853 /* First strip leading & trailing spaces (TC decided to tolerate these) */
854 while(*nextSeparator == ' ') {
855 nextSeparator++;
856 }
857 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
858 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
859 keyValueTail--;
860 }
861 /* Now copy the value, but check well-formedness */
862 if (nextSeparator == keyValueTail) {
863 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
864 return 0;
865 }
866 keyValueLen = 0;
867 while (nextSeparator < keyValueTail) {
868 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
869 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
870 return 0;
871 }
872 if (keyValueLen < bufferCapacity) {
873 /* Should we lowercase value to return here? Tests expect as-is. */
874 buffer[keyValueLen++] = *nextSeparator++;
875 } else { /* keep advancing so we return correct length in case of overflow */
876 keyValueLen++;
877 nextSeparator++;
878 }
879 }
880 result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
881 return result;
882 }
883 }
884 }
885 return 0;
886 }
887
888 U_CAPI int32_t U_EXPORT2
889 uloc_setKeywordValue(const char* keywordName,
890 const char* keywordValue,
891 char* buffer, int32_t bufferCapacity,
892 UErrorCode* status)
893 {
894 /* TODO: sorting. removal. */
895 int32_t keywordNameLen;
896 int32_t keywordValueLen;
897 int32_t bufLen;
898 int32_t needLen = 0;
899 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
900 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
901 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
902 int32_t rc;
903 char* nextSeparator = NULL;
904 char* nextEqualsign = NULL;
905 char* startSearchHere = NULL;
906 char* keywordStart = NULL;
907 CharString updatedKeysAndValues;
908 int32_t updatedKeysAndValuesLen;
909 UBool handledInputKeyAndValue = FALSE;
910 char keyValuePrefix = '@';
911
912 if(U_FAILURE(*status)) {
913 return -1;
914 }
915 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
916 *status = U_ILLEGAL_ARGUMENT_ERROR;
917 return 0;
918 }
919 bufLen = (int32_t)uprv_strlen(buffer);
920 if(bufferCapacity<bufLen) {
921 /* The capacity is less than the length?! Is this NULL terminated? */
922 *status = U_ILLEGAL_ARGUMENT_ERROR;
923 return 0;
924 }
925 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
926 if(U_FAILURE(*status)) {
927 return 0;
928 }
929
930 keywordValueLen = 0;
931 if(keywordValue) {
932 while (*keywordValue != 0) {
933 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
934 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
935 return 0;
936 }
937 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
938 /* Should we force lowercase in value to set? */
939 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
940 } else {
941 /* keywordValue too long for internal buffer */
942 *status = U_INTERNAL_PROGRAM_ERROR;
943 return 0;
944 }
945 }
946 }
947 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
948
949 startSearchHere = (char*)locale_getKeywordsStart(buffer);
950 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
951 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
952 return bufLen;
953 }
954
955 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
956 if(startSearchHere) { /* had a single @ */
957 needLen--; /* already had the @ */
958 /* startSearchHere points at the @ */
959 } else {
960 startSearchHere=buffer+bufLen;
961 }
962 if(needLen >= bufferCapacity) {
963 *status = U_BUFFER_OVERFLOW_ERROR;
964 return needLen; /* no change */
965 }
966 *startSearchHere++ = '@';
967 uprv_strcpy(startSearchHere, keywordNameBuffer);
968 startSearchHere += keywordNameLen;
969 *startSearchHere++ = '=';
970 uprv_strcpy(startSearchHere, keywordValueBuffer);
971 return needLen;
972 } /* end shortcut - no @ */
973
974 keywordStart = startSearchHere;
975 /* search for keyword */
976 while(keywordStart) {
977 const char* keyValueTail;
978 int32_t keyValueLen;
979
980 keywordStart++; /* skip @ or ; */
981 nextEqualsign = uprv_strchr(keywordStart, '=');
982 if (!nextEqualsign) {
983 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
984 return 0;
985 }
986 /* strip leading & trailing spaces (TC decided to tolerate these) */
987 while(*keywordStart == ' ') {
988 keywordStart++;
989 }
990 keyValueTail = nextEqualsign;
991 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
992 keyValueTail--;
993 }
994 /* now keyValueTail points to first char after the keyName */
995 /* copy & normalize keyName from locale */
996 if (keywordStart == keyValueTail) {
997 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
998 return 0;
999 }
1000 keyValueLen = 0;
1001 while (keywordStart < keyValueTail) {
1002 if (!UPRV_ISALPHANUM(*keywordStart)) {
1003 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1004 return 0;
1005 }
1006 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1007 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1008 } else {
1009 /* keyword name too long for internal buffer */
1010 *status = U_INTERNAL_PROGRAM_ERROR;
1011 return 0;
1012 }
1013 }
1014 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
1015
1016 nextSeparator = uprv_strchr(nextEqualsign, ';');
1017
1018 /* start processing the value part */
1019 nextEqualsign++; /* skip '=' */
1020 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1021 while(*nextEqualsign == ' ') {
1022 nextEqualsign++;
1023 }
1024 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1025 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1026 keyValueTail--;
1027 }
1028 if (nextEqualsign == keyValueTail) {
1029 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1030 return 0;
1031 }
1032
1033 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1034 if(rc == 0) {
1035 /* Current entry matches the input keyword. Update the entry */
1036 if(keywordValueLen > 0) { /* updating a value */
1037 updatedKeysAndValues.append(keyValuePrefix, *status);
1038 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1039 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1040 updatedKeysAndValues.append('=', *status);
1041 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1042 } /* else removing this entry, don't emit anything */
1043 handledInputKeyAndValue = TRUE;
1044 } else {
1045 /* input keyword sorts earlier than current entry, add before current entry */
1046 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1047 /* insert new entry at this location */
1048 updatedKeysAndValues.append(keyValuePrefix, *status);
1049 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1050 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1051 updatedKeysAndValues.append('=', *status);
1052 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1053 handledInputKeyAndValue = TRUE;
1054 }
1055 /* copy the current entry */
1056 updatedKeysAndValues.append(keyValuePrefix, *status);
1057 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1058 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1059 updatedKeysAndValues.append('=', *status);
1060 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
1061 }
1062 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1063 /* append new entry at the end, it sorts later than existing entries */
1064 updatedKeysAndValues.append(keyValuePrefix, *status);
1065 /* skip keyValuePrefix update, no subsequent key-value pair */
1066 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1067 updatedKeysAndValues.append('=', *status);
1068 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1069 handledInputKeyAndValue = TRUE;
1070 }
1071 keywordStart = nextSeparator;
1072 } /* end loop searching */
1073
1074 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1075 * problems with the passed-in locale. So if we did encounter problems with the
1076 * passed-in locale above, those errors took precedence and overrode any error
1077 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1078 * are errors here they are from updatedKeysAndValues.append; they do cause an
1079 * error return but the passed-in locale is unmodified and the original bufLen is
1080 * returned.
1081 */
1082 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1083 /* if input key/value specified removal of a keyword not present in locale, or
1084 * there was an error in CharString.append, leave original locale alone. */
1085 return bufLen;
1086 }
1087
1088 updatedKeysAndValuesLen = updatedKeysAndValues.length();
1089 /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1090 needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1091 if(needLen >= bufferCapacity) {
1092 *status = U_BUFFER_OVERFLOW_ERROR;
1093 return needLen; /* no change */
1094 }
1095 if (updatedKeysAndValuesLen > 0) {
1096 uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1097 }
1098 buffer[needLen]=0;
1099 return needLen;
1100 }
1101
1102 /* ### ID parsing implementation **************************************************/
1103
/* TRUE if 'a' is one of the letters that can start a special prefix.
 * The argument is evaluated more than once: do not pass expressions with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
 'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1114
1115 /**
1116 * Lookup 'key' in the array 'list'. The array 'list' should contain
1117 * a NULL entry, followed by more entries, and a second NULL entry.
1118 *
1119 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1120 * COUNTRIES_3.
1121 */
1122 static int16_t _findIndex(const char* const* list, const char* key)
1123 {
1124 const char* const* anchor = list;
1125 int32_t pass = 0;
1126
1127 /* Make two passes through two NULL-terminated arrays at 'list' */
1128 while (pass++ < 2) {
1129 while (*list) {
1130 if (uprv_strcmp(key, *list) == 0) {
1131 return (int16_t)(list - anchor);
1132 }
1133 list++;
1134 }
1135 ++list; /* skip final NULL *CWB*/
1136 }
1137 return -1;
1138 }
1139
1140 /* count the length of src while copying it to dest; return strlen(src) */
1141 static inline int32_t
1142 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1143 const char *anchor;
1144 char c;
1145
1146 anchor=src;
1147 for(;;) {
1148 if((c=*src)==0) {
1149 return (int32_t)(src-anchor);
1150 }
1151 if(destCapacity<=0) {
1152 return (int32_t)((src-anchor)+uprv_strlen(src));
1153 }
1154 ++src;
1155 *dest++=c;
1156 --destCapacity;
1157 }
1158 }
1159
1160 U_CFUNC const char*
1161 uloc_getCurrentCountryID(const char* oldID){
1162 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1163 if (offset >= 0) {
1164 return REPLACEMENT_COUNTRIES[offset];
1165 }
1166 return oldID;
1167 }
1168 U_CFUNC const char*
1169 uloc_getCurrentLanguageID(const char* oldID){
1170 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1171 if (offset >= 0) {
1172 return REPLACEMENT_LANGUAGES[offset];
1173 }
1174 return oldID;
1175 }
1176 /*
1177 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1178 * avoid duplicating code to handle the earlier locale ID pieces
1179 * in the functions for the later ones by
1180 * setting the *pEnd pointer to where they stopped parsing
1181 *
1182 * TODO try to use this in Locale
1183 */
1184 U_CFUNC int32_t
1185 ulocimp_getLanguage(const char *localeID,
1186 char *language, int32_t languageCapacity,
1187 const char **pEnd) {
1188 int32_t i=0;
1189 int32_t offset;
1190 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1191
1192 /* if it starts with i- or x- then copy that prefix */
1193 if(_isIDPrefix(localeID)) {
1194 if(i<languageCapacity) {
1195 language[i]=(char)uprv_tolower(*localeID);
1196 }
1197 if(i<languageCapacity) {
1198 language[i+1]='-';
1199 }
1200 i+=2;
1201 localeID+=2;
1202 }
1203
1204 /* copy the language as far as possible and count its length */
1205 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1206 if(i<languageCapacity) {
1207 language[i]=(char)uprv_tolower(*localeID);
1208 }
1209 if(i<3) {
1210 U_ASSERT(i>=0);
1211 lang[i]=(char)uprv_tolower(*localeID);
1212 }
1213 i++;
1214 localeID++;
1215 }
1216
1217 if(i==3) {
1218 /* convert 3 character code to 2 character code if possible *CWB*/
1219 offset=_findIndex(LANGUAGES_3, lang);
1220 if(offset>=0) {
1221 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1222 }
1223 }
1224
1225 if(pEnd!=NULL) {
1226 *pEnd=localeID;
1227 }
1228 return i;
1229 }
1230
1231 U_CFUNC int32_t
1232 ulocimp_getScript(const char *localeID,
1233 char *script, int32_t scriptCapacity,
1234 const char **pEnd)
1235 {
1236 int32_t idLen = 0;
1237
1238 if (pEnd != NULL) {
1239 *pEnd = localeID;
1240 }
1241
1242 /* copy the second item as far as possible and count its length */
1243 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1244 && uprv_isASCIILetter(localeID[idLen])) {
1245 idLen++;
1246 }
1247
1248 /* If it's exactly 4 characters long, then it's a script and not a country. */
1249 if (idLen == 4) {
1250 int32_t i;
1251 if (pEnd != NULL) {
1252 *pEnd = localeID+idLen;
1253 }
1254 if(idLen > scriptCapacity) {
1255 idLen = scriptCapacity;
1256 }
1257 if (idLen >= 1) {
1258 script[0]=(char)uprv_toupper(*(localeID++));
1259 }
1260 for (i = 1; i < idLen; i++) {
1261 script[i]=(char)uprv_tolower(*(localeID++));
1262 }
1263 }
1264 else {
1265 idLen = 0;
1266 }
1267 return idLen;
1268 }
1269
1270 U_CFUNC int32_t
1271 ulocimp_getCountry(const char *localeID,
1272 char *country, int32_t countryCapacity,
1273 const char **pEnd)
1274 {
1275 int32_t idLen=0;
1276 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1277 int32_t offset;
1278
1279 /* copy the country as far as possible and count its length */
1280 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1281 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1282 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1283 }
1284 idLen++;
1285 }
1286
1287 /* the country should be either length 2 or 3 */
1288 if (idLen == 2 || idLen == 3) {
1289 UBool gotCountry = FALSE;
1290 /* convert 3 character code to 2 character code if possible *CWB*/
1291 if(idLen==3) {
1292 offset=_findIndex(COUNTRIES_3, cnty);
1293 if(offset>=0) {
1294 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1295 gotCountry = TRUE;
1296 }
1297 }
1298 if (!gotCountry) {
1299 int32_t i = 0;
1300 for (i = 0; i < idLen; i++) {
1301 if (i < countryCapacity) {
1302 country[i]=(char)uprv_toupper(localeID[i]);
1303 }
1304 }
1305 }
1306 localeID+=idLen;
1307 } else {
1308 idLen = 0;
1309 }
1310
1311 if(pEnd!=NULL) {
1312 *pEnd=localeID;
1313 }
1314
1315 return idLen;
1316 }
1317
1318 /**
1319 * @param needSeparator if true, then add leading '_' if any variants
1320 * are added to 'variant'
1321 */
1322 static int32_t
1323 _getVariantEx(const char *localeID,
1324 char prev,
1325 char *variant, int32_t variantCapacity,
1326 UBool needSeparator) {
1327 int32_t i=0;
1328
1329 /* get one or more variant tags and separate them with '_' */
1330 if(_isIDSeparator(prev)) {
1331 /* get a variant string after a '-' or '_' */
1332 while(!_isTerminator(*localeID)) {
1333 if (needSeparator) {
1334 if (i<variantCapacity) {
1335 variant[i] = '_';
1336 }
1337 ++i;
1338 needSeparator = FALSE;
1339 }
1340 if(i<variantCapacity) {
1341 variant[i]=(char)uprv_toupper(*localeID);
1342 if(variant[i]=='-') {
1343 variant[i]='_';
1344 }
1345 }
1346 i++;
1347 localeID++;
1348 }
1349 }
1350
1351 /* if there is no variant tag after a '-' or '_' then look for '@' */
1352 if(i==0) {
1353 if(prev=='@') {
1354 /* keep localeID */
1355 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1356 ++localeID; /* point after the '@' */
1357 } else {
1358 return 0;
1359 }
1360 while(!_isTerminator(*localeID)) {
1361 if (needSeparator) {
1362 if (i<variantCapacity) {
1363 variant[i] = '_';
1364 }
1365 ++i;
1366 needSeparator = FALSE;
1367 }
1368 if(i<variantCapacity) {
1369 variant[i]=(char)uprv_toupper(*localeID);
1370 if(variant[i]=='-' || variant[i]==',') {
1371 variant[i]='_';
1372 }
1373 }
1374 i++;
1375 localeID++;
1376 }
1377 }
1378
1379 return i;
1380 }
1381
1382 static int32_t
1383 _getVariant(const char *localeID,
1384 char prev,
1385 char *variant, int32_t variantCapacity) {
1386 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1387 }
1388
1389 /* Keyword enumeration */
1390
/* State behind a keyword enumeration. */
typedef struct UKeywordsContext {
    /* Start of the keyword list: NUL-terminated names stored back to back,
     * ended by an empty string. Released with uprv_free() on close. */
    char *keywords;
    /* Next name to hand out; points into the 'keywords' storage. */
    char *current;
} UKeywordsContext;
1395
1396 U_CDECL_BEGIN
1397
1398 static void U_CALLCONV
1399 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1400 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1401 uprv_free(enumerator->context);
1402 uprv_free(enumerator);
1403 }
1404
1405 static int32_t U_CALLCONV
1406 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1407 char *kw = ((UKeywordsContext *)en->context)->keywords;
1408 int32_t result = 0;
1409 while(*kw) {
1410 result++;
1411 kw += uprv_strlen(kw)+1;
1412 }
1413 return result;
1414 }
1415
1416 static const char * U_CALLCONV
1417 uloc_kw_nextKeyword(UEnumeration* en,
1418 int32_t* resultLength,
1419 UErrorCode* /*status*/) {
1420 const char* result = ((UKeywordsContext *)en->context)->current;
1421 int32_t len = 0;
1422 if(*result) {
1423 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1424 ((UKeywordsContext *)en->context)->current += len+1;
1425 } else {
1426 result = NULL;
1427 }
1428 if (resultLength) {
1429 *resultLength = len;
1430 }
1431 return result;
1432 }
1433
1434 static void U_CALLCONV
1435 uloc_kw_resetKeywords(UEnumeration* en,
1436 UErrorCode* /*status*/) {
1437 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1438 }
1439
1440 U_CDECL_END
1441
1442
1443 static const UEnumeration gKeywordsEnum = {
1444 NULL,
1445 NULL,
1446 uloc_kw_closeKeywords,
1447 uloc_kw_countKeywords,
1448 uenum_unextDefault,
1449 uloc_kw_nextKeyword,
1450 uloc_kw_resetKeywords
1451 };
1452
1453 U_CAPI UEnumeration* U_EXPORT2
1454 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1455 {
1456 LocalMemory<UKeywordsContext> myContext;
1457 LocalMemory<UEnumeration> result;
1458
1459 if (U_FAILURE(*status)) {
1460 return nullptr;
1461 }
1462 myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
1463 result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
1464 if (myContext.isNull() || result.isNull()) {
1465 *status = U_MEMORY_ALLOCATION_ERROR;
1466 return nullptr;
1467 }
1468 uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1469 myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
1470 if (myContext->keywords == nullptr) {
1471 *status = U_MEMORY_ALLOCATION_ERROR;
1472 return nullptr;
1473 }
1474 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1475 myContext->keywords[keywordListSize] = 0;
1476 myContext->current = myContext->keywords;
1477 result->context = myContext.orphan();
1478 return result.orphan();
1479 }
1480
1481 U_CAPI UEnumeration* U_EXPORT2
1482 uloc_openKeywords(const char* localeID,
1483 UErrorCode* status)
1484 {
1485 int32_t i=0;
1486 char keywords[256];
1487 int32_t keywordsCapacity = 256;
1488 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1489 const char* tmpLocaleID;
1490
1491 if(status==NULL || U_FAILURE(*status)) {
1492 return 0;
1493 }
1494
1495 if (_hasBCP47Extension(localeID)) {
1496 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1497 } else {
1498 if (localeID==NULL) {
1499 localeID=uloc_getDefault();
1500 }
1501 tmpLocaleID=localeID;
1502 }
1503
1504 /* Skip the language */
1505 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1506 if(_isIDSeparator(*tmpLocaleID)) {
1507 const char *scriptID;
1508 /* Skip the script if available */
1509 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1510 if(scriptID != tmpLocaleID+1) {
1511 /* Found optional script */
1512 tmpLocaleID = scriptID;
1513 }
1514 /* Skip the Country */
1515 if (_isIDSeparator(*tmpLocaleID)) {
1516 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1517 if(_isIDSeparator(*tmpLocaleID)) {
1518 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1519 }
1520 }
1521 }
1522
1523 /* keywords are located after '@' */
1524 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1525 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1526 }
1527
1528 if(i) {
1529 return uloc_openKeywordList(keywords, i, status);
1530 } else {
1531 return NULL;
1532 }
1533 }
1534
1535
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The "i-default" tag, stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1544
1545 /**
1546 * Canonicalize the given localeID, to level 1 or to level 2,
1547 * depending on the options. To specify level 1, pass in options=0.
1548 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1549 *
1550 * This is the code underlying uloc_getName and uloc_canonicalize.
1551 */
1552 static int32_t
1553 _canonicalize(const char* localeID,
1554 char* result,
1555 int32_t resultCapacity,
1556 uint32_t options,
1557 UErrorCode* err) {
1558 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1559 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1560 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1561 const char* origLocaleID;
1562 const char* tmpLocaleID;
1563 const char* keywordAssign = NULL;
1564 const char* separatorIndicator = NULL;
1565 char* name;
1566 char* variant = NULL; /* pointer into name, or NULL */
1567
1568 if (U_FAILURE(*err)) {
1569 return 0;
1570 }
1571
1572 if (_hasBCP47Extension(localeID)) {
1573 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1574 } else {
1575 if (localeID==NULL) {
1576 localeID=uloc_getDefault();
1577 }
1578 tmpLocaleID=localeID;
1579 }
1580
1581 origLocaleID=tmpLocaleID;
1582
1583 /* if we are doing a full canonicalization, then put results in
1584 localeBuffer, if necessary; otherwise send them to result. */
1585 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1586 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1587 name = localeBuffer;
1588 nameCapacity = (int32_t)sizeof(localeBuffer);
1589 } else {
1590 name = result;
1591 nameCapacity = resultCapacity;
1592 }
1593
1594 /* get all pieces, one after another, and separate with '_' */
1595 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1596
1597 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1598 const char *d = uloc_getDefault();
1599
1600 len = (int32_t)uprv_strlen(d);
1601
1602 if (name != NULL) {
1603 uprv_memcpy(name, d, len);
1604 }
1605 } else if(_isIDSeparator(*tmpLocaleID)) {
1606 const char *scriptID;
1607
1608 ++fieldCount;
1609 if(len<nameCapacity) {
1610 name[len]='_';
1611 }
1612 ++len;
1613
1614 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1615 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1616 if(scriptSize > 0) {
1617 /* Found optional script */
1618 tmpLocaleID = scriptID;
1619 ++fieldCount;
1620 len+=scriptSize;
1621 if (_isIDSeparator(*tmpLocaleID)) {
1622 /* If there is something else, then we add the _ */
1623 if(len<nameCapacity) {
1624 name[len]='_';
1625 }
1626 ++len;
1627 }
1628 }
1629
1630 if (_isIDSeparator(*tmpLocaleID)) {
1631 const char *cntryID;
1632 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1633 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1634 if (cntrySize > 0) {
1635 /* Found optional country */
1636 tmpLocaleID = cntryID;
1637 len+=cntrySize;
1638 }
1639 if(_isIDSeparator(*tmpLocaleID)) {
1640 /* If there is something else, then we add the _ if we found country before. */
1641 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1642 ++fieldCount;
1643 if(len<nameCapacity) {
1644 name[len]='_';
1645 }
1646 ++len;
1647 }
1648
1649 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1650 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1651 if (variantSize > 0) {
1652 variant = len<nameCapacity ? name+len : NULL;
1653 len += variantSize;
1654 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1655 }
1656 }
1657 }
1658 }
1659
1660 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1661 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1662 UBool done = FALSE;
1663 do {
1664 char c = *tmpLocaleID;
1665 switch (c) {
1666 case 0:
1667 case '@':
1668 done = TRUE;
1669 break;
1670 default:
1671 if (len<nameCapacity) {
1672 name[len] = c;
1673 }
1674 ++len;
1675 ++tmpLocaleID;
1676 break;
1677 }
1678 } while (!done);
1679 }
1680
1681 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1682 After this, tmpLocaleID either points to '@' or is NULL */
1683 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1684 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1685 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1686 }
1687
1688 /* Copy POSIX-style variant, if any [mr@FOO] */
1689 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1690 tmpLocaleID != NULL && keywordAssign == NULL) {
1691 for (;;) {
1692 char c = *tmpLocaleID;
1693 if (c == 0) {
1694 break;
1695 }
1696 if (len<nameCapacity) {
1697 name[len] = c;
1698 }
1699 ++len;
1700 ++tmpLocaleID;
1701 }
1702 }
1703
1704 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1705 /* Handle @FOO variant if @ is present and not followed by = */
1706 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1707 int32_t posixVariantSize;
1708 /* Add missing '_' if needed */
1709 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1710 do {
1711 if(len<nameCapacity) {
1712 name[len]='_';
1713 }
1714 ++len;
1715 ++fieldCount;
1716 } while(fieldCount<2);
1717 }
1718 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1719 (UBool)(variantSize > 0));
1720 if (posixVariantSize > 0) {
1721 if (variant == NULL) {
1722 variant = name+len;
1723 }
1724 len += posixVariantSize;
1725 variantSize += posixVariantSize;
1726 }
1727 }
1728
1729 /* Look up the ID in the canonicalization map */
1730 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1731 const char* id = CANONICALIZE_MAP[j].id;
1732 int32_t n = (int32_t)uprv_strlen(id);
1733 if (len == n && uprv_strncmp(name, id, n) == 0) {
1734 if (n == 0 && tmpLocaleID != NULL) {
1735 break; /* Don't remap "" if keywords present */
1736 }
1737 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1738 break;
1739 }
1740 }
1741 }
1742
1743 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1744 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1745 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1746 if(len<nameCapacity) {
1747 name[len]='@';
1748 }
1749 ++len;
1750 ++fieldCount;
1751 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1752 NULL, 0, NULL, TRUE, err);
1753 }
1754 }
1755
1756 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1757 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1758 }
1759
1760 return u_terminateChars(result, resultCapacity, len, err);
1761 }
1762
1763 /* ### ID parsing API **************************************************/
1764
1765 U_CAPI int32_t U_EXPORT2
1766 uloc_getParent(const char* localeID,
1767 char* parent,
1768 int32_t parentCapacity,
1769 UErrorCode* err)
1770 {
1771 const char *lastUnderscore;
1772 int32_t i;
1773
1774 if (U_FAILURE(*err))
1775 return 0;
1776
1777 if (localeID == NULL)
1778 localeID = uloc_getDefault();
1779
1780 lastUnderscore=uprv_strrchr(localeID, '_');
1781 if(lastUnderscore!=NULL) {
1782 i=(int32_t)(lastUnderscore-localeID);
1783 } else {
1784 i=0;
1785 }
1786
1787 if(i>0 && parent != localeID) {
1788 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1789 }
1790
1791 return u_terminateChars(parent, parentCapacity, i, err);
1792 }
1793
1794 U_CAPI int32_t U_EXPORT2
1795 uloc_getLanguage(const char* localeID,
1796 char* language,
1797 int32_t languageCapacity,
1798 UErrorCode* err)
1799 {
1800 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1801 int32_t i=0;
1802
1803 if (err==NULL || U_FAILURE(*err)) {
1804 return 0;
1805 }
1806
1807 if(localeID==NULL) {
1808 localeID=uloc_getDefault();
1809 }
1810
1811 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1812 return u_terminateChars(language, languageCapacity, i, err);
1813 }
1814
1815 U_CAPI int32_t U_EXPORT2
1816 uloc_getScript(const char* localeID,
1817 char* script,
1818 int32_t scriptCapacity,
1819 UErrorCode* err)
1820 {
1821 int32_t i=0;
1822
1823 if(err==NULL || U_FAILURE(*err)) {
1824 return 0;
1825 }
1826
1827 if(localeID==NULL) {
1828 localeID=uloc_getDefault();
1829 }
1830
1831 /* skip the language */
1832 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1833 if(_isIDSeparator(*localeID)) {
1834 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1835 }
1836 return u_terminateChars(script, scriptCapacity, i, err);
1837 }
1838
1839 U_CAPI int32_t U_EXPORT2
1840 uloc_getCountry(const char* localeID,
1841 char* country,
1842 int32_t countryCapacity,
1843 UErrorCode* err)
1844 {
1845 int32_t i=0;
1846
1847 if(err==NULL || U_FAILURE(*err)) {
1848 return 0;
1849 }
1850
1851 if(localeID==NULL) {
1852 localeID=uloc_getDefault();
1853 }
1854
1855 /* Skip the language */
1856 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1857 if(_isIDSeparator(*localeID)) {
1858 const char *scriptID;
1859 /* Skip the script if available */
1860 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1861 if(scriptID != localeID+1) {
1862 /* Found optional script */
1863 localeID = scriptID;
1864 }
1865 if(_isIDSeparator(*localeID)) {
1866 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1867 }
1868 }
1869 return u_terminateChars(country, countryCapacity, i, err);
1870 }
1871
1872 U_CAPI int32_t U_EXPORT2
1873 uloc_getVariant(const char* localeID,
1874 char* variant,
1875 int32_t variantCapacity,
1876 UErrorCode* err)
1877 {
1878 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1879 const char* tmpLocaleID;
1880 int32_t i=0;
1881
1882 if(err==NULL || U_FAILURE(*err)) {
1883 return 0;
1884 }
1885
1886 if (_hasBCP47Extension(localeID)) {
1887 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1888 } else {
1889 if (localeID==NULL) {
1890 localeID=uloc_getDefault();
1891 }
1892 tmpLocaleID=localeID;
1893 }
1894
1895 /* Skip the language */
1896 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1897 if(_isIDSeparator(*tmpLocaleID)) {
1898 const char *scriptID;
1899 /* Skip the script if available */
1900 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1901 if(scriptID != tmpLocaleID+1) {
1902 /* Found optional script */
1903 tmpLocaleID = scriptID;
1904 }
1905 /* Skip the Country */
1906 if (_isIDSeparator(*tmpLocaleID)) {
1907 const char *cntryID;
1908 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
1909 if (cntryID != tmpLocaleID+1) {
1910 /* Found optional country */
1911 tmpLocaleID = cntryID;
1912 }
1913 if(_isIDSeparator(*tmpLocaleID)) {
1914 /* If there was no country ID, skip a possible extra IDSeparator */
1915 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1916 tmpLocaleID++;
1917 }
1918 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
1919 }
1920 }
1921 }
1922
1923 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1924 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1925 /*
1926 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1927 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1928 }
1929 */
1930 return u_terminateChars(variant, variantCapacity, i, err);
1931 }
1932
1933 U_CAPI int32_t U_EXPORT2
1934 uloc_getName(const char* localeID,
1935 char* name,
1936 int32_t nameCapacity,
1937 UErrorCode* err)
1938 {
1939 return _canonicalize(localeID, name, nameCapacity, 0, err);
1940 }
1941
1942 U_CAPI int32_t U_EXPORT2
1943 uloc_getBaseName(const char* localeID,
1944 char* name,
1945 int32_t nameCapacity,
1946 UErrorCode* err)
1947 {
1948 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1949 }
1950
1951 U_CAPI int32_t U_EXPORT2
1952 uloc_canonicalize(const char* localeID,
1953 char* name,
1954 int32_t nameCapacity,
1955 UErrorCode* err)
1956 {
1957 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1958 }
1959
1960 U_CAPI const char* U_EXPORT2
1961 uloc_getISO3Language(const char* localeID)
1962 {
1963 int16_t offset;
1964 char lang[ULOC_LANG_CAPACITY];
1965 UErrorCode err = U_ZERO_ERROR;
1966
1967 if (localeID == NULL)
1968 {
1969 localeID = uloc_getDefault();
1970 }
1971 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1972 if (U_FAILURE(err))
1973 return "";
1974 offset = _findIndex(LANGUAGES, lang);
1975 if (offset < 0)
1976 return "";
1977 return LANGUAGES_3[offset];
1978 }
1979
1980 U_CAPI const char* U_EXPORT2
1981 uloc_getISO3Country(const char* localeID)
1982 {
1983 int16_t offset;
1984 char cntry[ULOC_LANG_CAPACITY];
1985 UErrorCode err = U_ZERO_ERROR;
1986
1987 if (localeID == NULL)
1988 {
1989 localeID = uloc_getDefault();
1990 }
1991 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1992 if (U_FAILURE(err))
1993 return "";
1994 offset = _findIndex(COUNTRIES, cntry);
1995 if (offset < 0)
1996 return "";
1997
1998 return COUNTRIES_3[offset];
1999 }
2000
2001 U_CAPI uint32_t U_EXPORT2
2002 uloc_getLCID(const char* localeID)
2003 {
2004 UErrorCode status = U_ZERO_ERROR;
2005 char langID[ULOC_FULLNAME_CAPACITY];
2006 uint32_t lcid = 0;
2007
2008 /* Check for incomplete id. */
2009 if (!localeID || uprv_strlen(localeID) < 2) {
2010 return 0;
2011 }
2012
2013 // First, attempt Windows platform lookup if available, but fall
2014 // through to catch any special cases (ICU vs Windows name differences).
2015 lcid = uprv_convertToLCIDPlatform(localeID, &status);
2016 if (U_FAILURE(status)) {
2017 return 0;
2018 }
2019 if (lcid > 0) {
2020 // Windows found an LCID, return that
2021 return lcid;
2022 }
2023
2024 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2025 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
2026 return 0;
2027 }
2028
2029 if (uprv_strchr(localeID, '@')) {
2030 // uprv_convertToLCID does not support keywords other than collation.
2031 // Remove all keywords except collation.
2032 int32_t len;
2033 char collVal[ULOC_KEYWORDS_CAPACITY];
2034 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2035
2036 len = uloc_getKeywordValue(localeID, "collation", collVal,
2037 UPRV_LENGTHOF(collVal) - 1, &status);
2038
2039 if (U_SUCCESS(status) && len > 0) {
2040 collVal[len] = 0;
2041
2042 len = uloc_getBaseName(localeID, tmpLocaleID,
2043 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
2044
2045 if (U_SUCCESS(status) && len > 0) {
2046 tmpLocaleID[len] = 0;
2047
2048 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2049 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
2050
2051 if (U_SUCCESS(status) && len > 0) {
2052 tmpLocaleID[len] = 0;
2053 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2054 }
2055 }
2056 }
2057
2058 // fall through - all keywords are simply ignored
2059 status = U_ZERO_ERROR;
2060 }
2061
2062 return uprv_convertToLCID(langID, localeID, &status);
2063 }
2064
2065 U_CAPI int32_t U_EXPORT2
2066 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2067 UErrorCode *status)
2068 {
2069 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2070 }
2071
2072 /* ### Default locale **************************************************/
2073
2074 U_CAPI const char* U_EXPORT2
2075 uloc_getDefault()
2076 {
2077 return locale_get_default();
2078 }
2079
2080 U_CAPI void U_EXPORT2
2081 uloc_setDefault(const char* newDefaultLocale,
2082 UErrorCode* err)
2083 {
2084 if (U_FAILURE(*err))
2085 return;
2086 /* the error code isn't currently used for anything by this function*/
2087
2088 /* propagate change to C++ */
2089 locale_set_default(newDefaultLocale);
2090 }
2091
2092 /**
2093 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2094 * to an array of pointers to arrays of char. All of these pointers are owned
2095 * by ICU-- do not delete them, and do not write through them. The array is
2096 * terminated with a null pointer.
2097 */
2098 U_CAPI const char* const* U_EXPORT2
2099 uloc_getISOLanguages()
2100 {
2101 return LANGUAGES;
2102 }
2103
/**
 * Returns a list of all 2-letter country codes defined in ISO 3166.  This is a
 * pointer to an array of pointers to arrays of char.  All of these pointers are
 * owned by ICU-- do not delete them, and do not write through them.  The array is
 * terminated with a null pointer.
 */
2110 U_CAPI const char* const* U_EXPORT2
2111 uloc_getISOCountries()
2112 {
2113 return COUNTRIES;
2114 }
2115
2116
2117 /* this function to be moved into cstring.c later */
2118 static char gDecimal = 0;
2119
2120 static /* U_CAPI */
2121 double
2122 /* U_EXPORT2 */
2123 _uloc_strtod(const char *start, char **end) {
2124 char *decimal;
2125 char *myEnd;
2126 char buf[30];
2127 double rv;
2128 if (!gDecimal) {
2129 char rep[5];
2130 /* For machines that decide to change the decimal on you,
2131 and try to be too smart with localization.
2132 This normally should be just a '.'. */
2133 sprintf(rep, "%+1.1f", 1.0);
2134 gDecimal = rep[2];
2135 }
2136
2137 if(gDecimal == '.') {
2138 return uprv_strtod(start, end); /* fall through to OS */
2139 } else {
2140 uprv_strncpy(buf, start, 29);
2141 buf[29]=0;
2142 decimal = uprv_strchr(buf, '.');
2143 if(decimal) {
2144 *decimal = gDecimal;
2145 } else {
2146 return uprv_strtod(start, end); /* no decimal point */
2147 }
2148 rv = uprv_strtod(buf, &myEnd);
2149 if(end) {
2150 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2151 }
2152 return rv;
2153 }
2154 }
2155
2156 typedef struct {
2157 float q;
2158 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
2159 char locale[ULOC_FULLNAME_CAPACITY+1];
2160 } _acceptLangItem;
2161
2162 static int32_t U_CALLCONV
2163 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2164 {
2165 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2166 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2167
2168 int32_t rc = 0;
2169 if(bb->q < aa->q) {
2170 rc = -1; /* A > B */
2171 } else if(bb->q > aa->q) {
2172 rc = 1; /* A < B */
2173 } else {
2174 rc = 0; /* A = B */
2175 }
2176
2177 if(rc==0) {
2178 rc = uprv_stricmp(aa->locale, bb->locale);
2179 }
2180
2181 #if defined(ULOC_DEBUG)
2182 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2183 aa->locale, aa->q,
2184 bb->locale, bb->q,
2185 rc);*/
2186 #endif
2187
2188 return rc;
2189 }
2190
2191 /*
2192 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2193 */
2194
2195 U_CAPI int32_t U_EXPORT2
2196 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2197 const char *httpAcceptLanguage,
2198 UEnumeration* availableLocales,
2199 UErrorCode *status)
2200 {
2201 MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
2202 char tmp[ULOC_FULLNAME_CAPACITY +1];
2203 int32_t n = 0;
2204 const char *itemEnd;
2205 const char *paramEnd;
2206 const char *s;
2207 const char *t;
2208 int32_t res;
2209 int32_t i;
2210 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2211
2212 if(U_FAILURE(*status)) {
2213 return -1;
2214 }
2215
2216 for(s=httpAcceptLanguage;s&&*s;) {
2217 while(isspace(*s)) /* eat space at the beginning */
2218 s++;
2219 itemEnd=uprv_strchr(s,',');
2220 paramEnd=uprv_strchr(s,';');
2221 if(!itemEnd) {
2222 itemEnd = httpAcceptLanguage+l; /* end of string */
2223 }
2224 if(paramEnd && paramEnd<itemEnd) {
2225 /* semicolon (;) is closer than end (,) */
2226 t = paramEnd+1;
2227 if(*t=='q') {
2228 t++;
2229 }
2230 while(isspace(*t)) {
2231 t++;
2232 }
2233 if(*t=='=') {
2234 t++;
2235 }
2236 while(isspace(*t)) {
2237 t++;
2238 }
2239 items[n].q = (float)_uloc_strtod(t,NULL);
2240 } else {
2241 /* no semicolon - it's 1.0 */
2242 items[n].q = 1.0f;
2243 paramEnd = itemEnd;
2244 }
2245 items[n].dummy=0;
2246 /* eat spaces prior to semi */
2247 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2248 ;
2249 int32_t slen = static_cast<int32_t>(((t+1)-s));
2250 if(slen > ULOC_FULLNAME_CAPACITY) {
2251 *status = U_BUFFER_OVERFLOW_ERROR;
2252 return -1; // too big
2253 }
2254 uprv_strncpy(items[n].locale, s, slen);
2255 items[n].locale[slen]=0; // terminate
2256 int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
2257 if(U_FAILURE(*status)) return -1;
2258 if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
2259 // canonicalization had an effect- copy back
2260 uprv_strncpy(items[n].locale, tmp, clen);
2261 items[n].locale[clen] = 0; // terminate
2262 }
2263 #if defined(ULOC_DEBUG)
2264 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2265 #endif
2266 n++;
2267 s = itemEnd;
2268 while(*s==',') { /* eat duplicate commas */
2269 s++;
2270 }
2271 if(n>=items.getCapacity()) { // If we need more items
2272 if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
2273 *status = U_MEMORY_ALLOCATION_ERROR;
2274 return -1;
2275 }
2276 #if defined(ULOC_DEBUG)
2277 fprintf(stderr,"malloced at size %d\n", items.getCapacity());
2278 #endif
2279 }
2280 }
2281 uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2282 if (U_FAILURE(*status)) {
2283 return -1;
2284 }
2285 LocalMemory<const char*> strs(NULL);
2286 if (strs.allocateInsteadAndReset(n) == NULL) {
2287 *status = U_MEMORY_ALLOCATION_ERROR;
2288 return -1;
2289 }
2290 for(i=0;i<n;i++) {
2291 #if defined(ULOC_DEBUG)
2292 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2293 #endif
2294 strs[i]=items[i].locale;
2295 }
2296 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2297 strs.getAlias(), n, availableLocales, status);
2298 return res;
2299 }
2300
2301
2302 U_CAPI int32_t U_EXPORT2
2303 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2304 UAcceptResult *outResult, const char **acceptList,
2305 int32_t acceptListCount,
2306 UEnumeration* availableLocales,
2307 UErrorCode *status)
2308 {
2309 int32_t i,j;
2310 int32_t len;
2311 int32_t maxLen=0;
2312 char tmp[ULOC_FULLNAME_CAPACITY+1];
2313 const char *l;
2314 char **fallbackList;
2315 if(U_FAILURE(*status)) {
2316 return -1;
2317 }
2318 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2319 if(fallbackList==NULL) {
2320 *status = U_MEMORY_ALLOCATION_ERROR;
2321 return -1;
2322 }
2323 for(i=0;i<acceptListCount;i++) {
2324 #if defined(ULOC_DEBUG)
2325 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2326 #endif
2327 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2328 #if defined(ULOC_DEBUG)
2329 fprintf(stderr," %s\n", l);
2330 #endif
2331 len = (int32_t)uprv_strlen(l);
2332 if(!uprv_strcmp(acceptList[i], l)) {
2333 if(outResult) {
2334 *outResult = ULOC_ACCEPT_VALID;
2335 }
2336 #if defined(ULOC_DEBUG)
2337 fprintf(stderr, "MATCH! %s\n", l);
2338 #endif
2339 if(len>0) {
2340 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2341 }
2342 for(j=0;j<i;j++) {
2343 uprv_free(fallbackList[j]);
2344 }
2345 uprv_free(fallbackList);
2346 return u_terminateChars(result, resultAvailable, len, status);
2347 }
2348 if(len>maxLen) {
2349 maxLen = len;
2350 }
2351 }
2352 uenum_reset(availableLocales, status);
2353 /* save off parent info */
2354 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2355 fallbackList[i] = uprv_strdup(tmp);
2356 } else {
2357 fallbackList[i]=0;
2358 }
2359 }
2360
2361 for(maxLen--;maxLen>0;maxLen--) {
2362 for(i=0;i<acceptListCount;i++) {
2363 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2364 #if defined(ULOC_DEBUG)
2365 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2366 #endif
2367 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2368 #if defined(ULOC_DEBUG)
2369 fprintf(stderr," %s\n", l);
2370 #endif
2371 len = (int32_t)uprv_strlen(l);
2372 if(!uprv_strcmp(fallbackList[i], l)) {
2373 if(outResult) {
2374 *outResult = ULOC_ACCEPT_FALLBACK;
2375 }
2376 #if defined(ULOC_DEBUG)
2377 fprintf(stderr, "fallback MATCH! %s\n", l);
2378 #endif
2379 if(len>0) {
2380 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2381 }
2382 for(j=0;j<acceptListCount;j++) {
2383 uprv_free(fallbackList[j]);
2384 }
2385 uprv_free(fallbackList);
2386 return u_terminateChars(result, resultAvailable, len, status);
2387 }
2388 }
2389 uenum_reset(availableLocales, status);
2390
2391 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2392 uprv_free(fallbackList[i]);
2393 fallbackList[i] = uprv_strdup(tmp);
2394 } else {
2395 uprv_free(fallbackList[i]);
2396 fallbackList[i]=0;
2397 }
2398 }
2399 }
2400 if(outResult) {
2401 *outResult = ULOC_ACCEPT_FAILED;
2402 }
2403 }
2404 for(i=0;i<acceptListCount;i++) {
2405 uprv_free(fallbackList[i]);
2406 }
2407 uprv_free(fallbackList);
2408 return -1;
2409 }
2410
2411 U_CAPI const char* U_EXPORT2
2412 uloc_toUnicodeLocaleKey(const char* keyword)
2413 {
2414 const char* bcpKey = ulocimp_toBcpKey(keyword);
2415 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2416 // unknown keyword, but syntax is fine..
2417 return keyword;
2418 }
2419 return bcpKey;
2420 }
2421
2422 U_CAPI const char* U_EXPORT2
2423 uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2424 {
2425 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2426 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2427 // unknown keyword, but syntax is fine..
2428 return value;
2429 }
2430 return bcpType;
2431 }
2432
2433 static UBool
2434 isWellFormedLegacyKey(const char* legacyKey)
2435 {
2436 const char* p = legacyKey;
2437 while (*p) {
2438 if (!UPRV_ISALPHANUM(*p)) {
2439 return FALSE;
2440 }
2441 p++;
2442 }
2443 return TRUE;
2444 }
2445
2446 static UBool
2447 isWellFormedLegacyType(const char* legacyType)
2448 {
2449 const char* p = legacyType;
2450 int32_t alphaNumLen = 0;
2451 while (*p) {
2452 if (*p == '_' || *p == '/' || *p == '-') {
2453 if (alphaNumLen == 0) {
2454 return FALSE;
2455 }
2456 alphaNumLen = 0;
2457 } else if (UPRV_ISALPHANUM(*p)) {
2458 alphaNumLen++;
2459 } else {
2460 return FALSE;
2461 }
2462 p++;
2463 }
2464 return (alphaNumLen != 0);
2465 }
2466
2467 U_CAPI const char* U_EXPORT2
2468 uloc_toLegacyKey(const char* keyword)
2469 {
2470 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2471 if (legacyKey == NULL) {
2472 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2473 //
2474 // Note:
2475 // LDML/CLDR provides some definition of keyword syntax in
2476 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2477 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2478 // Keys can only consist of [0-9a-zA-Z].
2479 if (isWellFormedLegacyKey(keyword)) {
2480 return keyword;
2481 }
2482 }
2483 return legacyKey;
2484 }
2485
2486 U_CAPI const char* U_EXPORT2
2487 uloc_toLegacyType(const char* keyword, const char* value)
2488 {
2489 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2490 if (legacyType == NULL) {
2491 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2492 //
2493 // Note:
2494 // LDML/CLDR provides some definition of keyword syntax in
2495 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2496 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2497 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2498 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2499 if (isWellFormedLegacyType(value)) {
2500 return value;
2501 }
2502 }
2503 return legacyType;
2504 }
2505
2506 /*eof*/